Dear all,
Could someone tell me how to read this website:
http://news.chilin.hk/hk/
I have tried the recipe below, but only the first two articles are downloaded.
Thanks!
from calibre.web.feeds.news import BasicNewsRecipe
class MingPao(BasicNewsRecipe):
    """Recipe that turns every link on one index page into an article.

    The index page at ``INDEX`` is fetched once, each usable ``<a>`` tag on
    it becomes an article entry, and all entries are returned as a single
    feed named ``'Articles'``.

    NOTE(review): if only two articles are ever fetched, check whether the
    recipe is being run with ebook-convert's ``--test`` option, which is
    documented to limit the download on purpose -- confirm against the
    calibre documentation.
    """

    title = 'MingPao'
    __author__ = 'chilin'
    description = 'MingPao Newspaper'

    # Base against which the hrefs found on the index page are resolved.
    BASE_INDEX = 'http://news.chilin.hk/hk/'

    # Index page listing the articles. The stray spaces that were embedded in
    # this URL (inside the base64 ``path`` token and the ``date`` value) have
    # been removed; a URL containing raw spaces is not valid.
    # NOTE(review): ``date`` is pinned to 20160926 -- presumably it has to be
    # updated (or computed) to get a current issue; verify against the site.
    INDEX = (
        'http://news.chilin.hk/hk/?job=title'
        '&path=Q29udGVudF9TZWN0aW9uTmV3cy5jZm0%2F'
        'Q2hhbm5lbD1qYSZQYXRoPTY3MzM3NDkyODQyL2phaW5kZXguY2Zt'
        '&sec=eJzbcGxpFAAGugJ2&page=9&pic=big&date=20160926&'
    )

    # 'big5' is a character-set name, not a language code, so it does not
    # belong in ``language``.
    # NOTE(review): 'zh' is assumed from the site; if the pages are served as
    # Big5 without declaring it, set ``encoding = 'big5'`` as well -- confirm.
    language = 'zh'

    remove_javascript = True
    # auto_cleanup = True  # left disabled, as in the original recipe
    no_stylesheets = True

    def parse_index(self):
        """Build the feed list from the anchors on the index page.

        Returns:
            A one-element list ``[('Articles', articles)]`` where each article
            is a dict with the keys ``title``, ``url``, ``description`` and
            ``date`` (the last two are always empty strings).
        """
        # Local import so the recipe stays self-contained on both Python 3
        # and Python 2 interpreters.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        articles = []
        soup = self.index_to_soup(self.INDEX)
        for anchor in soup.findAll('a'):
            # Anchors without an href (e.g. named anchors) used to raise
            # KeyError; in-page and javascript links are not articles.
            href = (anchor.get('href') or '').strip()
            if not href or href.startswith(('#', 'javascript:')):
                continue

            # urljoin copes with relative, root-relative and absolute hrefs,
            # unlike plain string concatenation with BASE_INDEX.
            url = urljoin(self.BASE_INDEX, href)
            title = self.tag_to_string(anchor)
            self.log('--> url: ', url)
            self.log('---> title: ', title)
            articles.append({
                'title': title,
                'url': url,
                'description': '',
                'date': '',
            })

        return [('Articles', articles)]