View Single Post
Old 09-26-2016, 12:02 AM   #1
timoy_kindle
Junior Member
timoy_kindle began at the beginning.
 
Posts: 7
Karma: 10
Join Date: Aug 2013
Device: kindle paperwhite
how to read this simple website into ebook

dear all,

May I know how to read this website into an ebook:
http://news.chilin.hk/hk/

I have tried the recipe below, but only the first two articles are downloaded.

thanks!


from calibre.web.feeds.news import BasicNewsRecipe


class MingPao(BasicNewsRecipe):
    """Calibre recipe that turns the chilin.hk MingPao index into one feed.

    The index page at ``INDEX`` is fetched once and every usable ``<a>``
    link on it becomes an article in a single feed named ``'Articles'``.

    NOTE(review): if only two articles are ever fetched, check whether the
    recipe is being run in calibre's test mode, which deliberately limits
    the number of downloaded articles -- confirm against the command used.
    """

    title = 'MingPao'
    __author__ = 'chilin'
    description = 'MingPao Newspaper'
    BASE_INDEX = 'http://news.chilin.hk/hk/'
    # The URL must be one unbroken string: stray spaces inside the base64
    # ``path`` value or the ``date`` value make the request invalid.
    # NOTE(review): ``date`` is hard-coded to 2016-09-26 -- presumably it
    # selects the issue; confirm whether it should follow the current day.
    INDEX = (
        'http://news.chilin.hk/hk/?job=title'
        '&path=Q29udGVudF9TZWN0aW9uTmV3cy5jZm0%2F'
        'Q2hhbm5lbD1qYSZQYXRoPTY3MzM3NDkyODQyL2phaW5kZXguY2Zt'
        '&sec=eJzbcGxpFAAGugJ2&page=9&pic=big&date=20160926&'
    )
    # ``language`` takes a language code, not a character encoding.
    # NOTE(review): if the pages are served as Big5 without a correct
    # charset declaration, set ``encoding = 'big5'`` as well -- verify.
    language = 'zh'
    remove_javascript = True
    # auto_cleanup = True
    no_stylesheets = True

    def parse_index(self):
        """Build the feed list from the links found on the index page.

        Returns:
            A one-element list ``[('Articles', articles)]`` where each
            article is a dict with ``title``, ``url``, ``description`` and
            ``date`` keys.
        """
        # Imported here so the recipe stays self-contained on both the
        # Python 3 and the older Python 2 based calibre releases.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.index_to_soup(self.INDEX)
        articles = []
        seen_urls = set()

        # href=True skips anchors without an href, which would otherwise
        # raise KeyError on post['href'].
        for post in soup.findAll('a', href=True):
            href = post['href'].strip()
            title = self.tag_to_string(post).strip()

            # Ignore links that cannot be fetched as an article page.
            if not href or not title:
                continue
            if href.startswith(('#', 'javascript:', 'mailto:')):
                continue

            # urljoin handles relative, root-relative and absolute hrefs;
            # plain concatenation is only right for the relative case.
            url = urljoin(self.BASE_INDEX, href)
            if url in seen_urls:
                continue
            seen_urls.add(url)

            self.log('--> url: ', url)
            self.log('---> title: ', title)
            articles.append({
                'title': title,
                'url': url,
                'description': '',
                'date': '',
            })

        return [('Articles', articles)]
timoy_kindle is offline   Reply With Quote