MobileRead Forums - View Single Post - how to read this simple website into ebook

timoy_kindle · 09-26-2016, 01:02 AM

Dear all,

Could someone tell me how to turn this website into an ebook:
http://news.chilin.hk/hk/

I have tried the recipe below, but only the first two articles are downloaded.

Thanks!

from calibre.web.feeds.news import BasicNewsRecipe

class MingPao(BasicNewsRecipe):
    """Calibre recipe that collects the articles linked from one index page.

    ``parse_index`` fetches ``INDEX``, treats every usable ``<a href>`` on
    that page as an article and returns them all in a single feed named
    ``'Articles'``.

    NOTE(review): if only two articles are downloaded, check whether the
    recipe is being run with calibre's ``--test`` option, which deliberately
    limits the number of articles fetched -- confirm against the calibre docs.
    """

    title = 'MingPao'
    __author__ = 'chilin'
    description = 'MingPao Newspaper'

    # Base against which relative article links are resolved.
    BASE_INDEX = 'http://news.chilin.hk/hk/'

    # Index page listing the articles.  The URL must not contain spaces (the
    # base64 ``path`` value and the ``date`` value were broken by whitespace
    # in the pasted version).
    # NOTE(review): ``date=20160926`` is hard-coded, so this presumably always
    # fetches the same issue -- confirm whether it should follow today's date.
    INDEX = 'http://news.chilin.hk/hk/?job=title&path=Q29udGVudF9TZWN0aW9uTmV3cy5jZm0%2FQ2hhbm5lbD1qYSZQYXRoPTY3MzM3NDkyODQyL2phaW5kZXguY2Zt&sec=eJzbcGxpFAAGugJ2&page=9&pic=big&date=20160926&'

    # ``language`` is a language code; 'big5' is a character encoding, not a
    # language.
    # NOTE(review): if the site mis-declares its charset, set the recipe's
    # ``encoding`` attribute (e.g. 'big5') -- verify against the live page.
    language = 'zh'
    remove_javascript = True
    #auto_cleanup = True
    no_stylesheets = True

    def parse_index(self):
        """Build the feed list from the anchors found on the index page.

        Returns:
            A one-element list ``[('Articles', articles)]`` where each article
            is a dict with the keys ``title``, ``url``, ``description`` and
            ``date``.
        """
        # Local import so the recipe stays self-contained; the fallback keeps
        # it loadable on Python 2 based calibre builds.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.index_to_soup(self.INDEX)

        articles = []
        seen_urls = set()
        # href=True skips anchors without an href, which would otherwise
        # raise KeyError on post['href'].
        for post in soup.findAll('a', href=True):
            self.log('--> post: ', post)

            # urljoin copes with relative, root-relative and absolute hrefs,
            # unlike plain string concatenation.
            url = urljoin(self.BASE_INDEX, post['href'].strip())
            self.log('--> url: ', url)

            title = self.tag_to_string(post).strip()
            self.log('---> title: ', title)

            # Ignore javascript:/mailto: style links, links without any text
            # and links that were already collected.
            if not url.startswith(('http://', 'https://')):
                continue
            if not title or url in seen_urls:
                continue
            seen_urls.add(url)

            articles.append({'title': title, 'url': url,
                             'description': '', 'date': ''})

        # Register the single feed once, after every link has been collected.
        return [('Articles', articles)]

09-26-2016, 01:02 AM	#1
timoy_kindle Junior Member Posts: 7 Karma: 10 Join Date: Aug 2013 Device: kindle paperwhite	how to read this simple website into ebook dear all, might i know how to read this website: http://news.chilin.hk/hk/ i have tried. but only the first two articles are downloaded. thanks! from calibre.web.feeds.news import BasicNewsRecipe class MingPao(BasicNewsRecipe): title = 'MingPao' __author__ = 'chilin' description = 'MingPao Newspaper' BASE_INDEX = 'http://news.chilin.hk/hk/' INDEX = 'http://news.chilin.hk/hk/?job=title&path=Q29udGVudF9TZWN0aW9uTmV3cy5jZm0%2F Q2hhbm5lbD1qYSZQYXRoPTY3MzM3NDkyODQyL2phaW5kZXguY2 Zt&sec=eJzbcGxpFAAGugJ2&page=9&pic=big&date=201609 26&' language = 'big5' remove_javascript = True #auto_cleanup = True no_stylesheets = True def parse_index(self): articles = [] feeds = [] soup = self.index_to_soup(self.INDEX) for post in soup.findAll('a'): self.log('--> post: ', post) url = post['href'] self.log('--> url: ', url) title = self.tag_to_string(post) self.log('---> title: ', title) articles.append({'title': title, 'url': self.BASE_INDEX + url, 'description': '', 'date': ''}) feeds.append(('Articles', articles)) return feeds