MobileRead Forums - View Single Post - Failed to fetch multipage articles

Thread: Failed to fetch multipage articles

View Single Post

03-22-2019, 08:06 AM	#1
Susa Junior Member Posts: 2 Karma: 10 Join Date: Jan 2019 Device: Kindle Paperwhite 3	Failed to fetch multipage articles Hello, I have tried to fetch the articles on https://language.chinadaily.com.cn/5...03f6866ee845c/ but I only got the first pages.The append_page didn't seem to work. I wonder if anyone can help me with the recipe. Spoiler: # -- coding: utf-8 -- from calibre.web.feeds.news import BasicNewsRecipe class shuang1(BasicNewsRecipe): title = u'权威发布CD' description = 'From China Daily' encoding = 'utf-8' no_stylesheets = True remove_javascript = True keep_only_tags = [dict(name='div', attrs={'class':'main_title'}), dict(name='div', attrs={'class':'mian_txt'})] def get_title(self, link): return link.contents[0].strip() def parse_index(self): site = 'https://language.chinadaily.com.cn/5af95d44a3103f6866ee845c/' soup = self.index_to_soup(site) div = soup.findAll('p', { 'class': 'gy_box_txt2' }) articles = [] for link in div: til = link.a.contents[0].strip() url = 'https:' + link.a.get("href") a = { 'title': til, 'url': url } articles.append(a) ans = [(til, articles)] return ans def append_page(self, soup, appendtag, position): pager = soup.find('a', attrs={'class':'pagestyle'}) if pager: nexturl = 'https:' + pager['href'] soup2 = self.index_to_soup(nexturl) texttag = soup2.find('div', attrs={'class':'mian_txt'}) newpos = len(texttag.contents) self.append_page(soup2,texttag,newpos) texttag.extract() appendtag.insert(position,texttag) def preprocess_html(self, soup): self.append_page(soup, soup.body, 3) pager = soup.find('a', attrs={'class':'pagestyle'}) if pager: pager.extract() return self.adeify_images(soup)