"""
scmp.com
"""

import json
import re
from datetime import datetime, timedelta, timezone

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe, classes


class SCMP(BasicNewsRecipe):
    title = "South China Morning Post"
    __author__ = "llam"
    description = "SCMP.com, Hong Kong's premier online English daily provides exclusive up-to-date news, audio video news, podcasts, RSS Feeds, Blogs, breaking news, top stories, award winning news and analysis on Hong Kong and China."  # noqa
    publisher = "South China Morning Post Publishers Ltd."
    oldest_article = 1
    max_articles_per_feed = 25
    no_stylesheets = True
    remove_javascript = True
    encoding = "utf-8"
    use_embedded_content = False
    language = "en"
    remove_empty_feeds = True
    publication_type = "newspaper"
    auto_cleanup = False
    compress_news_images = True
    ignore_duplicate_articles = {"title", "url"}
    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/SCMP_logo.svg'
    
    def get_cover_url(self):
        from datetime import date
        # today's front-page scan hosted on kiosko.net
        cover = date.today().strftime(
            'https://img.kiosko.net/%Y/%m/%d/cn/scmp.750.jpg'
        )
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except Exception:
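            # fall back to scraping the kiosko.net index page for the scan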
            index = 'https://es.kiosko.net/cn/np/scmp.html'
            soup = self.index_to_soup(index)
            for image in soup.findAll('img', src=True):
                if image['src'].endswith('750.jpg'):
                    return 'https:' + image['src']
            self.log("\nCover unavailable")
            cover = None
        return cover

    # used when unable to extract article from <script>, particularly in the Sports section
    remove_tags = [
        dict(
            classes(
                "sticky-wrap relative social-media social-media--extended__shares"
                " article-body-comment scmp_button_comment_wrapper social-media--extended__in-site"
                " footer scmp-advert-tile sidebar-col related-article share-widget"
            )
        ),
        dict(attrs={"addthis_title": True}),
        dict(name=["script", "style"]),
    ]
    remove_attributes = ["style", "font"]

    extra_css = """
    .headline { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .sub-headline { font-size: 1rem; margin-bottom: 1.5rem; }
    .sub-headline ul { padding-left: 1rem; }
    .sub-headline ul li { margin-bottom: 0.8rem; }
    .article-meta, .article-header__publish { padding-bottom: 0.5rem; }
    .article-meta .author { text-transform: uppercase; font-weight: bold; }
    .article-meta .published-dt { margin-left: 0.5rem; }
    .article-img { margin-bottom: 0.8rem; max-width: 100%; }
    .article-img img, .carousel__slide img {
        display: block; margin-bottom: 0.3rem; max-width: 100%; height: auto;
        box-sizing: border-box; }
    .article-img .caption, .article-caption { font-size: 0.8rem; }
    """

    # https://www.scmp.com/rss
    feeds = [
        ("Hong Kong", "https://www.scmp.com/rss/2/feed"),
        ("China", "https://www.scmp.com/rss/4/feed"),
        ("Asia", "https://www.scmp.com/rss/3/feed"),
        ("World", "https://www.scmp.com/rss/5/feed"),
        ("Business", "https://www.scmp.com/rss/92/feed"),
        ("Tech", "https://www.scmp.com/rss/36/feed"),
        ("Life", "https://www.scmp.com/rss/94/feed"),
        ("Culture", "https://www.scmp.com/rss/322296/feed"),
        ("Sport", "https://www.scmp.com/rss/95/feed"),
        ("Post Mag", "https://www.scmp.com/rss/71/feed"),
        ("Style", "https://www.scmp.com/rss/72/feed"),
    ]

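    # Recursively convert a list of Apollo JSON content nodes into HTML
    # appended under `ele`: text nodes are concatenated, iframes are
    # replaced with plain links, and <img> nodes get a caption <span>
    # built from their alt/title attributes.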
    def _extract_child_nodes(self, children, ele, soup, level=1):
        if not children:
            return

        child_html = ""
        for child in children:
            if child.get("type", "") == "text":
                child_html += child["data"]
            else:
                if child["type"] == "iframe":
                    # change iframe to <span> with the src linked
                    new_ele = soup.new_tag("span")
                    new_ele["class"] = f'embed-{child["type"]}'
                    iframe_src = child.get("attribs", {}).get("src")
                    a_tag = soup.new_tag("a")
                    a_tag["href"] = iframe_src
                    a_tag.string = f"[Embed: {iframe_src}]"
                    new_ele.append(a_tag)
                else:
                    new_ele = soup.new_tag(child["type"])
                    for k, v in child.get("attribs", {}).items():
                        if k.startswith("data-"):
                            continue
                        new_ele[k] = v
                    if child.get("children"):
                        self._extract_child_nodes(
                            child["children"], new_ele, soup, level + 1
                        )
                child_html += str(new_ele)
                if child["type"] == "img":
                    # generate a caption <span> tag for <img>
                    attribs = child.get("attribs", {})
                    caption_text = attribs.get("alt") or attribs.get("title") or ""
                    caption_tag = soup.new_tag("span")
                    caption_tag.string = caption_text
                    caption_tag["class"] = "caption"
                    child_html += str(caption_tag)
                    ele["class"] = "article-img"
        ele.append(BeautifulSoup(child_html))

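    # SCMP pages ship the article content as JSON inside a
    # window.__APOLLO_STATE__ <script> tag; rebuild clean article HTML
    # from that state instead of scraping the rendered DOM.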
    def preprocess_raw_html(self, raw_html, url):
        article = None
        soup = BeautifulSoup(raw_html)

        for script in soup.find_all("script"):
            if not self.tag_to_string(script).startswith('window.__APOLLO_STATE__'):
                continue
            article_js = re.sub(
                r"window\.__APOLLO_STATE__\s*=\s*", "", self.tag_to_string(script).strip()
            )
            if article_js.endswith(";"):
                article_js = article_js[:-1]
            article = json.loads(article_js)
            break

        if not (article and article.get("contentService")):
            # Sometimes the page does not have article content in the <script>
            # particularly in the Sports section, so we fallback to
            # raw_html and rely on remove_tags to clean it up
            self.log(f"Unable to find article from script in {url}")
            return raw_html

        content_service = article.get("contentService")
        content_node_id = None
        # the article node id sits under a content(...) key in ROOT_QUERY
        for k, v in content_service["ROOT_QUERY"].items():
            if not k.startswith("content"):
                continue
            content_node_id = v["id"]
            break
        content = content_service.get(content_node_id)
        if not content:
            self.log(f"Unable to find content node in {url}")
            return raw_html

        if content.get("sponsorType"):
            # skip sponsored articles
            self.abort_article(f"Sponsored article: {url}")

        body = None
        for k, v in content.items():
            if (not k.startswith("body(")) or v.get("type", "") != "json":
                continue
            body = v
        if body is None:
            self.log(f"Unable to find article body in {url}")
            return raw_html

        authors = [content_service[a["id"]]["name"] for a in content["authors"]]
        # publishedDate is an epoch timestamp in milliseconds
        date_published = datetime.fromtimestamp(
            content["publishedDate"] / 1000, tz=timezone.utc
        )
        date_published_loc = date_published.astimezone(
            timezone(offset=timedelta(hours=8))  # HK time
        ).strftime("%Y-%m-%d %H:%M")

        html_output = f"""<html><head><title>{content["headline"]}</title></head>
        <body>
            <article>
            <h1 class="headline">{content["headline"]}</h1>
            <div class="sub-headline"></div>
            <div class="article-meta">
                <span class="author">{", ".join(authors)}</span>
                <span class="published-dt">
                    {date_published_loc}
                </span>
            </div>
            </article>
        </body></html>
        """

        new_soup = BeautifulSoup(html_output, "html.parser")
        # sub headline
        for c in content.get("subHeadline", {}).get("json", []):
            ele = new_soup.new_tag(c["type"])
            self._extract_child_nodes(c.get("children", []), ele, new_soup)
            new_soup.find(class_="sub-headline").append(ele)

        # article content
        for node in body["json"]:
            if node["type"] not in ["p", "div"]:
                continue
            new_ele = new_soup.new_tag(node["type"])
            new_ele.string = ""
            if node.get("children"):
                self._extract_child_nodes(node["children"], new_ele, new_soup)
            new_soup.article.append(new_ele)

        return str(new_soup)

