View Single Post
Old 03-04-2023, 11:12 PM   #7
rasteps
Zealot
rasteps began at the beginning.
 
Posts: 140
Karma: 10
Join Date: Sep 2010
Device: Kindle, Android phone
Quote:
Originally Posted by unkn0wn View Post
Code:
"""
www.mainichi.jp/english
"""

from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe

class MainichiEnglishNews(BasicNewsRecipe):
    """Calibre recipe for the English edition of The Mainichi.

    Articles are discovered through a Google News RSS search restricted to
    ``mainichi.jp/english/articles/`` rather than through the site itself.
    Because ``articles_are_obfuscated`` is set, each feed URL is passed
    through ``get_obfuscated_article`` before being processed.
    """

    title = u"The Mainichi"
    __author__ = 'unkn0wn'

    description = "Japanese traditional newspaper Mainichi news in English"
    publisher = "Mainichi News"
    publication_type = "newspaper"
    category = "news, japan"
    language = "en_JP"

    index = "http://mainichi.jp/english/"
    masthead_url = index + "images/themainichi.png"

    no_stylesheets = True
    remove_javascript = True
    auto_cleanup = True

    # Drop entries whose title has already been seen.
    ignore_duplicate_articles = {'title'}

    # Route every article URL through get_obfuscated_article() below.
    articles_are_obfuscated = True

    def get_obfuscated_article(self, url):
        """Download the article behind a feed URL into a temporary file.

        The feed URL is opened first; if that raises an error carrying a
        ``location`` header, the header value replaces the URL. The
        resulting page is parsed and the first ``<a href=...>`` it contains
        is fetched as the article.

        :param url: article URL as listed in the feed.
        :return: path of a persistent temporary ``.html`` file holding the
            downloaded article.
        :raises ValueError: if the intermediate page contains no link.
        :raises Exception: the original error from opening ``url`` when it
            carries no ``location`` header to follow.
        """
        br = self.get_browser()
        try:
            br.open(url)
        except Exception as e:
            # Only errors that expose response headers with a redirect
            # target can be recovered from; anything else (timeouts, DNS
            # failures, ...) is re-raised instead of being masked by an
            # AttributeError on ``e.hdrs``.
            hdrs = getattr(e, 'hdrs', None)
            location = hdrs.get('location') if hdrs is not None else None
            if not location:
                raise
            url = location

        soup = self.index_to_soup(url)
        link = soup.find('a', href=True)
        if link is None:
            raise ValueError('No article link found at {}'.format(url))

        html = br.open(link['href']).read()
        pt = PersistentTemporaryFile('.html')
        try:
            pt.write(html)
        finally:
            # Close the handle even if the write fails; the file itself
            # persists so the returned path stays valid.
            pt.close()
        return pt.name

    feeds = [
        ('Articles', 'https://news.google.com/rss/search?q=when:48h+allinurl:mainichi.jp%2Fenglish%2Farticles%2F&hl=en-US&gl=US&ceid=US:en')
    ]


Is the fix to add "www" to the URL?
rasteps is offline   Reply With Quote