Quote:
Originally Posted by unkn0wn
Code:
def get_browser(self):
    """Return the base recipe's browser, created with a custom user agent."""
    # Delegate to BasicNewsRecipe and only override the user agent string.
    return BasicNewsRecipe.get_browser(self, user_agent='common_words/based')
Add this to the recipe.
|
It is still failing: it captures only two headlines rather than all of them, and it captures only the headlines, with no article content. Please help take another look. Thank you very much.
from calibre.web.feeds.news import BasicNewsRecipe
class TheJerusalemPost(BasicNewsRecipe):
    """calibre news recipe that downloads The Jerusalem Post from its RSS feeds."""

    title = 'The Jerusalem Post'
    # Maximum article age to download (days, per calibre's BasicNewsRecipe docs).
    oldest_article = 30
    # Do not treat the feed entry text as the article body.
    use_embedded_content = False
    no_stylesheets = True

    # Only these elements of each downloaded article page are kept.
    # NOTE(review): confirm these selectors still match the current jpost.com
    # article markup; if they match nothing, no article body will be kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'headline'}),
        dict(name='div', attrs={'class': 'article-text'}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Top Stories', 'https://www.jpost.com/Rss/RssFeedsHeadlines.aspx'),
        ('Israel News', 'https://www.jpost.com/Rss/RssFeedsIsrael.aspx'),
        ('International News', 'https://www.jpost.com/Rss/RssFeedsWorld.aspx'),
        ('Opinion', 'https://www.jpost.com/Rss/RssFeedsOpinion.aspx'),
        ('Features', 'https://www.jpost.com/Rss/RssFeedsFeatures.aspx'),
    ]

    def get_browser(self):
        """Return the base recipe's browser, created with a custom user agent."""
        # Delegate to BasicNewsRecipe and only override the user agent string.
        return BasicNewsRecipe.get_browser(self, user_agent='common_words/based')