![]() |
#1 |
Member
![]() Posts: 18
Karma: 10
Join Date: Aug 2011
Device: Nook
|
Science News Recent Issues update
Better, larger images:
Code:
#!/usr/bin/env python

__license__ = 'GPL v3'

'''
sciencenews.org
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re


class ScienceNewsIssue(BasicNewsRecipe):
    """Calibre recipe for the Science News "recent issues" RSS feed.

    Articles are taken from the single feed listed in ``feeds``; linked
    pages matching ``match_regexps`` are followed one level deep
    (``recursions = 1``).  Scaled-down image URLs are rewritten to the
    site's ``/view/download/id/...`` form by ``image_url_processor``.
    """

    title = u'Science News Recent Issues'
    __author__ = u'Darko Miletic, Sujata Raman and Starson17'
    description = u'''Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News. This recipe downloads the last 30 days worth of articles.'''
    category = u'Science, Technology, News'
    publisher = u'Society for Science & the Public'
    oldest_article = 30
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'
    recursions = 1
    remove_attributes = ['style']

    conversion_options = {
        'linearize_tables': True,
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    extra_css = '''
        .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
        .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
        .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
        .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
        .exclusive{color:#FF0000 ;}
        .anonymous{color:#14487E ;}
        .content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
        .description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
        .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'class': 'content_content'}),
        dict(name='ul', attrs={'id': 'toc'}),
    ]

    remove_tags = [
        dict(name='a', attrs={'class': 'enlarge print-no'}),
        dict(name='a', attrs={'rel': 'shadowbox'}),
    ]

    feeds = [
        (u"Science News Current Issues",
         u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss'),
    ]

    match_regexps = [
        r'www.sciencenews.org/view/feature/id/',
        r'www.sciencenews.org/view/generic/id',
    ]

    def image_url_processor(self, baseurl, url):
        """Rewrite a ``scale`` image URL to the site's download URL.

        When the fifth ``/``-separated segment of ``url`` is ``scale``, the
        URL is replaced by
        ``http://www.sciencenews.org/view/download/id/<segment 6>/name/<last segment>``.
        Any other URL -- including one too short to have those segments --
        is returned unchanged.

        ``baseurl`` is accepted for interface compatibility and not used.
        """
        parts = url.split('/')
        # Guard the indexing: relative or otherwise short URLs have fewer
        # than seven segments and previously raised IndexError here.
        if len(parts) > 6 and parts[4] == u'scale':
            # parts[6] is presumably the image id (it is placed after
            # ".../id/" in the rewritten URL) -- confirm against the site.
            url = (u'http://www.sciencenews.org/view/download/id/' + parts[6]
                   + u'/name/' + parts[-1])
        return url

    def get_cover_url(self):
        """Return the cover image URL scraped from the home page, or None.

        Looks for the first ``<img alt="issue">`` on
        ``http://www.sciencenews.org/view/home`` and builds the URL from its
        ``src`` attribute with ``.jpg`` appended.  Returns ``None`` when no
        such image (or no ``src`` on it) is found.
        """
        cover_url = None
        index = 'http://www.sciencenews.org/view/home'
        soup = self.index_to_soup(index)
        link_item = soup.find(name='img', alt="issue")
        # Use .get() so an <img> without a src yields no cover instead of
        # raising KeyError.
        src = link_item.get('src') if link_item is not None else None
        if src:
            cover_url = 'http://www.sciencenews.org' + src + '.jpg'
        return cover_url

    def preprocess_html(self, soup):
        """Rename every ``<span>`` in ``soup`` to ``<div>`` and return it."""
        for tag in soup.findAll(name=['span']):
            tag.name = 'div'
        return soup
![]() |
![]() |
![]() |
|
![]() |
||||
Thread | Thread Starter | Forum | Replies | Last Post |
More Recent Science Fiction Authors | geordietastic | Reading Recommendations | 18 | 08-20-2012 05:28 AM |
Issues Sending Content to Device with Recent Versions | mjm2705 | Calibre | 4 | 11-29-2008 06:09 PM |