sun uk 5/3/13

scissors · 05-04-2013, 03:41 AM

Minor update: the site used to fetch the cover image has been replaced.

Spoiler:

Code:

import re, random

from calibre import browser
from calibre.web.feeds.recipes import BasicNewsRecipe

class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """calibre news recipe for The Sun (UK tabloid).

    Downloads the News, Sport, Showbiz and Woman RSS feeds, drops a couple
    of known promotional/unwanted articles, and picks a cover image from
    thepaperboy.com with a random fallback image when that is unreachable.
    """

    title = u'The Sun UK'
    description = 'Recipe Author D.Asbury. Articles from The Sun tabloid UK'
    __author__ = 'Dave Asbury'
    # last updated 5/5/13 better cover fetch
    language = 'en_GB'
    oldest_article = 1
    max_articles_per_feed = 15
    remove_empty_feeds = True

    masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
    encoding = 'UTF-8'
    remove_javascript = True
    no_stylesheets = True

    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = True
    # The cover is resolved at run time by get_cover_url() below; the static
    # assignment is kept here only for reference.
    # cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'

    extra_css  = '''
    body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
           '''
    keep_only_tags = [
        dict(name='div', attrs={'class': 'intro'}),
        dict(name='h3'),
        dict(name='div', attrs={'id': 'articlebody'}),
        # Selectors that are currently disabled:
        # dict(attrs={'class' : ['right_col_branding','related-stories','mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
        # dict(name='div',attrs={'class' : 'cf'}),
        # dict(attrs={'title' : 'download flash'}),
        # dict(attrs={'style' : 'padding: 5px'})
    ]
    remove_tags_after = [dict(id='bodyText')]
    remove_tags = [
        dict(name='li'),
        dict(attrs={'class': 'grid-4 right-hand-column'}),
    ]

    feeds = [
        (u'News', u'http://www.thesun.co.uk/sol/homepage/news/rss'),
        (u'Sport', u'http://www.thesun.co.uk/sol/homepage/sport/rss'),
        (u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
        (u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
    ]

    def _is_unwanted(self, article):
        """Return True when *article* matches one of the known unwanted items.

        An article is unwanted when its title contains one of the listed
        phrases (compared case-insensitively) or its URL contains one of the
        listed slugs.
        """
        # Phrases are upper-case because they are compared against the
        # upper-cased title; the original mixed-case phrases could never match.
        title_phrases = ('TRY OUT THE SUN', 'WEB PORN HARMS KIDS')
        url_fragments = ('Try-out-The-Suns', 'Sun-says-Web-porn')

        title = (article.title or '').upper()
        url = article.url or ''
        if any(phrase in title for phrase in title_phrases):
            return True
        return any(fragment in url for fragment in url_fragments)

    # starsons code
    def parse_feeds(self):
        """Parse the feeds as usual, then drop the unwanted articles.

        Returns the feed list produced by BasicNewsRecipe.parse_feeds with
        every article for which _is_unwanted() is true removed.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            kept = []
            for article in feed.articles:
                print('article.title is: %s' % article.title)
                if not self._is_unwanted(article):
                    kept.append(article)
            # Replace the contents in place so the feed keeps the same list
            # object; each article is dropped at most once, so a double match
            # can no longer raise ValueError from a second remove().
            feed.articles[:] = kept
        return feeds

    def get_cover_url(self):
        """Return the URL of the cover image to use.

        Tries today's front page on thepaperboy.com; if opening it fails
        (including a redirect, since redirects are disabled), one of the
        fallback images is chosen at random instead.
        """
        print('####################################################')
        cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'
        fallback_covers = [
            'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg',
            'http://img.thesun.co.uk/multimedia/archive/00905/errorpage7_677962a_905505a.jpg',
            'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg',
            'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg',
            'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg',
        ]

        br = browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cover_url)
        except Exception:
            # Best effort: any failure to fetch the front page falls back to a
            # stock image, but KeyboardInterrupt/SystemExit still propagate.
            cover_url = random.choice(fallback_covers)
        else:
            print('!!!!!!!!! cover exists')

        return cover_url

Similar Threads
Thread	Thread Starter	Forum	Replies	Last Post
the sun 19/10/12	scissors	Recipes	0	10-19-2012 10:17 AM
The Sun UK - update 6/10/12	scissors	Recipes	0	10-06-2012 11:46 AM
The Sun UK	scissors	Recipes	8	11-03-2011 05:43 AM
Baltimore sun help?	copyrite	Recipes	2	10-31-2010 03:59 PM
PRS-900 Fading in the sun	vxf	Sony Reader	15	08-21-2010 11:36 PM