View Single Post
Old 06-10-2011, 03:08 PM   #5
jumaka
Junior Member
jumaka began at the beginning.
 
Posts: 1
Karma: 10
Join Date: Jun 2011
Device: Kindle;Sony PRS505
Updated Recipe

Hi

Since this recipe does not seem to have been updated yet, here is my version:

Code:
__license__   = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>, Justin Saunders <justin at jumaka dot co dot uk>'
'''
www.wired.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Wired_UK(BasicNewsRecipe):
    '''Recipe for the UK edition of Wired magazine (www.wired.co.uk).

    There is no usable RSS feed, so the article list is built by scraping
    the magazine index page (parse_index).  Each article is downloaded in
    its single-page form (print_version), and the cover image is taken
    from the index page's Open Graph metadata (get_cover_url).
    '''
    title                 = 'Wired Magazine - UK edition'
    __author__            = 'Darko Miletic, Justin Saunders'
    # NOTE(review): the original said 'Gaming news' -- a copy-paste
    # leftover from another recipe.  Wired UK is a technology magazine.
    description           = 'Technology, science and culture news'
    publisher             = 'Conde Nast Digital'
    category              = 'news, games, IT, gadgets'
    oldest_article        = 32   # monthly magazine, so look back just over a month
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    masthead_url          = ''
    language              = 'en_GB'
    extra_css             = ' body{font-family: Tahoma,Geneva,Verdana,sans-serif; font-size: 1.0em } img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
    index                 = 'http://www.wired.co.uk/magazine'

    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    keep_only_tags = [
                      dict(name='div', attrs={'class':'layoutColumn1'}),
                      dict(name='div', attrs={'class':'articleMeta clearfix'}),
                      dict(name='div', attrs={'class':'imageWrapper'}),
                      dict(name='div', attrs={'class':'mainCopy'})]
    remove_tags = [
                     dict(name=['object','embed','iframe','link']),
                     dict(name='div', attrs={'class':'galleryThumbs'}),
                     dict(name='div', attrs={'class':'paging1 articleCopyPaging'}),
                     dict(name='h2', attrs={'class':'clearfix contentGroup cgMedium'}),
                     dict(attrs={'class':['commentAddBox linkit','opts','comment','stories','articleSidebar1']})
                  ]
    remove_tags_after = dict(name='div', attrs={'class':'clearfix'})
    remove_attributes = ['height','width']

    def parse_index(self):
        '''Scrape the magazine index page into [(section_title, articles)].

        Each article dict carries title, url, description and date, as
        required by BasicNewsRecipe.parse_index.

        NOTE(review): the original body mixed tab and space indentation
        (a TabError under Python 3); this version is uniformly
        space-indented with identical logic.
        '''
        totalfeeds = []
        soup = self.index_to_soup(self.index)
        for maincontent in soup.findAll('div',
                attrs={'class': ['storyList narrowStoryList',
                                 'storyList wideStoryList',
                                 'storyList otherStoryList']}):
            mfeed = []
            secttitle = 'Unknown'
            secthead = maincontent.find('h3')
            if secthead:
                # The section name may live in either an <a> or a <span>
                # of class 'ir'; when both are present the <span> wins
                # (the second lookup deliberately overwrites the first).
                secta = secthead.find('a', attrs={'class':'ir'})
                if secta:
                    secttitle = self.tag_to_string(secta)
                secta = secthead.find('span', attrs={'class':'ir'})
                if secta:
                    secttitle = self.tag_to_string(secta)
            for sec in maincontent.findAll('li'):
                for itt in sec.findAll('a', href=True):
                    url   = 'http://www.wired.co.uk' + itt['href']
                    title = self.tag_to_string(itt)
                    mfeed.append({
                                   'title'      : title
                                  ,'date'       : strftime(self.timefmt)
                                  ,'url'        : url
                                  ,'description': ''
                                 })
            totalfeeds.append((secttitle, mfeed))
        return totalfeeds

    def get_cover_url(self):
        '''Return the cover image URL from the index page's og:image meta tag,
        or None when the tag is absent.'''
        cover_url = None
        soup = self.index_to_soup(self.index)
        cover_item = soup.find('meta', attrs={'property':'og:image'})
        if cover_item:
            cover_url = cover_item['content']
        return cover_url

    def print_version(self, url):
        '''Append ?page=all so multi-page articles download as one page.'''
        return url + '?page=all'
Enjoy, even though there is a lot less on the web site these days.
jumaka is offline   Reply With Quote