View Single Post
Old 03-07-2011, 05:04 AM   #2
miwie
Connoisseur
miwie began at the beginning.
 
Posts: 76
Karma: 12
Join Date: Nov 2010
Device: Android, PB Pro 602
How is this for a first shot?

Code:
'''
www.elpais.com/suple/babelia/
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class ElPaisSemanal(BasicNewsRecipe):
    """Calibre recipe for the Babelia supplement of El Pais.

    The article list is scraped from the supplement's index page
    (``index``) rather than from an RSS feed, and every article is
    downloaded in its print version (see ``print_version``).
    """

    # --- Recipe metadata -------------------------------------------------
    title = 'El Pais Babelia'
    description = 'Suplemento semanal de El Pais'
    publisher = 'EL PAIS S.L.'
    category = 'news, politics, Spain'
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es'
    publication_type = 'magazine'

    # El Cigala
    # cover_url = 'http://pixhost.info/avaxhome/e8/27/001527e8_medium.jpeg'

    masthead_url = 'http://www.elpais.com/im/tit_logo_int.gif'
    # Page that parse_index() scrapes for article links.
    index = 'http://www.elpais.com/suple/babelia/'

    extra_css = ' p{text-align: left} body{ text-align: left; font-family: Georgia,"Times New Roman",Times,serif } h2{font-family: Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em} '

    # Reuse the metadata above for the generated e-book.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Article clean-up rules -------------------------------------------
    remove_attributes = ['width', 'height']

    # Voting and recommendation/sharing widgets ("votos", "rec ...").
    remove_tags = [
        dict(name='div', attrs={'id': 'votosC'}),
        dict(name='div', attrs={'class': 'votos'}),
        dict(name='div', attrs={'class': 'rec'}),
        dict(name='div', attrs={'class': 'rec rec-list'}),
        dict(name='div', attrs={'class': 'rec rec-twitter'}),
        dict(name='div', attrs={'class': 'rec rec-fbook'}),
    ]

    remove_tags_before = dict(name='div', attrs={'class': 'estructura_2col'})
    remove_tags_after = [
        dict(name='div', attrs={'id': 'utilidades'}),
        dict(name='div', attrs={'id': 'votosD'}),
        dict(name='div', attrs={'id': 'mod_util'}),
    ]

    def parse_index(self):
        """Build the article list from the supplement's index page.

        Returns a list holding a single ``(feed_title, articles)`` tuple.
        ``feed_title`` is the index page's ``<title>`` text, falling back
        to the recipe title when the page has none. Each article is a dict
        with the keys ``title``, ``date``, ``url`` and ``description``.
        """
        soup = self.index_to_soup(self.index)
        # Every article shares the same date stamp: "now", formatted with
        # the recipe's time format.
        date = strftime(self.timefmt)

        articles = []
        for item in soup.findAll(
                'a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}):
            # Tag.get() exists in both Beautiful Soup 3 and 4, unlike the
            # dict-style has_key() which was removed in version 4.
            href = item.get('href')
            if href is None:
                continue
            articles.append({
                'title': self.tag_to_string(item),
                'date': date,
                # Keep only the part of the link before its last '/'.
                'url': 'http://www.elpais.com' + href.rpartition('/')[0],
                'description': '',
            })

        # Guard against an index page without <head>/<title> (or with an
        # empty title) instead of failing with AttributeError / None.
        head = soup.head
        title_tag = head.title if head is not None else None
        feed_title = title_tag.string if title_tag is not None else None
        return [(feed_title or self.title, articles)]

    def print_version(self, url):
        """Return the print-version URL for the article at ``url``."""
        return url + '?print=1'
miwie is offline   Reply With Quote