MobileRead Forums - View Single Post - Custom recipes (archive, read-only)

elsuave · 07-03-2010, 06:34 PM

Hey guys, I just noticed that the recipe for O Estado de S. Paulo is broken (Calibre 0.7.7). I have attempted to update it. Because I have limited coding experience (and know no Python at all), the code may not be efficient; if a more experienced recipe-maker would like to double-check it, I'd appreciate it.

It appears to be getting the job done, though.

Code:

Spoiler:

Code:

#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2010, elsuave'
'''
estadao.com.br
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Estadao(BasicNewsRecipe):
    """Calibre news recipe for the newspaper O Estado de S. Paulo.

    The recipe is purely declarative (feed list, tag filters and metadata)
    apart from ``get_article_url``, which drops feed entries whose URL
    contains ``/Multimidia/``.
    """

    # --- Publication metadata -------------------------------------------
    title = 'O Estado de S. Paulo'
    __author__ = 'elsuave (modified from Darko Miletic)'
    description = 'News from Brasil in Portuguese'
    publisher = 'O Estado de S. Paulo'
    category = 'news, politics, Brasil'
    language = 'pt'
    cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'

    # --- Fetch / clean-up settings --------------------------------------
    oldest_article = 2
    max_articles_per_feed = 25
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    remove_javascript = True

    # --- Output-format options, built from the metadata above -----------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    # --- Page filtering --------------------------------------------------
    # Only these containers are kept from each downloaded article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['bb-md-noticia', 'c5']}),
    ]

    # Elements stripped from whatever survives ``keep_only_tags``.
    # NOTE(review): the class '™_01 right outerLeft15' looks like a
    # mis-encoded value (possibly an HTML-entity artefact) -- confirm it
    # against the site's current markup before changing it.
    remove_tags = [
        dict(name=['script', 'object', 'form', 'ul']),
        dict(name='div', attrs={'class': [
            'fnt2 Color_04 bold',
            'right fnt2 innerTop15 dvTmFont',
            '™_01 right outerLeft15',
            'tituloBox',
            'tags',
        ]}),
        dict(name='div', attrs={'id': ['bb-md-noticia-subcom']}),
    ]

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Manchetes Estadao', u'http://www.estadao.com.br/rss/manchetes.xml'),
        (u'Ultimas noticias', u'http://www.estadao.com.br/rss/ultimas.xml'),
        (u'Nacional', u'http://www.estadao.com.br/rss/nacional.xml'),
        (u'Internacional', u'http://www.estadao.com.br/rss/internacional.xml'),
        (u'Cidades', u'http://www.estadao.com.br/rss/cidades.xml'),
        (u'Esportes', u'http://www.estadao.com.br/rss/esportes.xml'),
        (u'Arte & Lazer', u'http://www.estadao.com.br/rss/arteelazer.xml'),
        (u'Economia', u'http://www.estadao.com.br/rss/economia.xml'),
        (u'Vida &', u'http://www.estadao.com.br/rss/vidae.xml'),
    ]

    def get_article_url(self, article):
        """Return the URL for a feed entry, or None to skip the entry.

        The URL is obtained from ``BasicNewsRecipe.get_article_url``.
        Entries whose URL contains ``/Multimidia/`` are skipped
        (presumably multimedia pages with no article text -- verify).

        :param article: the feed entry, passed through to the base class.
        :return: the URL string, or None when the entry has no URL or
                 points into ``/Multimidia/``.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        # Guard against a missing URL: a substring test on None would raise
        # TypeError instead of simply skipping the entry.
        if url is None or '/Multimidia/' in url:
            return None
        return url