View Single Post
Old 01-28-2011, 12:34 PM   #6
tolyluis
Enthusiast
tolyluis doesn't litter
 
Posts: 49
Karma: 196
Join Date: Jan 2011
Device: Kindle 3
20 Minutos (v0.8 ct)

A few small changes to the code were necessary for optimal performance in testing mode with the ebook-export command. No changes were made to the "real" code; only some non-ASCII characters were removed.

SOURCE CODE

Code:
# Recipe metadata: licence, author and copyright holder.
__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'

# Site this recipe targets (kept as a bare string literal, as in the original).
'''
www.20minutos.es
'''

class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    """Recipe settings for the Spanish newspaper 20 Minutos (20minutos.es).

    The class is purely declarative: it only sets class-level attributes
    and defines no methods, so all behaviour comes from ``BasicNewsRecipe``.
    """

    # --- Publication metadata -------------------------------------------
    __author__ = 'Luis Hernandez'
    title = u'20 Minutos'
    publisher = u'Grupo 20 Minutos'
    description = 'Periodico gratuito independiente'
    language = 'es'
    cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download limits ------------------------------------------------
    oldest_article = 5
    max_articles_per_feed = 100

    # --- Page handling --------------------------------------------------
    encoding = 'ISO-8859-1'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # --- Content selection ----------------------------------------------
    # Tag specifications (tag name + attribute filter) for the parts of
    # each article page that should be kept.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['content', 'vinetas', ]}),
        dict(
            name='div',
            attrs={'class': ['boxed', 'description', 'lead', 'article-content', 'cuerpo estirar']},
        ),
        dict(name='span', attrs={'class': ['photo-bar']}),
        dict(name='ul', attrs={'class': ['article-author']}),
    ]

    # Boundary markers: content before / after these elements is trimmed.
    remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
    remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})

    # Elements dropped from the kept content (navigation, social widgets,
    # comments, galleries, ad slots, ...).
    # NOTE(review): the id values 'vinetistas clearfix' and
    # 'otras_vinetas estirar' contain spaces and look like class lists
    # rather than ids -- confirm against the site markup.
    remove_tags = [
        dict(name='ol', attrs={'class': ['navigation', ]}),
        dict(name='span', attrs={'class': ['action']}),
        dict(
            name='div',
            attrs={
                'class': [
                    'twitter comments-list hidden',
                    'related-news',
                    'col',
                    'photo-gallery',
                    'calendario',
                    'article-comment',
                    'postto estirar',
                    'otras_vinetas estirar',
                    'kment',
                    'user-actions',
                ]
            },
        ),
        dict(
            name='div',
            attrs={
                'id': [
                    'twitter-destacados',
                    'eco-tabs',
                    'inner',
                    'vineta_calendario',
                    'vinetistas clearfix',
                    'otras_vinetas estirar',
                    'MIN1',
                    'main',
                    'SUP1',
                    'INT',
                ]
            },
        ),
        dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}),
        dict(name='ul', attrs={'id': ['site-links']}),
        dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}),
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Portada', u'http://www.20minutos.es/rss/'),
        (u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
        (u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
        (u'Economia', u'http://www.20minutos.es/rss/economia/'),
        (u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
        (u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
        (u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
        (u'Motor', u'http://www.20minutos.es/rss/motor/'),
        (u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
        (u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
        (u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
        (u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
        (u'Cine', u'http://www.20minutos.es/rss/cine/'),
        (u'Musica', u'http://www.20minutos.es/rss/musica/'),
        (u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
        (u'Comunidad20', u'http://www.20minutos.es/rss/zona20/'),
    ]
Sorry for duplicating posts.
tolyluis is offline   Reply With Quote