View Single Post
Old 11-07-2010, 12:14 AM   #1
KRorschachZ
Update
KRorschachZ doesn't litterKRorschachZ doesn't litterKRorschachZ doesn't litter
 
KRorschachZ's Avatar
 
Posts: 100
Karma: 212
Join Date: Nov 2010
Device: Kindle DX Graphite, we need a firmware update with the Kindle 3 PDF options
Example recipe RSS TV programs list channels future

An example of the powerful Calibre software used with RSS feeds for television programming.

Kindle screen captures

Spoiler:

Code:
class AdvancedUserRecipe1289098587(BasicNewsRecipe):
    """Example calibre recipe that builds a Spanish TV programme guide from RSS.

    Each class attribute is defined exactly once.  Python keeps only the
    last binding of a name in a class body, so repeated assignments of
    ``remove_tags``, ``remove_tags_after`` or ``extra_css`` would silently
    discard the earlier values.
    """

    title = u'GUIA PROGRAMACION TV ejemplo-example'
    __author__ = ' KRorschachZ.'
    description = 'Tv rss'
    timefmt = ' [%d %b, %Y]'
    language = 'es_ES'

    # Original author's hint for these two limits: "2 & 100 normal".
    # The article cap is raised because every TV programme is one feed entry.
    oldest_article = 1
    max_articles_per_feed = 150

    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['width', 'height']

    extra_css = '''
                                h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
                                '''

    # (section title, feed URL) pairs, one per channel.
    feeds = [
        (u'Noticias TV', u'http://www.sincroguia.tv/rss/rss.php?types=news'),
        (u'TVE 1', u'http://www.miguiatv.com/rss/tve1.xml'),
        (u'TVE 2', u'http://www.miguiatv.com/rss/la2.xml'),
        (u'ANT 3', u'http://www.miguiatv.com/rss/antena3.xml'),
        (u'Cuatro TV', u'http://www.miguiatv.com/rss/cuatro.xml'),
        (u'Tele 5', u'http://www.miguiatv.com/rss/telecinco.xml'),
        (u'La Sexta', u'http://www.miguiatv.com/rss/la-sexta.xml'),
        (u'Peliculas', u'http://www.laguiatv.com/rss/feeds/peliculas.xml'),
    ]

    remove_tags_before = dict(id='article')
    remove_tags_after = dict(name='div', attrs={'id': 'sidebar'})

    remove_tags = [
        dict(name=['object', 'link', 'script', 'ul']),
        dict(name='div', attrs={'id': [
            'scrAdSense',
            'herramientas2',
            'participacion',
            'participacion2',
            'bloque1resultados',
            'bloque2resultados',
            'cont_vinyetesAnt',
            'tinta',
            'noticiasSuperior',
            'cintillopublicidad2',
        ]}),
        dict(name='p', attrs={'class': ['masinformacion', 'hora']}),
        # NOTE(review): the class value contains literal single quotes, so it
        # only matches class="'link'".  Presumably 'link' was intended --
        # confirm against the target pages before changing it.
        dict(name='a', attrs={'class': ["'link'"]}),
        dict(name='div', attrs={'class': [
            'addthis_toolbox addthis_default_style',
            'firma',
            'pretitularnoticia',
        ]}),
        dict(name='form', attrs={'id': ['formularioDeBusquedaAvanzada']}),
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        The soup is modified in place and the same object is returned.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup


The recipe still needs "optimization", but the important thing is to have something to start with...

It would be interesting to add country selection to a future version of calibre... for example, the TV schedule for the next seven days.

Obviously the "date" filter has a curious effect in this case: is anyone interested in what aired on TV during the last 5 days? ;-) The maximum number of articles should be high in this case, because "every" program is considered an entry...

Keep a copy for me when we manage to finish it together...


The example is for Freeview TV in Spain, but it is applicable to other, "less" advanced countries ;-)

Regards from Spain.
Attached Thumbnails
Click image for larger version

Name:	screen_shot-17290.gif
Views:	420
Size:	13.6 KB
ID:	60755   Click image for larger version

Name:	screen_shot-17287.gif
Views:	420
Size:	26.4 KB
ID:	60756   Click image for larger version

Name:	screen_shot-17298.gif
Views:	399
Size:	18.6 KB
ID:	60757   Click image for larger version

Name:	screen_shot-17299.gif
Views:	393
Size:	38.7 KB
ID:	60758   Click image for larger version

Name:	screen_shot-17304.gif
Views:	414
Size:	66.6 KB
ID:	60759  

Last edited by KRorschachZ; 11-07-2010 at 12:20 AM.
KRorschachZ is offline   Reply With Quote