View Single Post
Old 05-03-2010, 04:21 PM   #1885
Tumaini
Junior Member
Tumaini began at the beginning.
 
Posts: 8
Karma: 10
Join Date: May 2010
Device: Bebook One (Hanlin v3)
Here are recipes for two Swedish news networks:

Ekot (NOTE - Ekot changed their format so this script probably won't work):
Code:
class Ekot_SE(BasicNewsRecipe):
    """News recipe for Ekot (Sveriges Radio), fetched from the api.sr.se RSS feeds.

    Articles are reduced to the headline, top, intro and text containers, and
    each article URL is rewritten to its printer-friendly counterpart.
    """

    # --- Publication metadata ---
    title = 'Ekot'
    __author__ = 'Joakim Lindskog'
    description = 'Nyheter från Ekot'
    publisher = 'Ekot'
    category = 'news, politics, Sweden'
    language = 'sv'

    # --- Fetch settings ---
    oldest_article = 7
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Only these elements of the article page are kept.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': 'newsH2'}},
        {'name': 'div', 'attrs': {'class': 'articleTop'}},
        {'name': 'div', 'attrs': {'class': 'newsIntro'}},
        {'name': 'div', 'attrs': {'class': 'newsText'}},
    ]

    # Elements stripped from whatever was kept.
    remove_tags = [
        {'name': ['object', 'link', 'base']},
        {'name': 'span', 'attrs': {'class': 'relLink'}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Ekot', u'http://api.sr.se/api/rssfeed/rssfeed.aspx?rssfeed=83'),
        (u'Utrikes', u'http://api.sr.se/api/rssfeed/rssfeed.aspx?rssfeed=3304'),
        (u'Radiosporten', u'http://api.sr.se/api/rssfeed/rssfeed.aspx?rssfeed=179'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        The regular article script path is swapped for the printer-friendly
        one and the ``ProgramID=83`` query parameter is appended.
        """
        article_page = 'http://sverigesradio.se/cgi-bin/ekot/artikel.asp'
        printer_page = 'http://sverigesradio.se/cgi-bin/isidorpub/PrinterFriendlyArticle.asp'
        printable_url = url.replace(article_page, printer_page)
        return printable_url + '&ProgramID=83'
Fria Tidningen (all categories, works great):
Code:
class FriaTidningen_SE(BasicNewsRecipe):
    """News recipe for Fria Tidningen, covering every category feed on fria.nu.

    Article pages are reduced to the ``content-area`` container, cut off after
    the byline, and stripped of comments and a set of sidebar blocks.
    """

    # --- Publication metadata ---
    title          = u'Fria Tidningen'
    __author__            = 'Joakim Lindskog'
    description           = 'Nyheter från Fria Tidningen'
    publisher             = 'Fria Tidningen'
    category              = 'news, politics, Sweden'

    # --- Fetch settings ---
    oldest_article        = 7
    delay                 = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'sv'

    # Metadata forwarded to the conversion step; reuses the values above.
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }

    # Keep only the main content container, ending at the byline.
    keep_only_tags = [dict(name='div', attrs={'id':'content-area'})]
    remove_tags_before = dict(name='div', attrs={'id':'content-area'})
    remove_tags_after = dict(name='div',attrs={'id':'byline'})

    # Elements stripped from the kept content: embedded objects, the comment
    # section and the numbered "block-block-NN" divs.
    remove_tags = [
                     dict(name=['object','link','base']),
                     dict(name='div', attrs={'id':'comments'}),
                     dict(name='div', attrs={'id':'block-block-21'}),
                     dict(name='div', attrs={'id':'block-block-22'}),
                     dict(name='div', attrs={'id':'block-block-23'}),
                     dict(name='div', attrs={'id':'block-block-24'}),
                     dict(name='div', attrs={'id':'block-block-25'}),
                     dict(name='div', attrs={'id':'block-block-26'}),
                     dict(name='div', attrs={'id':'block-block-27'}),
                     dict(name='div', attrs={'id':'block-block-28'}),
                     dict(name='div', attrs={'id':'block-block-29'}),
                     dict(name='div', attrs={'id':'block-block-30'}),
                     dict(name='div', attrs={'id':'block-block-40'})
                  ]

    # (section title, feed URL) pairs. Every entry must be a 2-tuple: a missing
    # comma between title and URL silently concatenates the two strings.
    # NOTE(review): the 'Nyheter' URL ends in '/feed/feed' while the other
    # taxonomy feeds end in '/0/feed' -- confirm against the site.
    feeds          = [(u'Allt', u'http://www.fria.nu/feed'),
                          (u'Nyheter', u'http://www.fria.nu/taxonomy/term/13/feed/feed'),
                          (u'Inrikes', u'http://www.fria.nu/taxonomy/term/14/0/feed'),
                          (u'Utrikes', u'http://www.fria.nu/taxonomy/term/15/0/feed'),
                          (u'Ekonomi', u'http://www.fria.nu/taxonomy/term/27047/0/feed'),
                          (u'Opinion', u'http://www.fria.nu/taxonomy/term/22/0/feed'),
                          (u'Inledaren', u'http://www.fria.nu/taxonomy/term/24/0/feed'),
                          (u'Argument', u'http://www.fria.nu/taxonomy/term/23/0/feed'),
                          (u'Synpunkten', u'http://www.fria.nu/taxonomy/term/26/0/feed'),
                          (u'Debatt', u'http://www.fria.nu/taxonomy/term/25/0/feed'),
                          (u'Kultur', u'http://www.fria.nu/taxonomy/term/19/0/feed'),
                          (u'Kulturnyheter', u'http://www.fria.nu/taxonomy/term/24534/0/feed'),
                          (u'Recensioner', u'http://www.fria.nu/taxonomy/term/24535/0/feed'),
                          (u'BAK', u'http://www.fria.nu/taxonomy/term/27/0/feed'),
                          (u'Sport & Hälsa', u'http://www.fria.nu/taxonomy/term/27215/0/feed'),
                          (u'Sport', u'http://www.fria.nu/taxonomy/term/20/0/feed'),
                          (u'Hälsa', u'http://www.fria.nu/taxonomy/term/21/0/feed'),
                          (u'Fördjupning', u'http://www.fria.nu/taxonomy/term/24994/0/feed'),
                          (u'Fokus', u'http://www.fria.nu/taxonomy/term/24864/0/feed'),
                          (u'Samtal', u'http://www.fria.nu/taxonomy/term/28/0/feed'),
                          (u'Stockholm', u'http://www.fria.nu/taxonomy/term/122/0/feed'),
                          (u'Göteborg', u'http://www.fria.nu/taxonomy/term/73/0/feed'),
                          (u'Uppsala', u'http://www.fria.nu/taxonomy/term/27324/0/feed'),
                          (u'Malmö', u'http://www.fria.nu/taxonomy/term/28031/0/feed')]
Many thanks to Darko Miletic, from whose recipes I borrowed code, and of course to Kovid Goyal!

Last edited by Tumaini; 05-05-2010 at 10:25 AM.
Tumaini is offline