MobileRead Forums - View Single Post - Custom recipes (archive, read-only)

dongdong · 11-14-2009, 02:11 AM

Hi all

Does anyone have the recipe for the subscriber version of The Straits Times (Digital Straits Times)?

http://www.straitstimes.com/The+Prin...t+Edition.html

I tried to modify the existing recipe for The Straits Times and managed to download the headlines, but when I click on the downloaded headlines, the ebook viewer shows that I need to subscribe.

Can anyone enlighten me as to what is missing? I have no programming knowledge :P

Code:

#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
www.straitstimes.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DigitalStraitsTimes(BasicNewsRecipe):
    """Calibre news recipe for the subscriber edition of The Straits Times.

    When a username and password are configured, the recipe submits them
    through the login form at ``LOGIN`` before any article is fetched.
    Articles are taken from the RSS feeds listed in ``feeds``.
    """

    # --- Metadata ---------------------------------------------------------
    title = 'Digital Straits Times'
    __author__ = 'Darko Miletic'
    description = 'Singapore newspaper'
    publisher = 'Singapore Press Holdings Ltd.'
    category = 'news, politics, singapore, asia'
    language = 'en'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    extra_css = ' .top_headline{font-size: x-large; font-weight: bold} '

    # Reuse the metadata above for the generated e-book.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Subscription / login --------------------------------------------
    needs_subscription = True
    simultaneous_downloads = 5
    delay = 0
    LOGIN = 'http://sphreg.asiaone.com/RegAuth2/stpLogin.html?goto=http://www.straitstimes.com/vgn-ext-templating/sti/common/STIRedirect.jsp'

    def get_browser(self):
        """Return the browser used for downloads, logged in if possible.

        The login form is only submitted when both ``self.username`` and
        ``self.password`` are set; otherwise the plain browser is returned.
        """
        # The base-class method must receive the instance explicitly;
        # calling it with no arguments fails on calibre versions where
        # get_browser is an instance method.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginForm')
            br['j_username'] = self.username
            br['j_password'] = self.password
            br.submit()
        return br

    # --- Page clean-up ----------------------------------------------------
    # Tags stripped from every downloaded page.
    remove_tags = [dict(name=['object', 'link', 'map'])]

    # Only the headline and story body <div>s are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['top_headline', 'story_text']}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Most Read Stories', u'http://www.straitstimes.com/STI/STIFILES/rss/mostreadstories.xml'),
        (u'Top Stories', u'http://www.straitstimes.com/STI/STIFILES/rss/prime.xml'),
        (u'Singapore', u'http://www.straitstimes.com/STI/STIFILES/rss/singapore.xml'),
        (u'Asia', u'http://www.straitstimes.com/STI/STIFILES/rss/asia.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup