MobileRead Forums - View Single Post - Custom recipes (archive, read-only)

TonytheBookworm · 09-02-2010, 01:28 PM

I've been looking at the AdventureGamers recipe code and I have a few questions.

Spoiler:

and here is my painful attempt

Spoiler:

Code:

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for the How Stuff Works "AutoStuff" RSS feed.

    Articles may be split over several pages; ``preprocess_html`` calls
    ``append_page`` to fetch the follow-up pages and splice their content
    into the first page before conversion.
    """

    title = 'How Stuff Works'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'How stuff works'
    publisher = 'Tony'
    category = 'information'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    feeds = [
        ('AutoStuff', 'http://feeds.feedburner.com/HowstuffworksAutostuffDailyRssFeed'),
    ]

    def append_page(self, soup, appendtag, position, visited=None):
        """Fetch the page linked from ``soup``'s pager and splice it in.

        Looks for ``<div class="pagination">`` in ``soup``, follows the
        first link inside it, takes ``<div class="content">`` from the
        fetched page and inserts it into ``appendtag`` at ``position``.
        Recurses so that further pages are appended to the fetched
        content before it is inserted.

        :param soup: parsed page whose pager should be followed.
        :param appendtag: tag that receives the next page's content.
        :param position: child index in ``appendtag`` to insert at.
        :param visited: set of URLs already fetched during this chain;
            callers normally leave it as ``None``.
        """
        if visited is None:
            visited = set()

        pager = soup.find('div', attrs={'class': 'pagination'})
        if not pager:
            return

        link = pager.a
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            # Pager without a usable link: nothing to follow.
            return
        if nexturl in visited:
            # The first pager link points at a page already fetched in
            # this chain; stop instead of recursing forever.
            return
        visited.add(nexturl)

        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'content'})
        if texttag is None:
            # The linked page has no content container to append.
            return

        for styled in texttag.findAll(style=True):
            del styled['style']

        # Append any further pages to the end of this page's content
        # before moving it into the target document.
        self.append_page(soup2, texttag, len(texttag.contents), visited)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Clean up a downloaded article and merge its follow-up pages.

        Inserts language/charset meta markup at the start of ``<head>``,
        removes inline ``style`` attributes, appends paginated content
        into ``<body>`` and removes the ``toolbar_fat`` div.

        :param soup: parsed article page.
        :returns: the same ``soup`` object, modified in place.
        """
        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        if soup.head is not None:
            soup.head.insert(0, mtag)

        for item in soup.findAll(style=True):
            del item['style']

        self.append_page(soup, soup.body, 3)

        toolbar = soup.find('div', attrs={'class': 'toolbar_fat'})
        if toolbar:
            toolbar.extract()
        return soup