MobileRead Forums - View Single Post

schuster · 05-23-2011, 03:53 PM

Hi Starson, I hope I can also ask this question?

You are right, but I don't understand it.
I've been experimenting without success.

Code:

class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    """Test recipe for two Focus RSS feeds that merges multi-page articles.

    For every downloaded article, ``preprocess_html`` strips inline styles,
    removes the ``span.overhead`` elements, and then calls ``append_page``
    to follow the "next page" link and splice the text block of each
    follow-up page into the first page's body.
    """

    title = u'Focus - test'
    __author__ = 'for_test'
    oldest_article = 20
    max_articles_per_feed = 10
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Prefix for relative "next page" links. The original code read
    # self.INDEX without ever defining it.
    # NOTE(review): assumed to be the Focus site root -- confirm against the
    # hrefs the pager link actually contains.
    INDEX = u'http://www.focus.de'

    feeds = [
        (u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
        (u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
    ]

    def get_article_url(self, article):
        """Return the article URL taken from the feed entry's id or guid.

        :param article: feed entry supporting ``.get(key, default)``.
        :return: the entry's ``id`` if present, else its ``guid``, else None.
        """
        return article.get('id', article.get('guid', None))

    def append_page(self, soup, appendtag, position):
        """Recursively append the text of all follow-up pages to ``appendtag``.

        :param soup: parsed page in which to look for the "next page" link.
        :param appendtag: tag that receives the next page's text block.
        :param position: index within ``appendtag`` at which to insert it.

        Does nothing when there is no next-page link, the link has no
        ``href``, or the next page contains no ``div.textBlock``.
        """
        pager = soup.find('a', attrs={'class': 'nextPage greyButton'})
        if pager is None:
            return

        # The matched tag is the <a> element itself, so the link lives on
        # the tag's own attributes -- not on a nested <a> (pager.a is None).
        href = pager.get('href')
        if not href:
            return

        # Absolute links are used as-is; relative ones get the site prefix.
        if href.startswith('http://') or href.startswith('https://'):
            nexturl = href
        else:
            nexturl = self.INDEX + href

        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'textBlock'})
        if texttag is None:
            # Page layout differs from what we expect; keep what we have.
            return

        for styled in texttag.findAll(style=True):
            del styled['style']

        # Pull in any further pages first, appending them at the end of
        # this page's text block, then move the block into the article.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Clean one downloaded article page and merge its follow-up pages.

        :param soup: parsed HTML of the article's first page.
        :return: the result of ``self.adeify_images(soup)``.
        """
        for item in soup.findAll(style=True):
            del item['style']

        # Elements that appear before the text block.
        for item in soup.findAll('span', attrs={'class': 'overhead'}):
            item.extract()

        self.append_page(soup, soup.body, 3)

        # Remove every page counter, including any that came along with
        # appended follow-up pages.
        for counter in soup.findAll('div', attrs={'class': 'pageCounter'}):
            counter.extract()

        return self.adeify_images(soup)

# Example of a multi-page article in the "Wissen-News" feed:
# Ozonloch-Studie - Zwischen Euphorie und Hysterie

Is this right? I've had no luck grabbing it.
It only grabs the normal pages; the multi-page articles are lost.

greetings