Hi starson, I hope I can ask a question here as well.
You are right, but I don't understand it.
I have been experimenting without success.
Code:
class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    """Test recipe for two Focus RSS feeds that merges multipage articles.

    When an article page contains a "next page" link, the text block of each
    following page is downloaded and inserted into the first page, so that
    the whole article ends up in a single document.
    """

    title = u'Focus - test'
    __author__ = 'for_test'
    oldest_article = 20
    max_articles_per_feed = 10
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Prefix for site-relative "next page" links. append_page() reads
    # self.INDEX, which was previously never defined on the class.
    # NOTE(review): assumed to be the site root of the article pages - confirm.
    INDEX = 'http://www.focus.de'

    feeds = [
        (u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
        (u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
    ]

    def get_article_url(self, article):
        """Return the article's 'id' entry, falling back to 'guid', else None."""
        return article.get('id', article.get('guid', None))

    def append_page(self, soup, appendtag, position):
        """Recursively pull the text of following pages into ``appendtag``.

        soup      -- parsed page that is searched for a "next page" link
        appendtag -- tag that receives the next page's text block
        position  -- child index in ``appendtag`` at which to insert it

        Does nothing when ``soup`` has no usable "next page" link or when the
        next page has no text block.
        """
        pager = soup.find('a', attrs={'class': 'nextPage greyButton'})
        if pager is None:
            return

        # The pager found above is the anchor itself, so its href is read
        # directly; a nested <a> is only consulted as a fallback.
        link = pager if pager.get('href') else pager.a
        href = link.get('href') if link is not None else None
        if not href:
            return

        # Absolute links are used unchanged; site-relative ones get the
        # INDEX prefix.
        if href.startswith('http://') or href.startswith('https://'):
            nexturl = href
        else:
            nexturl = self.INDEX + href

        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'textBlock'})
        if texttag is None:
            # Next page has no text block: nothing to append.
            return

        for it in texttag.findAll(style=True):
            del it['style']

        # Collect further pages at the end of this page's text block before
        # moving the block into the target tag.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Strip inline styles and overhead spans, then merge following pages.

        Returns the result of ``adeify_images`` applied to the modified soup.
        """
        for item in soup.findAll(style=True):
            del item['style']

        # Spans with class 'overhead' sit before the text block.
        for item in soup.findAll('span', attrs={'class': 'overhead'}):
            item.extract()

        self.append_page(soup, soup.body, 3)

        # Every appended page may carry its own page counter, so remove all
        # of them rather than only the first.
        for counter in soup.findAll('div', attrs={'class': 'pageCounter'}):
            counter.extract()

        return self.adeify_images(soup)
# Example of a multipage article in the "Wissen-News" feed:
# Ozonloch-Studie - Zwischen Euphorie und Hysterie
Is this right? I have had no luck grabbing the multipage articles.
It grabs only the normal single-page articles; the multipage ones are lost.
Greetings