Hi,
I have written a recipe for the latest news from the local police.
It works well, but every entry appears twice in the MOBI file.
I don't know why. Could anyone help me?
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
class AdvancedUserRecipe(BasicNewsRecipe):
    """Calibre recipe that collects police press releases from presseportal.de.

    The story listing at ``INDEX`` is scraped by ``parse_index``; each article
    is then downloaded through the print view built by ``print_version``.
    """

    title = u'polizei_test'
    __author__ = u'schuster'
    masthead_url = 'http://www.presseportal.de/showbin.htx?id=65851&type=logo'
    cover_url = 'http://www.polizei-nrw.de/rhein-kreis-neuss/stepone/data/images/95/02/00/200_rheinkreis_neuer_stern.jpg'
    language = 'DE'
    # Listing page that parse_index() scrapes for story links.
    INDEX = 'http://www.presseportal.de/polizeipresse/p_story.htx?search=grevenbroich&firmaid=65851'
    # Page elements stripped from every downloaded article.
    remove_tags = [
        dict(name='div', attrs={'id': 'logo'}),
        dict(name='div', attrs={'id': 'origin'}),
        dict(name='pre', attrs={'class': 'xml_contact'}),
    ]
    no_stylesheets = True
    remove_javascript = True

    def parse_index(self):
        """Build the feed list from the story listing page.

        Every ``div.storylist_item`` becomes one feed, titled with the text of
        its ``h3.title``. Within an item, only anchors that point at a press
        release and carry a class other than ``more`` are used.

        Returns:
            A list of ``(section_title, articles)`` tuples, where ``articles``
            is a list of ``{'title': ..., 'url': ...}`` dicts. Items without
            any usable link are left out.
        """
        feeds = []
        for section in soup_sections(self):
            section_title = self.tag_to_string(
                section.find(name='h3', attrs={'class': 'title'}))
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                if not url.startswith('/polizeipresse/pm/65851/2'):
                    continue
                url = 'http://www.presseportal.de' + url
                title = self.tag_to_string(post)

                # The class may be missing entirely; .get() avoids a KeyError.
                klass = post.get('class') or ''
                if isinstance(klass, (list, tuple)):
                    classes = list(klass)
                else:
                    classes = klass.split()
                # Anchors with class "more" were the source of the doubled
                # entries (presumably they repeat the headline link), so they
                # are skipped along with anchors that have no class at all.
                if not classes or 'more' in classes:
                    continue

                self.log()
                self.log('--> post: ', post)
                self.log('--> url: ', url)
                self.log('--> title: ', title)
                self.log('--> class: ', klass)
                articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def print_version(self, url):
        """Return the print-view URL for an article URL.

        The scheme and host (the first three ``/``-separated segments) are
        kept, and the seventh segment is passed as the ``nr`` query parameter.
        If the URL has fewer than seven segments, ``nr`` is left empty.
        """
        segments = url.split('/')
        base = '/'.join(segments[:3])
        # Slicing instead of indexing yields '' rather than an IndexError
        # when the URL is shorter than expected.
        story_nr = '/'.join(segments[6:7])
        return base + '/print.htx?nr=' + story_nr + '&type=polizei'


def soup_sections(recipe):
    """Return all story-list item divs from the recipe's index page."""
    soup = recipe.index_to_soup(recipe.INDEX)
    return soup.findAll('div', attrs={'class': 'storylist_item'})
Solved: I forgot that the check `if klass != "":` should have been `if klass != "more":`.