#!/usr/bin/env python
__license__   = 'GPL v3'
__copyright__ = ''
'''
Fetch the RSS feeds of spektrum.de
'''
def classes(classes):
    """Build a tag-matching spec keyed on CSS class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict of the form ``{'attrs': {'class': predicate}}``.
    ``predicate`` takes a class attribute value: a falsy value (``None``,
    ``''``) is returned unchanged, otherwise the value is split on
    whitespace and the frozenset of names it shares with ``classes`` is
    returned (empty, hence falsy, when nothing matches).
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # Tags without a class attribute arrive here as a falsy value;
        # hand it straight back so the overall result stays falsy.
        if not value:
            return value
        present = frozenset(value.split())
        return present & wanted

    return {'attrs': {'class': shares_a_class}}


from calibre.web.feeds.recipes import BasicNewsRecipe
class Spektrum(BasicNewsRecipe):
    """Recipe for the Spektrum.de RSS feed.

    On top of the declarative settings below, the recipe drops video,
    podcast and 'rezension' entries from the parsed feeds (see
    ``parse_feeds``) and renames ``<noscript>`` elements to ``<div>``
    in every downloaded page (see ``preprocess_html``).
    """

    # --- Recipe metadata -------------------------------------------------
    title = u'Spektrum der Wissenschaft'
    # Update AGE 2014-02-25, UDe 2018-02-22
    __author__ = 'Armin Geller, Bratzzo, Rainer Zenz'
    description = u'German  online portal of Spektrum der Wissenschaft'
    publisher = 'Spektrum der Wissenschaft Verlagsgesellschaft mbH'
    category = 'science news, Germany'
    language = 'de'
    encoding = 'utf8'

    # --- Download / clean-up options ---------------------------------------
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    # Articles are considered duplicates when their titles match.
    ignore_duplicate_articles = {'title'}

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Spektrum.de', u'http://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406'),
    ]

    # Only the <article class="content"> element of each page is kept.
    keep_only_tags = [
        dict(name='article', attrs={'class': 'content'}),
    ]

    # Elements stripped from the kept content.
    remove_tags = [
        classes('hide-for-print'),
        classes('content__meta'),
        classes('content__author'),
        classes('content__video'),
        dict(name='div', attrs={'role': 'navigation'}),
        dict(name='span', attrs={'class': 'sr-only'}),
    ]

    def parse_feeds(self):
        """Return the parent's parsed feeds minus unwanted entries.

        An article is removed from its feed when its title contains
        'VIDEO', or when its URL contains 'podcast', 'video' or
        'rezension'. The feed objects are modified in place.
        """
        parsed = BasicNewsRecipe.parse_feeds(self)
        blocked_url_parts = ('podcast', 'video', 'rezension')

        for feed in parsed:
            # Walk over a snapshot because entries are removed from the
            # live list while looping.
            for entry in list(feed.articles):
                unwanted = 'VIDEO' in entry.title or any(
                    part in entry.url for part in blocked_url_parts
                )
                if unwanted:
                    feed.articles.remove(entry)

        return parsed

    def preprocess_html(self, soup):
        """Rename every ``<noscript>`` element in ``soup`` to ``<div>``.

        The tags are renamed in place and the same ``soup`` is returned.
        NOTE(review): presumably this keeps content that the site only
        ships inside <noscript> wrappers -- confirm against the live page.
        """
        for tag in soup.findAll('noscript'):
            tag.name = 'div'
        return soup
    
 
