#!/usr/bin/env python
##
## Written:      March 2020
## Version:      1.1
## Last update:  2023-03-31
##
from __future__ import unicode_literals, division, absolute_import, print_function

'''
Fetch RSS-Feeds from saechsische.de
'''
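# The recipe can be added to calibre as a custom news source or converted from
# the command line, e.g. (file names are only illustrative):
#   ebook-convert saechsische.recipe saechsische.epub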

from calibre.web.feeds.news import BasicNewsRecipe

def classes(classes):
    # Build an attrs matcher that accepts any tag whose class attribute
    # contains at least one of the space-separated class names given.
    q = frozenset(classes.split(' '))
    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})


class Saechsische(BasicNewsRecipe):
    title = 'Saechsische Zeitung'
    __author__ = 'epubli'
    description = 'RSS-Feeds von saechsische.de'
    publisher = 'SZ'
    publication_type = 'newspaper'
    language = 'de'
    encoding = 'UTF-8'
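    # Download behaviour: only articles from the last day, at most 40 per feed;
    # empty feeds are dropped, duplicates are skipped and images are recompressed.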
    oldest_article = 1
    max_articles_per_feed = 40
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    compress_news_images = True
    compress_news_images_auto_size = 8
    scale_news_images_to_device = True
    delay = 1
    ignore_duplicate_articles = {'title', 'url'}

    cover_url = 'https://www.saechsische.de/img/logo.svg'

    feeds = [
        ('Dresden', 'feed://www.saechsische.de/rss/dresden'),
        ('Sachsen', 'feed://saechsische.de/rss/sachsen'),
        ('Deutschland und Welt', 'feed://www.saechsische.de/rss/deutschland-welt'),
        ('Politik', 'feed://www.saechsische.de/rss/politik'),
        ('Wirtschaft', 'feed://www.saechsische.de/rss/wirtschaft'),
        ('Feuilleton', 'feed://www.saechsische.de/rss/feuilleton'),
        ('Sport', 'feed://www.saechsische.de/rss/sport'),
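        # Local edition feeds below are disabled by default; uncomment any of
        # them to include that edition in the download.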
        #('Dynamo', 'feed://www.saechsische.de/rss/dynamo'), 
        #('Bautzen', 'feed://www.saechsische.de/rss/bautzen'), 
        #('Bischofswerda', 'feed://www.saechsische.de/rss/bischofswerda'), 
        #('Dippoldiswalde', 'feed://www.saechsische.de/rss/dippoldiswalde'), 
        #('Döbeln', 'feed://www.saechsische.de/rss/doebeln'), 
        #('Freital', 'feed://www.saechsische.de/rss/freital'), 
        #('Großenhain', 'feed://www.saechsische.de/rss/grossenhain'), 
        #('Görlitz', 'feed://www.saechsische.de/rss/goerlitz'), 
        #('Kamenz', 'feed://www.saechsische.de/rss/kamenz'), 
        #('Löbau', 'feed://www.saechsische.de/rss/loebau'), 
        #('Meißen', 'feed://www.saechsische.de/rss/meissen'), 
        #('Niesky', 'feed://www.saechsische.de/rss/niesky'), 
        #('Pirna', 'feed://www.saechsische.de/rss/pirna'), 
        #('Radeberg', 'feed://www.saechsische.de/rss/radeberg'), 
        #('Radebeul', 'feed://www.saechsische.de/rss/radebeul'), 
        #('Riesa', 'feed://www.saechsische.de/rss/riesa'), 
        #('Sebnitz', 'feed://www.saechsische.de/rss/sebnitz'), 
        #('Zittau', 'feed://www.saechsische.de/rss/zittau'),
    ]

    template_css = '''
        .article_date { color: gray; font-family: monospace; }
        .article_description { text-indent: 0pt; }
        a.article { font-weight: bold; text-align: left; }
        a.feed { font-weight: bold; }
        .calibre_navbar { font-size: 200% !important; }
    '''

    extra_css = '''
        h2 {margin-top: 0em;}
    '''
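    # Keep only the article body and strip social sharing widgets, related-article
    # boxes and newsletter promotion links from it.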
    keep_only_tags = [
        dict(name='article', attrs={'class': 'article-detail'}),
    ]

    remove_tags = [
        classes('article-fill'),
        dict(name='div', attrs={'class': 'related-articles'}),
        dict(name='a', attrs={'class': 'article-remember-link'}),
        dict(name='a', attrs={'href': 'https://www.saechsische.de/dresden'}),
        dict(name='a', attrs={'href': 'https://www.saechsische.de/content/newsletter-lp?utm_content=dresden_kompakt'}),
        dict(name='div', attrs={'class': 'article-detail-socials'}),
        dict(name='div', attrs={'class': 'd-desktop-none'}),
        dict(name='div', attrs={'class': 'floating-share-icon'}),
    ]

    def parse_feeds(self):
        # Call the parent's method to build the feed list first.
        feeds = BasicNewsRecipe.parse_feeds(self)
        # Then weed out unwanted entries from every feed.
        for feed in feeds:
            # Iterate over a copy so removing articles while looping is safe.
            for article in feed.articles[:]:
                # Remove adverts and newsletter teasers by URL.
                if '/anzeige/' in article.url or 'newsletter-dresden' in article.url:
                    print('Removing:', article.title)
                    feed.articles.remove(article)
                # Remove news blogs and podcasts by title.
                elif 'Newsblog' in article.title or 'Podcast' in article.title:
                    print('Removing:', article.title)
                    feed.articles.remove(article)

        return feeds

    def preprocess_raw_html(self, raw, url):
        # Skip news blogs, articles behind the login wall and advertisements.
        unwanted_article_keywords = ['unser Newsblog', 'Zum Login', '00:00 Uhr']
        for keyword in unwanted_article_keywords:
            if keyword in raw:
                print('Skipping unwanted article with keyword(s):', keyword)
                self.abort_article('Skipping unwanted article')
        return raw

