#!/usr/bin/env python
##
##

__license__   = 'GPL v3'
__copyright__ = 'Copyright 2011 Starson17'
'''
engadget.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Engadget(BasicNewsRecipe):
    """Calibre news recipe for engadget.com.

    Articles are taken from the site's RSS feed, reduced to the figure,
    header and article-text elements, and dropped entirely when their raw
    HTML contains one of ``unwanted_article_keywords``.
    """

    # Recipe metadata.
    title = u'Engadget'
    __author__ = 'Starson17, modified by epubli'
    __version__ = 'v1.00'
    __date__ = '08, Feb 2021'
    description = 'Tech news'
    language = 'en'

    # Download and clean-up options.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    remove_empty_feeds = True
    compress_news_images = True
    scale_news_images_to_device = True
    remove_attributes = ['class']

    # Only these elements are kept: figures, the article header component
    # and the article text container (two class spellings are listed).
    keep_only_tags = [
        dict(name='figure'),
        dict(name='div', attrs={'data-component': 'ArticleHeader'}),
        dict(name='div', attrs={'class': ['article-text', 'article-text c-gray-1 no-review']}),
    ]
    # Elements stripped out: the author info component and the
    # 'c-gray-7' spans.
    remove_tags = [
        dict(name='div', attrs={'data-component': 'ArticleAuthorInfo'}),
        dict(name='span', attrs={'class': 'c-gray-7'}),
    ]

    feeds = [(u'Posts', u'https://www.engadget.com/rss.xml')]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Substrings that mark an article as unwanted: sponsored articles and
    # the daily article that only summarises previous articles.
    unwanted_article_keywords = (
        'made possible by our sponsor',
        'The Morning After',
    )

    def preprocess_raw_html(self, raw, url):
        """Skip unwanted articles, otherwise pass the HTML through unchanged.

        :param raw: Raw HTML of the downloaded page; searched with plain
            substring matching, so it is assumed to be a text string.
        :param url: URL the HTML was downloaded from (not used here).
        :return: ``raw`` unmodified when no unwanted keyword is found.

        When any entry of ``unwanted_article_keywords`` occurs in ``raw``,
        ``self.abort_article`` is called so the article is skipped.
        """
        # `in` replaces the Python 2-only `raw.find(keyword) <> -1` test.
        if any(keyword in raw for keyword in self.unwanted_article_keywords):
            # NOTE(review): calibre documents abort_article() as aborting the
            # download of the current article, so the return below is not
            # expected to be reached in that case - confirm against the
            # calibre version in use.
            self.abort_article('Skipping unwanted article')
        return raw