#!/usr/bin/env python
# License: GPLv3 Copyright: 2008, Kovid Goyal <kovid at kovidgoyal.net>

try:
    from http.cookiejar import Cookie
except ImportError:
    from cookielib import Cookie

import json
from html5_parser import parse
from lxml import etree

from calibre import replace_entities
from calibre.ebooks.BeautifulSoup import NavigableString, Tag
from calibre.utils.cleantext import clean_ascii_chars
from calibre.web.feeds.news import BasicNewsRecipe

def E(parent, name, text='', **attrs):
    ans = parent.makeelement(name, **attrs)
    ans.text = text
    parent.append(ans)
    return ans


def process_node(node, html_parent):
    ntype = node.get('type')
    if ntype == 'tag':
        c = html_parent.makeelement(node['name'])
        c.attrib.update({k: v or '' for k, v in node.get('attribs', {}).items()})
        html_parent.append(c)
        for nc in node.get('children', ()):
            process_node(nc, c)
    elif ntype == 'text':
        text = node.get('data')
        if text:
            text = replace_entities(text)
            if len(html_parent):
                t = html_parent[-1]
                t.tail = (t.tail or '') + text
            else:
                html_parent.text = (html_parent.text or '') + text

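# A minimal sketch of the JSON node shape process_node() expects (field names
# mirror those used above; the concrete values are made up for illustration):
#
#   {'type': 'tag', 'name': 'p', 'attribs': {'class': 'para'}, 'children': [
#       {'type': 'text', 'data': 'Hello &amp; welcome'}]}
#
# would be appended to html_parent as <p class="para">Hello & welcome</p>,
# with entities resolved by replace_entities().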

def load_article_from_json(raw, root):
    data = json.loads(raw)['props']['pageProps']['content']
    # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True))
    if isinstance(data, list):
        data = data[0]
    body = root.xpath('//body')[0]
    for child in tuple(body):
        body.remove(child)
    article = E(body, 'article')
    E(article, 'h1', data['headline'], style='font-size: x-large')
    E(article, 'div', data['children'], style='font-style: normal')
    images = data['image']
    if 'main' in images:
        div = E(article, 'div')
        try:
            E(div, 'img', src=images['main']['url']['canonical'])
        except Exception:
            pass
    text = data['text']
    for node in text:
        process_node(node, article)

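# load_article_from_json() is meant to be driven from the recipe's
# preprocess_raw_html() hook, roughly as in the sketch below. The hook name is
# the standard BasicNewsRecipe API, but this wiring is an assumption and not
# part of the recipe as written:
#
#   def preprocess_raw_html(self, raw_html, url):
#       root = parse(raw_html)
#       script = root.xpath('//script[@id="__NEXT_DATA__"]')
#       if script:
#           load_article_from_json(script[0].text, root)
#       return etree.tostring(root, encoding='unicode')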

def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})

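# classes('a b') builds an attrs matcher that selects any tag whose class
# attribute contains 'a' or 'b'; it is used for keep_only_tags and remove_tags
# in the recipe below.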

class NoArticles(Exception):
    pass

class Economist(BasicNewsRecipe):

    title = 'The Economist Espresso'
    language = 'en'

    __author__ = "Kovid Goyal"
    description = (
        'Espresso is a rich, full-flavoured shot of daily global analysis'
        ' from the editors of The Economist to get you up to speed, fast. '
        'Maximise your understanding of the most significant business, '
        'economic, political and cultural developments globally.'
    )

    no_stylesheets = True

    remove_attributes = ['data-reactid', 'width', 'height']
    # economist.com has started throttling after about 60% of the total has
    # downloaded with connection reset by peer (104) errors.
    # delay = 1

    extra_css = 'blockquote{ font-size:large; font-style:italic; }'

    keep_only_tags = [
        dict(name='main', attrs={'id': 'content'}),
        classes('_quote-container'),
    ]
    remove_tags = [
        classes('_podcast-promo _newsletter-promo-container _time-last-updated _description')
    ]

    needs_subscription = False

    def __init__(self, *args, **kwargs):
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        if self.output_profile.short_name.startswith('kindle'):
            # Reduce image sizes to get file size below amazon's email
            # sending threshold
            self.web2disk_options.compress_news_images = True
            self.web2disk_options.compress_news_images_auto_size = 5
            self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold')

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        # Add a cookie indicating we have accepted Economist's cookie
        # policy (needed when running from some European countries)
        ck = Cookie(
            version=0,
            name='notice_preferences',
            value='2:',
            port=None,
            port_specified=False,
            domain='.economist.com',
            domain_specified=False,
            domain_initial_dot=True,
            path='/',
            path_specified=False,
            secure=False,
            expires=None,
            discard=False,
            comment=None,
            comment_url=None,
            rest={'HttpOnly': None},
            rfc2109=False
        )
        br.cookiejar.set_cookie(ck)
        br.set_handle_gzip(True)
        return br

    def parse_index(self):
        raw = self.index_to_soup('https://www.economist.com/the-world-in-brief', raw=True)
        soup = self.index_to_soup(raw)
        ans = self.economist_parse_index(soup)
        if not ans:
            raise NoArticles(
                'Could not find any articles, either the '
                'economist.com server is having trouble and you should '
                'try later or the website format has changed and the '
                'recipe needs to be updated.'
            )
        return ans

    def economist_parse_index(self, soup):
        script_tag = soup.find("script", id="__NEXT_DATA__")
        if script_tag is not None:
            data = json.loads(script_tag.string)
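        # calibre expects parse_index() to return a list of
        # (section title, list of article dicts) tuples; each article dict
        # needs at least a 'title' and a 'url'.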
        feeds = []
        articles = []
        feeds.append(('|Espresso', articles))
        articles.append({
            'title': 'The World in Brief',
            'url': 'https://www.economist.com/the-world-in-brief',
            'description': 'Catch up quickly on the global stories that matter',
        })
        return feeds


calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'