from calibre.web.feeds.news import BasicNewsRecipe, classes
from datetime import datetime
from calibre import browser


class HBR(BasicNewsRecipe):
    """Recipe that downloads the latest Harvard Business Review magazine issue.

    The issue is located via https://hbr.org/magazine; every article listed
    on the issue's table-of-contents page is returned from ``parse_index``
    in a single section named ``Articles``.

    The recipe object also acts as its own "browser" (see ``get_browser``):
    every request is made with a brand-new browser object so that no cookies
    are carried from one request to the next.
    """

    title = 'Harvard Business Review'
    __author__ = 'unkn0wn'
    description = (
        'Harvard Business Review is the leading destination for smart management thinking.'
        ' Through its flagship magazine, books, and digital content and tools published on HBR.org,'
        ' Harvard Business Review aims to provide professionals around the world with rigorous insights'
        ' and best practices to help lead themselves and their organizations more effectively and to make a positive impact.')
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    masthead_url = 'http://hbr.org/resources/css/images/hbr_logo.svg'
    remove_attributes = ['height', 'width', 'style']
    encoding = 'utf-8'
    ignore_duplicate_articles = {'url'}
    # Styling for HBR's custom elements/attributes (sidebars, captions,
    # "idea in brief" boxes).
    extra_css = '''
        article-sidebar{ font-size:small; text-align:left; font-style:italic; }
        [close-caption]{ font-size:small; font-style:italic; text-align:center;}
        article-ideainbrief{ font-size:small; text-align:left; }
        '''

    keep_only_tags = [
        classes(
            'headline-container pub-date hero-image-content article-summary article-body standard-content'
        ),
        dict(name='article-sidebar'),
    ]

    remove_tags = [
        classes(
            'left-rail--container translate-message follow-topic newsletter-container '
        ),
    ]

    @staticmethod
    def _is_article_href(href):
        """Return True if *href* has the form ``/YYYY/...`` (four-digit year).

        Accepts ``None`` (an ``<a>`` without ``href``) and returns False.
        Any year is accepted rather than only the current calendar year, so
        issues whose article URLs carry a different year are still picked up.
        """
        return (
            bool(href)
            and href.startswith('/')
            and href[1:5].isdigit()
            # Requiring a '/' at index 5 also guarantees four digits above.
            and href[5:6] == '/'
        )

    def parse_index(self):
        """Build the article list for the latest magazine issue.

        Fetches https://hbr.org/magazine, follows the first link whose href
        starts with ``/archive-toc/`` to the issue's table of contents, and
        collects one entry per ``<h3 class="hed">`` heading that contains an
        article link.  Headings without an article link are skipped.  As a
        side effect, ``self.cover_url`` is set from the image inside the
        issue link when such an image exists.

        Returns:
            A one-element list ``[('Articles', articles)]`` where each
            article is a dict with ``title``, ``url`` and ``description``.

        Raises:
            ValueError: if the magazine page has no ``/archive-toc/`` link.
        """
        base = 'https://hbr.org'
        soup = self.index_to_soup(base + '/magazine')
        issue = soup.find('a', href=lambda x: x and x.startswith('/archive-toc/'))
        if issue is None:
            raise ValueError(
                'Could not find an /archive-toc/ issue link on ' + base + '/magazine')
        issue_url = issue['href']
        self.log('Downloading issue:', issue_url)

        # The cover is optional: a missing image must not abort the download.
        cover = issue.find('img', attrs={'src': True})
        if cover is not None:
            self.cover_url = base + cover['src']

        soup = self.index_to_soup(base + issue_url)
        ans = []

        for h3 in soup.findAll('h3', attrs={'class': 'hed'}):
            links = h3.findAll('a', href=self._is_article_href)
            if not links:
                # Not an article heading; never reuse data from a previous one.
                continue
            # If a heading holds several matching links, the last one wins.
            link = links[-1]
            title = self.tag_to_string(link)
            url = base + link['href']

            # Byline and dek are both optional siblings of the heading.
            div = h3.find_next_sibling('div', attrs={'class': 'stream-item-info'})
            auth = self.tag_to_string(div) if div else ''
            dek = h3.find_next_sibling('div', attrs={'class': 'dek'})
            des = self.tag_to_string(dek) if dek else ''
            # "<dek> |<byline>" when both exist, otherwise whichever is present.
            desc = ' |'.join(part for part in (des, auth) if part)

            self.log('\t', title)
            self.log('\t', desc)
            self.log('\t\t', url)

            ans.append({
                'title': title,
                'url': url,
                'description': desc})
        return [('Articles', ans)]

    # HBR changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        """Return the recipe itself, which stands in for a browser object."""
        return self

    def clone_browser(self, *args, **kwargs):
        """Return the same stand-in browser as ``get_browser``."""
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        """Open a URL with a freshly created browser and return its response.

        A new browser is created for every call so that no cookies from
        earlier requests are sent.  All arguments are forwarded unchanged.
        """
        br = browser()
        return br.open_novisit(*args, **kwargs)

    # ``open`` behaves exactly like ``open_novisit``.
    open = open_novisit


# Desktop Chrome user-agent string. It is not referenced anywhere in the
# visible part of this file.
# NOTE(review): presumably picked up by calibre's recipe loader by name —
# confirm before renaming or removing.
calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'