
'''
economictimes.indiatimes.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe, classes


class TheEconomicTimes(BasicNewsRecipe):
    '''
    Recipe for the print edition listing of economictimes.indiatimes.com.

    The index page is split into sections, each section into articles, and
    every downloaded article page is reduced to its headline, byline,
    summary, images and body text.
    '''

    title = 'The Economic Times | Print Edition'
    __author__ = 'unkn0wn'
    description = 'Financial news from India.'
    publisher = 'economictimes.indiatimes.com'
    category = 'news, finances, politics, India'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en_IN'
    remove_attributes = ['style', 'height', 'width']
    publication_type = 'newspaper'
    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/9/98/The_Economic_Times_logo.svg'
    ignore_duplicate_articles = {'title', 'url'}
    extra_css = '''
    .summary {color:#404040; font-style:italic;}
    time{font-size:small;}
    '''

    # Elements of an article page that are kept.
    keep_only_tags = [
        dict(name='h1'),
        classes(
            'artByline artSyn artImg artText publisher publish_on slideshowPackage'
        ),
    ]
    # Elements removed from what is kept above.
    remove_tags = [
        classes(
            'story_title storyCollection shareBar sr_widget_free jsSrWidgetFree srwidgetfree_3'
            ' sr_paid jsSrWidgetPaid ar_wrp arwd_ld_chk adBox custom_ad mgid orn_free_r bold'
        ),
    ]

    def get_cover_url(self):
        '''
        Return the URL of today's front page image, or None.

        The URL is derived from the current local date. It is opened once
        with the recipe browser; if that fails for any reason the failure
        is logged and None is returned.
        '''
        from datetime import date

        # Read the date once so every component of the URL refers to the
        # same day, even when the call straddles midnight.
        today = date.today()
        cover = 'https://asset.harnscloud.com/PublicationData/ET/etbg/'\
            + today.strftime('%Y/%m/%d') + '/Page/'\
            + today.strftime('%d_%m_%Y') + '_001_etbg.jpg'
        self.log('cover_url ', cover)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except Exception:
            # Best effort: a missing cover must not abort the download, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            self.log("\nCover unavailable")
            cover = None
        return cover

    def parse_index(self):
        '''
        Fetch the print edition index page and return its feeds.

        When the page has an element with class ``labelDate`` its text is
        stored, wrapped in brackets, as ``self.timefmt``.

        Returns the list produced by :meth:`et_parse_index`.
        '''
        soup = self.index_to_soup(
            'https://economictimes.indiatimes.com/print_edition.cms'
        )
        date_tag = soup.find(**classes('labelDate'))
        if date_tag:
            self.timefmt = ' [' + self.tag_to_string(date_tag).strip() + ']'
            self.log(self.timefmt)
        return self.et_parse_index(soup)

    def et_parse_index(self, soup):
        '''
        Turn the parsed index page into a list of feeds.

        Each ``div`` with class ``printBox`` is one section; its name is the
        text of the section's ``h2`` with all digits removed. Inside a
        section every ``h1``/``h3``/``h4``/``h5`` that holds a matching
        ``span`` (class ``banner``, ``href`` starting with ``/epaper/``)
        becomes an article. Headings without such a span are skipped.

        :param soup: parsed index page
        :return: list of ``(section name, articles)`` tuples where each
            article is a dict with ``title``, ``url`` and ``description``;
            sections without articles are left out
        '''
        feeds = []
        for section in soup.findAll('div', attrs={'class': 'printBox'}):
            sec = self.tag_to_string(section.find('h2'))
            secname = re.sub(r'[0-9]', '', sec)
            self.log(secname)
            articles = []
            for heading in section.findAll(("h1", "h3", "h4", "h5")):
                span = heading.find(
                    'span',
                    href=lambda x: x and x.startswith('/epaper/'),
                    attrs={'class': 'banner'}
                )
                if span is None:
                    # Not an article heading; subscripting None here would
                    # abort the whole index.
                    continue
                url = 'https://economictimes.indiatimes.com' + span['href']
                title = self.tag_to_string(span)
                div = heading.find_next_sibling('div', attrs={'class': 'dsc'})
                # Decide the description per article so that a missing
                # blurb neither raises nor inherits the previous article's.
                desc = self.tag_to_string(div) if div is not None else ''
                articles.append({'title': title, 'url': url, 'description': desc})
                self.log('\t', title)
                self.log('\t', desc)
                self.log('\t\t', url)
            if articles:
                feeds.append((secname, articles))
        return feeds

    def preprocess_html(self, soup):
        '''
        Adjust a downloaded article before conversion.

        The first element with class ``summary`` is renamed to ``p``, and
        image URLs are rewritten: ``width-300`` becomes ``width-640`` in
        ``src``, and images carrying ``data-original`` get their ``src``
        replaced by a rewritten form of that attribute.

        :param soup: parsed article page
        :return: the same soup, modified in place
        '''
        summary = soup.find(**classes('summary'))
        if summary:
            summary.name = 'p'
        for image in soup.findAll('img', attrs={'src': True}):
            image['src'] = image['src'].replace("width-300", "width-640")
        # Runs second on purpose: for images that have data-original it
        # overrides whatever src was set to above.
        for img in soup.findAll('img', attrs={'data-original': True}):
            img['src'] = img['data-original'].replace('photo', 'thumb').replace('quality-100', 'quality-100,width-600,resizemode-4')
        return soup
    
# Desktop Chrome user-agent string.
# NOTE(review): this is bound at module level, not on the recipe class;
# confirm that placement is intended.
calibre_most_common_ua = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/80.0.3987.87 Safari/537.36'
)