from calibre.web.feeds.news import BasicNewsRecipe, classes
from collections import OrderedDict

class Sportstar(BasicNewsRecipe):
    title = u'Sportstar'
    __author__ = 'unkn0wn'
    description = (
        'Sportstar began as a Print Only multi-sport weekly on July 15, 1978.'
        ' In 2018 the periodicity of Sportstar was made fortnightly with a lot of in-depth articles coming into the mix.'
        ' Our readers have been the fount of inspiration in our attempts at exploring new angles in sports journalism.')
    language = 'en_IN'
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    encoding = 'utf-8'
    ignore_duplicate_articles = {'url'}
    resolve_internal_links = True
    masthead_url = 'https://sportstar.thehindu.com/theme/images/ss-online/sslogo.png'
    remove_attributes = ['height', 'width']
    extra_css = '''
        .text-secondary{font-style:italic; color:#404040;}
        .lead-img-caption, .caption-cont{font-size:small; text-align:center;}
        .auth-name, .datelinew {font-size:small;}
    '''

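    # Keep only the headline, the standfirst (h2.text-secondary), the lead
    # image block, byline, dateline and the article body.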
    keep_only_tags = [
        dict(name='h1'),
        dict(name='h2', attrs={'class':'text-secondary'}),
        classes('lead-img-cont auth-name datelinew art-content')
    ]
    
    remove_tags = [
        classes('mbot capsletter article-body1')
    ]
    
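    # Build the table of contents: find the latest issue linked from the
    # magazine landing page, read the cover metadata, then group the article
    # teasers by their section label.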
    def parse_index(self):
        soup = self.index_to_soup('https://sportstar.thehindu.com/magazine/')
        url = soup.find('a', href=lambda x: x and x.startswith('https://sportstar.thehindu.com/magazine/issue/'))['href']
        self.log('Downloading Issue: ', url)
        soup = self.index_to_soup(url)
        
        feeds = OrderedDict()
        
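        # Cover block: take the cover image (upgrading the 320px rendition to
        # 1200px), the issue name and date for timefmt, and the description.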
        info = soup.find('div', attrs={'class':'sptar-cover-item'})
        self.cover_url = info.find('div', attrs={'class':'card'}).find(
            'img')['data-original'].replace('FREE_320', 'FREE_1200')
        data = info.find('div', attrs={'class':'cover-content'})
        self.timefmt = (
            ' (' + self.tag_to_string(data.h3).strip() + ') ['
            + self.tag_to_string(data.find('span', attrs={'class':'date'})) + ']'
        )
        self.description = self.tag_to_string(data.p).strip()
        
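        # Each 'brief-cnt-wrap' block is one article teaser; 'brief-place'
        # names the section it belongs to.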
        for content in soup.findAll('div', attrs={'class':'brief-cnt-wrap'}):
            articles = []
            h4 = content.find('h4', attrs={'class':'brief-title'})
            a = h4.find('a', href=True)
            url = a['href']
            title = self.tag_to_string(a).strip()
            desc = self.tag_to_string(content.find('div', attrs={'class':'artbody'})).strip()
            section_title = self.tag_to_string(content.find('span', attrs={'class':'brief-place'})).strip()
            self.log(section_title)
            self.log('\t', title)
            self.log('\t', desc)
            self.log('\t\t', url)
            articles.append({
                'title': title,
                'url': url,
                'description': desc})
                
            if articles:
                if section_title not in feeds:
                    feeds[section_title] = []
                feeds[section_title] += articles
        ans = [(key, val) for key, val in feeds.items()]
        return ans

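    # Per-page cleanup: turn the first h2.text-secondary into a paragraph and
    # drop any second occurrence, resolve lazy-loaded images, and convert
    # photo captions to <figcaption>.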
    def preprocess_html(self, soup):
        h2 = soup.findAll(**classes('text-secondary'))
        if len(h2) > 0:
            h2[0].name = 'p'
        if len(h2) > 1:
            h2[1].extract()
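        # Lazy-loaded images: spacer placeholders get their real URL from the
        # preceding <source srcset>; everything else carries it in
        # 'data-original'.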
        for img in soup.findAll('img', attrs={'data-original':True}):
            if img['data-original'].endswith('1x1_spacer.png'):
                source = img.findPrevious('source', srcset=True)
                img.extract()
                if source:
                    source['src'] = source['srcset']
                    source.name = 'img'
            else:
                img['src'] = img['data-original']
                
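        # Turn caption headings inside 'caption-cont' blocks into
        # <figcaption> elements.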
        for cap in soup.findAll('div', attrs={'class':'caption-cont'}):
            h4 = cap.find('h4')
            if h4:
                h4.name = 'figcaption'
        return soup
    
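    # Strip any leftover <source> tags from <picture> markup in the final
    # output.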
    def postprocess_html(self, soup, first_fetch):
        for src in soup.findAll('source'):
            src.extract()
        return soup

