from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes

def absurl(url):
    """Return *url* as an absolute URL on the Spectator website.

    Args:
        url: An href value taken from a Spectator page.

    Returns:
        ``url`` prefixed with the site origin when it is site-relative
        (starts with a single ``/``), prefixed with ``https:`` when it is
        protocol-relative (starts with ``//``), and unchanged otherwise.
    """
    if url.startswith('//'):
        # Protocol-relative link: the host is already present, so only the
        # scheme is missing. Prepending the site origin would corrupt it.
        return 'https:' + url
    if url.startswith('/'):
        return 'https://www.spectator.co.uk' + url
    return url

class spectator(BasicNewsRecipe):
    """Calibre recipe that downloads the current issue of The Spectator.

    ``parse_index`` reads the magazine landing page, follows every section
    linked from its table of contents and collects the article cards found
    on each section page. ``preprocess_html`` applies small markup fixes to
    each downloaded article.
    """

    title = 'Spectator Magazine'
    __author__ = 'unkn0wn'
    description = 'The Spectator was established in 1828, and is the best-written and most influential weekly in the English language.'
    language = 'en'
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/The_Spectator_logo.svg/320px-The_Spectator_logo.svg.png'
    encoding = 'utf-8'
    remove_empty_feeds = True
    resolve_internal_links = True

    # '#fig-c' matches the id that preprocess_html assigns to figcaptions.
    extra_css = '''
        [class^="ContentPageFooterAuthor_author"] {font-size:small;}
        #fig-c {text-align:center; font-size:small;}
        blockquote, em {color:#404040;}
    '''

    # The site's class names appear to carry generated suffixes, so elements
    # are matched by class-name prefix rather than by exact class.
    keep_only_tags = [
        prefixed_classes(
            'ContentPageHeader_main ContentPageHero_container ContentPageBody_body__container__'
            ' ContentPageFooterAuthor_author__'),
    ]

    remove_tags = [
        dict(name=('aside', 'iframe')),
        prefixed_classes('ContentPageBody_measure__ ContentPageAuthor_author__pic')
    ]

    def preprocess_html(self, soup):
        """Adjust article markup before conversion.

        Demotes the first ``<h2>`` to ``<h4>``, tags every ``<figcaption>``
        with the id styled by ``extra_css``, and renames ``<noscript>``
        elements inside figures to ``<span>``.

        Args:
            soup: Parsed article document.

        Returns:
            The same ``soup`` object, modified in place.
        """
        first_h2 = soup.find('h2')
        if first_h2:
            first_h2.name = 'h4'
        for caption in soup.findAll('figcaption'):
            # NOTE(review): every caption receives the same id; kept as-is
            # because the '#fig-c' rule in extra_css depends on it.
            caption['id'] = 'fig-c'
        for figure in soup.findAll('figure'):
            for noscript in figure.findAll('noscript'):
                # Presumably exposes the fallback <img> wrapped in
                # <noscript> -- TODO confirm.
                noscript.name = 'span'
        return soup

    def parse_index(self):
        """Build the list of feeds for the current issue.

        Reads the magazine landing page, stores the cover image URL (when
        one is present) and the issue title/date in ``timefmt``, then
        fetches each section page linked from the table of contents.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is the non-empty list produced by
            ``articles_from_soup`` for that section.
        """
        soup = self.index_to_soup('https://www.spectator.co.uk/magazine')

        # The cover is optional: if the markup changes and it cannot be
        # located, carry on without one instead of aborting the download.
        cover_box = soup.find(**prefixed_classes(
            'MagazinePage_spectator-magazine__image-and-subsections__'))
        cover_img = cover_box.find('img') if cover_box is not None else None
        if cover_img is not None and cover_img.get('src'):
            self.cover_url = cover_img['src']
        else:
            self.log('Could not find the cover image')

        issue = self.tag_to_string(soup.find(**prefixed_classes(
            'MagazinePage_spectator-magazine-issue__title__'))).strip()
        issue_date = self.tag_to_string(soup.find(**prefixed_classes(
            'MagazinePage_spectator-magazine-issue__date__'))).strip()
        self.timefmt = ' (' + issue + ') [' + issue_date + ']'
        self.log('Downloading Issue: ', self.timefmt)

        nav_div = soup.find('ul', **prefixed_classes('Tabs_spectator-table-of-contents__'))
        if nav_div is None:
            # Without the table of contents there is nothing to download;
            # fail with a readable message rather than an AttributeError.
            self.abort_recipe_processing(
                'Could not find the table of contents on the magazine page')

        # Anchors without an href cannot be followed, so leave them out.
        section_list = [
            (self.tag_to_string(link).strip(), absurl(link['href']))
            for link in nav_div.findAll('a', href=True)
        ]

        feeds = []
        # For each section title, fetch the article urls
        for section_title, section_url in section_list:
            self.log(section_title, section_url)
            section_soup = self.index_to_soup(section_url)
            articles = self.articles_from_soup(section_soup)
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def articles_from_soup(self, soup):
        """Extract the articles listed on a section page.

        Args:
            soup: Parsed section page.

        Returns:
            A list of dicts with ``title``, ``description`` and ``url``
            keys, one per article card that links to an ``/article/`` or
            ``/illustration/`` page. The description joins, in this order
            and separated by ``' | '``, the non-empty texts of the
            ``/magazines/`` link found in the card's parent ``<div>``, the
            card's ``<object>`` element and the card's teaser paragraph.
        """
        ans = []
        for card in soup.findAll('div', **prefixed_classes(
                'MagazineContent_spectator-magazine-content__article-card___')):
            link = card.find('a', attrs={'href': lambda x: x and x.startswith(
                ('/article/', '/illustration/'))})
            if link is None:
                # A card without an article link has nothing to download;
                # skip it instead of failing the whole section.
                continue
            url = absurl(link['href'])
            title = self.tag_to_string(card.find('div', **prefixed_classes(
                'ArticleCard_spectator-article-card__headline__'))).strip()

            teaser = card.find('p', **prefixed_classes('ArticleCard_spectator-article-card__media-teaser__'))
            # Presumably the <object> wraps the author byline -- TODO confirm.
            obj = card.find('object')
            parent = card.findParent('div')
            sec = None
            if parent is not None:
                sec = parent.find('a', attrs={'href': lambda x: x and x.startswith('/magazines/')})

            # Join only the pieces that have text so that a missing teaser
            # or byline does not leave a dangling ' | ' separator.
            pieces = [
                self.tag_to_string(tag).strip()
                for tag in (sec, obj, teaser)
                if tag is not None
            ]
            desc = ' | '.join(piece for piece in pieces if piece)

            self.log('\t', title, '\n\t', desc, '\n\t\t', url)
            ans.append({
                'title': title,
                'description': desc,
                'url': url})
        return ans