from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date

class DieZeitMagazineRecipe(BasicNewsRecipe):
    """Calibre recipe for the weekly magazine Die Zeit.

    The issue to download is looked up on the yearly index page of
    www.zeit.de (see ``getCurrentUrl``); the sections and articles are
    then collected from that issue's page (see ``parse_index``).
    """

    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'de_DE'
    version = 1

    title = u'Die Zeit Magazine'
    publisher = u'Die Zeit'
    category = u'News, Germany'
    description = u'Weekly magazine from Germany'

    no_stylesheets = True
    remove_javascript = True

    # Only the main article container is kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'main', 'class': 'article'}),
    ]

    # Elements stripped from the kept article container.
    remove_tags = [
        dict(name='div', attrs={'class': 'inline link'}),
        dict(name='div', attrs={'class': 'pagination block'}),
        dict(name='div', attrs={'id': 'comments'}),
    ]

    # Filled in by getCurrentUrl() when the selected issue link has an image.
    cover_url = None

    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                h1.supertitle {font-size: small; color: #363636; margin-bottom: 0.1em;}
                h2.title {font-size: xx-large; font-weight: bold;}
                p.excerpt {font-size: large;}
                span.author, span.date {font-size: small; color: #666666;}
                p.copyright {font-size: x-small; color: #696969; font-style: italic; text-align: right}
                p.caption {font-size: small; color: #666666;}
                p.intertitle {font-size: large; font-weight: bold;}
                span.source {font-size: small; color: #696969;}
                '''

    conversion_options = {'comments': description, 'language': 'de',
                          'publisher': publisher}

    @staticmethod
    def _has_css_class(tag, name):
        """Return True if ``tag`` carries the CSS class ``name``.

        The ``class`` attribute may come back either as a plain string or as
        a list of class names, depending on the BeautifulSoup version in use;
        both shapes are handled. A plain string must match ``name`` exactly.
        """
        classes = tag.get('class')
        if isinstance(classes, (list, tuple)):
            return name in classes
        return classes == name

    def parse_index(self):
        """Build the list of sections and articles for the selected issue.

        Returns a list of ``(section title, articles)`` tuples. Each article
        is a dict with the keys ``title``, ``date`` (always ``None``),
        ``url`` and ``description``.
        """
        issue_url = self.getCurrentUrl(date.today().year)
        soup = self.index_to_soup(issue_url)
        answer = []

        for section in soup.findAll('li', attrs={'class': 'archiveressort'}):
            articles = []
            for li in section.findNextSiblings('li'):
                if self._has_css_class(li, 'archiveressort'):
                    # Next section header: all articles of this section found.
                    break

                link = li.find('a', attrs={'href': True})
                if link is None:
                    # An entry without a link cannot be downloaded; skip it
                    # rather than abort the whole index.
                    continue

                title = self.tag_to_string(li.find('h4', attrs={'class': 'title'}))
                url = self.tag_to_string(link['href'])
                # The description is the first <p> that has no attributes.
                summary = self.tag_to_string(
                    li.find(lambda tag: tag.name == 'p' and len(tag.attrs) == 0))
                articles.append({'title': title, 'date': None, 'url': url,
                                 'description': summary})

            answer.append((self.tag_to_string(section), articles))

        return answer

    def print_version(self, url):
        """Return ``url`` with the query string ``?page=all&print=true`` appended.

        NOTE(review): presumably this requests the single-page print layout;
        confirm against the site.
        """
        return url + '?page=all&print=true'

    def preprocess_html(self, soup):
        """Turn every ``ul.tools`` list into a ``div`` holding ``span`` items.

        Returns the modified ``soup``.
        """
        last_item = None
        for ul in soup.findAll('ul', attrs={'class': 'tools'}):
            for li in ul.findAll('li'):
                li.name = 'span'
                last_item = li
            ul.name = 'div'

        # The last converted item is removed from the tree, as before. Guarded
        # so that pages without any tools list no longer raise.
        # NOTE(review): only the very last item across all lists is removed;
        # confirm that this is the intended behaviour.
        if last_item is not None:
            last_item.extract()

        return soup

    def getCurrentUrl(self, year):
        """Return the URL of the last issue listed on the index page of ``year``.

        If the index page for ``year`` lists no issues, the previous year is
        tried instead. As side effects, ``self.cover_url`` is set when the
        issue link contains an image, and the text of the preceding ``h4``
        sibling (if any) is appended to ``self.title``.
        """
        url = 'http://www.zeit.de/' + str(year) + '/index'
        soup = self.index_to_soup(url)
        issues = soup.findAll('a', attrs={'class': 'issue'})
        if not issues:
            # There is not yet an issue in this year; try the previous year.
            # The result must be returned, otherwise the caller gets None.
            return self.getCurrentUrl(year - 1)

        # Get the last issue on the page.
        a = issues[-1]

        # Look the image up explicitly: attribute access on a tag yields None
        # for a missing child, so a hasattr() check cannot detect its absence.
        img = a.find('img')
        if img is not None and img.get('src'):
            self.cover_url = img['src']

        h4 = a.findPreviousSibling('h4')
        if h4:
            self.title = self.title + ' ' + self.tag_to_string(h4)

        return a['href']
        