#!/usr/bin/env  python

__license__   = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
kommersant.ru

In order to use this recipe properly on a Sony reader you need to do the following:
1. In the device's internal memory create a folder called FONT
2. Copy any serif TrueType font to that folder and name it serif0.ttf

My recommendation is to use the GPL-licensed set of fonts called Liberation.
You can get them from here:
https://fedorahosted.org/releases/l/i/liberation-fonts/

For this and other recipes you should copy LiberationSerif-Regular.ttf
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Komersant(BasicNewsRecipe):
    """Recipe that downloads the 'Daily edition' RSS feed of kommersant.ru.

    The class only declares configuration attributes consumed by the
    ``BasicNewsRecipe`` base class and overrides ``preprocess_html`` to
    tag each article as Russian-language content and to strip inline
    styling attributes.
    """

    title                 = 'Komersant'
    __author__            = 'Darko Miletic'
    description           = 'News from Sankt Petersburg and Russia'
    publisher             = 'Komersant'
    category              = 'news, politics, Russia'
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    remove_javascript     = True
    # The site's pages are decoded as Windows-1251 (Cyrillic).
    encoding              = 'cp1251'
    # `_` is not defined in this file; presumably the gettext function that
    # calibre installs as a builtin -- verify against the running environment.
    language              = _('Russian')
    # Font setup for Sony readers: "serif0" is the user-supplied font described
    # in the note at the top of this file, "serif1" points at a font path under
    # /opt/sony/ebook/FONT, and the generic "serif" is the last fallback.
    extra_css = (
        '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} '
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} '
        'body{text-align: justify; font-family: serif0, serif1, serif} '
        '.article_description{font-family: serif0, serif1, serif}'
    )

    # Metadata options built from the attributes declared above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    # Selectors for the article header panel and the article text label.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'ctl00_ContentPlaceHolderStyle_PanelHeader'}),
        dict(name='span', attrs={'id': 'ctl00_ContentPlaceHolderStyle_LabelText'}),
    ]

    remove_tags = [dict(name=['object', 'link', 'img'])]

    feeds = [(u'Daily edition', u'http://www.kommersant.ru/RSS_Export/RU/daily.xml')]

    def preprocess_html(self, soup):
        """Mark the page as Russian and strip inline styling attributes.

        Inserts a ``Content-Language`` meta declaration at the start of
        ``<head>`` (when the document has one) and deletes every ``style``
        and ``font`` attribute found in the tree.

        :param soup: parsed article document (BeautifulSoup-style tree).
        :return: the same ``soup`` object, modified in place.
        """
        mtag = '<meta http-equiv="Content-Language" content="ru-RU"/>'
        # `soup.head` is None for documents without a <head>; skip the insert
        # in that case instead of failing with AttributeError.
        head = soup.head
        if head is not None:
            head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        # NOTE(review): this matches tags that carry a `font` *attribute*, not
        # <font> elements -- confirm that this is the intended clean-up.
        for item in soup.findAll(font=True):
            del item['font']
        return soup
