#!/usr/bin/env python2
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from lxml import html
from urllib2 import urlopen
import datetime

class MadhyamamCust(BasicNewsRecipe):
    """Calibre news recipe for the Madhyamam daily.

    The section index pages listed in ``indexURLs`` are scraped for article
    links, and every article URL is rewritten to its print-friendly page by
    ``print_version``.
    """

    title       = 'Madhyamam'
    __author__  = 'Masoud A R'
    description = 'Daily news from the Madhyamam'
    timefmt = ' [%a, %d %b, %Y]'
    language = 'ml'
    # Page furniture that is stripped from every downloaded article.
    remove_tags = [
        dict(name='div', attrs={'class': 'col-md-12 bx social-media'}),  # social media
        dict(name='div', attrs={'id': 'zt_28354_1'}),  # ads
        dict(name='div', attrs={'id': 'related_news'}),  # related news
        dict(name='div', attrs={'class': 'col-lg-12 col-md-12 col-sm-12 widget ad-widget'}),  # more ads
        dict(name='div', attrs={'class': 'col-lg-12 col-md-12 col-sm-12 nav-story'}),  # Previous and Next
        dict(name='div', attrs={'class': 'col-md-3 col-lg-pull-9 left-panel'}),  # Print and Tags
    ]
    # XPath selecting the divs whose class attribute is exactly
    # "col-lg-12 img img-main " (trailing space included); these are the
    # elements auto cleanup should keep.
    # NOTE(review): auto_cleanup is not enabled in this recipe, so this
    # setting presumably has no effect as written - confirm.
    auto_cleanup_keep = '//div[@class="col-lg-12 img img-main "]'
    no_stylesheets = True

    # One entry per feed: (feed title, index page URL, exact class attribute
    # of the <div> that wraps each article link on that page).
    indexURLs          = [
        ('Kerala', 'https://www.madhyamam.com/kerala', 'col-lg-12 col-md-12 col-sm-12 col-xs-8'),
        ('Qatar', 'https://www.madhyamam.com/gulf-news/qatar', 'col-lg-12 col-md-12 col-sm-12 col-xs-10'),
        # Disabled sections. Each still needs its container class added as a
        # third element before it can be enabled:
        #    ('Kochi', 'https://www.madhyamam.com/kerala/local-news/kochi'),
        #    ('GCC', 'https://www.madhyamam.com/gulf'),
        #    ('World', 'https://www.madhyamam.com/world'),
        #    ('Editorial', 'https://www.madhyamam.com/opinion/editorial'),
        #    ('Articles', 'https://www.madhyamam.com/opinion/articles'),
    ]

    def parse_index_URL(self, indexURL):
        """Scrape one section index page into a ``(feed_title, articles)`` tuple.

        ``indexURL`` is a ``(feed title, page URL, container class)`` tuple as
        stored in ``indexURLs``.  Every ``<div>`` whose class attribute equals
        the container class contributes at most one article; divs that hold
        no usable link are skipped instead of yielding an empty entry.
        """
        feed_title = indexURL[0]
        page_url = indexURL[1]
        container_class = indexURL[2]

        response = urlopen(page_url)
        try:
            # html.parse() consumes the whole response, so the connection can
            # be released as soon as it returns.
            tree = html.parse(response)
        finally:
            response.close()

        # The real publication date is not extracted yet, so every article is
        # stamped with the download date.  parse_index() expects the date as
        # a string; a purely numeric format avoids locale-dependent text.
        today = datetime.datetime.now().strftime('%Y-%m-%d')

        articles = []
        for div in tree.xpath("//div[@class='" + container_class + "']"):
            article = self._article_from_div(div, today)
            if article is not None:
                articles.append(article)
        return (feed_title, articles)

    def _article_from_div(self, div, date):
        """Return the article dict for ``div``, or None if it has no link.

        Only direct ``<a>`` children that carry an ``href`` are considered;
        when there are several, the last one wins.
        """
        article = None
        for child in div:
            if child.tag != 'a':
                continue
            href = child.get('href')
            if not href:
                continue
            article = {
                # text_content() also picks up text nested inside child
                # elements of the anchor, where .text alone would be None.
                'title': (child.text_content() or '').strip(),
                'url': href,
                'date': date,
                'description': '',
                'content': '',
            }
        return article

    def parse_index(self):
        """Return one ``(feed_title, articles)`` tuple per ``indexURLs`` entry."""
        return [self.parse_index_URL(indexURL) for indexURL in self.indexURLs]

    def print_version(self, url):
        """Return the print-friendly URL for the article at ``url``.

        The print URL is the last path segment of ``url`` appended to the
        site's ``/print/`` prefix.  A trailing slash is ignored so that the
        segment is never empty.
        """
        slug = url.rstrip('/').rsplit('/', 1)[-1]
        return 'https://www.madhyamam.com/print/' + slug