
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2013, Pr.BarnArt'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
import urllib, os
from collections import OrderedDict
from threading import Thread
from lxml.html import fromstring, tostring



from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars


import calibre_plugins.MondoUrania.config as cfg

class Worker(Thread):  # Get details

    '''
    Get book details from a MondoUrania book page in a separate thread.

    The worker downloads the page found at the URL it is given, extracts
    title, authors, series, comments, cover URL and publication date from
    it, and puts the resulting ``Metadata`` object on ``result_queue``.
    '''
    name                    = 'Worker'
    description             = _('Get book details from MondoUrania book page in a separate thread')
    author                  = 'Pr. BarnArt'
    version                 = (0, 2, 3)
    minimum_calibre_version = (0, 8, 5)

    BASE_MondoUrania = 'http://www.MondoUrania.com/'

    def __init__(self, combo, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Prepare a worker for a single details page.

        :param combo: sequence whose first item is the URL of the book page
        :param result_queue: queue that receives the finished ``Metadata``
        :param browser: browser object; a clone of it is used for the request
        :param log: logger used for progress and error messages
        :param relevance: value stored as ``source_relevance`` on the result
        :param plugin: owning plugin, used for caching and metadata cleanup
        :param timeout: timeout passed to the page request
        '''
        Thread.__init__(self)
        self.daemon = True
        # Only the URL is taken from combo.
        self.url = combo[0]
        self.result_queue = result_queue
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.MondoUrania_id = self.isbn = self.serie = self.prefix = None

        # The three lists below are parallel: entry i of each list describes
        # the same series (display name, site directory, identifier prefix).
        self.serie_name = [
            'urania',
            'urania rivista',
            'urania capolavori',
            'urania classisi',
            'urania blu',
            'urania millemondi',
            'uraniargento',
            'urania savage',
            'urania biblioteca',
            'urania fantasy',
            'urania horror',
            'urania resident evil',
            'urania fumetti',
            'urania numeri speciali',
            'urania collezione',
            'urania le grandi saghe',
            'urania millemondi nuovo',
            'urania epix',
        ]

        self.serie_dir = [
            'urania',
            'rivista',
            'bis',
            'urania%20classic',
            'blu',
            'millemondivs',
            'argento',
            'doc%20savage',
            'bibliotecaurania',
            'fantasynuovaserie',
            'horror',
            'resident%20evil',
            'fumetti',
            'speciali',
            'collezione',
            'grandi%20saghe',
            'millemondi',
            'epix',
        ]

        self.serie_prefix = [
            'u',
            'rivi',
            'capo',
            'clas',
            'blu',
            'mmv',
            'a',
            'sav',
            'bib',
            'fan',
            'hor',
            'rev',
            'fum',
            'spec',
            'col',
            'sag',
            'mmn',
            'epix',
        ]

    def run(self):
        '''Thread entry point: fetch and parse the page, logging any failure.'''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        '''
        Download the details page and hand the parsed tree to parse_details.

        Network errors, 404 responses and unparsable pages are logged and
        end the method without putting anything on the result queue.
        '''
        try:
            self.log.info('MondoUrania url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'MondoUrania timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('windows-1252', errors='replace')

        # The site may answer a missing page with a normal response whose
        # title announces the 404.
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse MondoUrania.com details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root, raw)

    def parse_details(self, root, raw):
        '''
        Build a ``Metadata`` object from the parsed page and queue it.

        :param root: lxml tree of the details page
        :param raw: decoded page source (currently unused)

        Nothing is queued when the title or the authors cannot be found.
        Failures in the optional fields (comments, cover, date) are logged
        and the field is simply left unset.
        '''
        series_index = None

        try:
            MondoUrania_id = self.parse_MondoUrania_id(self.url)
        except Exception:
            self.log.exception('Error parsing MondoUrania id for url: %r'%self.url)
            MondoUrania_id = None

        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors:
            self.log.error('Could not find title/authors/MondoUrania_id for %r'%self.url)
            self.log.error('MondoUrania_id: %r Title: %r Authors: %r'%(MondoUrania_id, title,
                authors))
            return

        # self.serie is filled in as a side effect of parse_MondoUrania_id;
        # the series index is the trailing number of the identifier.
        series = self.serie
        if MondoUrania_id:
            number = re.search(r'(\d+)$', MondoUrania_id)
            if number:
                # Stored as a number rather than the matched string.
                series_index = float(number.group(1))

        mi = Metadata(title, authors)

        if series:
            mi.series = series
            mi.series_index = series_index
        if MondoUrania_id:
            mi.set_identifier('mondourania', MondoUrania_id)
        self.MondoUrania_id = MondoUrania_id

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root, title, authors)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.MondoUrania_id:
            if self.isbn:
                self.plugin.cache_isbn_to_identifier(self.isbn, self.MondoUrania_id)
            if self.cover_url:
                self.plugin.cache_identifier_to_cover_url(self.MondoUrania_id,
                        self.cover_url)
        self.plugin.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_MondoUrania_id(self, url):
        '''
        Derive the identifier ``<prefix>-<number>`` from a book page URL.

        The first path component after the host selects the series; as a
        side effect ``self.serie`` and ``self.prefix`` are set to the
        matching series name and identifier prefix.

        :param url: URL of the book page
        :return: identifier string, e.g. ``'u-5'``
        :raises ValueError: if the URL is not a MondoUrania URL, names an
            unknown series directory, or contains no issue number
        '''
        # Host names are case-insensitive, so match the host loosely.
        host_match = re.search(r'mondourania\.com/([a-zA-Z%20]*)', url, re.IGNORECASE)
        if host_match is None:
            raise ValueError('Not a MondoUrania url: %r' % url)
        series_dir = host_match.group(1)

        try:
            position = self.serie_dir.index(series_dir)
        except ValueError:
            # Without a known series there is no prefix to build an id from.
            raise ValueError('Unknown MondoUrania series directory %r in url: %r'
                             % (series_dir, url))
        self.serie = self.serie_name[position]
        self.prefix = self.serie_prefix[position]

        # File names start with the directory name, except for 'urania blu'
        # whose files carry the full (URL-encoded) series name.
        file_stem = series_dir
        if self.serie == 'urania blu':
            file_stem = 'urania%20blu%20'

        # An optional run of lowercase letters may sit between the stem and
        # the issue number.
        letters = re.search('/%s([a-z]+)' % file_stem, url)
        if letters is None:
            pattern = r'/%s(\d+)' % file_stem
        else:
            pattern = r'/%s%s(\d+)' % (file_stem, letters.group(1))

        number = re.search(pattern, url)
        if number is None:
            raise ValueError('No issue number found in url: %r' % url)
        return '%s-%s' % (self.prefix, number.group(1))

    def parse_title(self, root):
        '''
        Return the book title with apostrophes removed, or None when the
        title node is not present in the page.
        '''
        title_node = root.xpath('//tbody/tr/td[3]/div/font/b')
        if not title_node:
            return None
        title = title_node[0].text_content().strip()
        return title.replace('\'', '')

    def parse_authors(self, root):
        '''
        Return a list holding the author text of the page, or an empty list
        when the author node is not present.
        '''
        auteur_node = root.xpath('//tbody/tr[3]/td[2]/div/p/font')
        if not auteur_node:
            return []
        return [auteur_node[0].text_content().strip()]

    def parse_serienr(self, root):
        '''
        Return the series number shown on the page as an int, or None when
        the node is not present.

        :raises ValueError: if the node text is not an integer
        '''
        serie_node = root.xpath('//tbody/tr/td[2]/div/font/b/font')
        if not serie_node:
            return None
        return int(serie_node[0].text_content().strip())

    def parse_published_date(self, root):
        '''
        Return the publication date of the page as a datetime, or None when
        the date node is not present or holds no recognisable date.
        '''
        date_node = root.xpath('//tbody/tr[2]/td/div/font')
        if not date_node:
            return None
        return self._convert_date_text(date_node[0].text_content().strip())

    @staticmethod
    def _parse_date_parts(date_text):
        '''
        Split ``dd/mm/yyyy``, ``mm/yyyy`` or ``yyyy`` into (year, month, day).

        Missing month or day default to 1. Returns None when the text has
        none of the three supported shapes.

        :raises ValueError: if a component is not an integer
        '''
        if len(date_text) > 4:
            # more than just a year
            parts = date_text.split('/')
            if len(parts) == 3:
                return (int(parts[2]), int(parts[1]), int(parts[0]))
            if len(parts) == 2:
                return (int(parts[1]), int(parts[0]), 1)
            return None
        if len(date_text) == 4:
            return (int(date_text), 1, 1)
        return None

    def _convert_date_text(self, date_text):
        '''
        Convert a date text (dd/mm/yyyy or a part of that) into a
        timezone-aware datetime in UTC.

        Returns None when the text is not in a supported shape, so that no
        made-up date ends up in the metadata.
        '''
        date_parts = self._parse_date_parts(date_text)
        if date_parts is None:
            return None
        from calibre.utils.date import utc_tz
        year, month, day = date_parts
        return datetime.datetime(year, month, day, tzinfo=utc_tz)

    def parse_comments(self, root):
        '''
        Return the description text of the page followed by a source note,
        or None when the description node is not present.
        '''
        # /html/body/table/tbody/tr[5]/td/div/p/font
        description_node = root.xpath('//tbody/tr[5]/td/div')
        if not description_node:
            return None
        comments = description_node[0].text_content().strip()
        comments = comments.replace('\r\n', '')
        # Collapse runs of spaces left over from the page layout.
        while '  ' in comments:
            comments = comments.replace('  ', ' ')
        return comments + '<br>(source: MondoUrania.com)<br>'

    def parse_cover(self, root, title, authors):
        '''
        Return the cover image URL, or None when the page has no image.

        The image ``src`` is resolved against the directory part of the
        page URL. ``title`` and ``authors`` are accepted but not used.
        '''
        cover_node = root.xpath('//tbody/tr/td/img/@src')
        if not cover_node:
            return None
        base_url = self.url.rpartition('/')[0].strip()
        return '%s/%s' % (base_url, cover_node[0])
			
