﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, John Howell <jhowell@acm.org>'
__docformat__ = 'restructuredtext en'


import re
import urllib
import cookielib

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.utils.date import parse_only_date

from calibre_plugins.overdrive_link.numbers import value_unit
from calibre_plugins.overdrive_link.formats import FORMAT_AUDIBLE_AUDIOBOOK
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.match import (normalize_author, normalize_title)
from calibre_plugins.overdrive_link.net import open_url
from calibre_plugins.overdrive_link.parseweb import (LibraryError, must_find, must_findAll, text_only, class_contains)

# Maps a language name (compared against the configured search language) to
# the id sent as the "field_language" parameter of an Audible search request.
LANGUAGE_CODES = {
    'Danish':     '9178182011',
    'Dutch':      '9178185011',
    'English':    '9178177011',
    'French':     '9178184011',
    'German':     '9178183011',
    'Greek':      '9178188011',
    'Italian':    '9178180011',
    'Japanese':   '9178187011',
    'Portuguese': '9178186011',
    'Russian':    '9178179011',
    'Spanish':    '9178178011',
    'Swedish':    '9178181011',
}
    

class Audible(SearchableLibrary):
    """Finds audiobooks by scraping the search and product pages of www.audible.com.

    The only accepted library id is "store"; book ids are the 10-character
    alphanumeric ids that appear in Audible product page URLs.

    Methods here read ``self.log``, ``self.config`` and ``self.library_id``,
    none of which are assigned in this class (presumably supplied by
    SearchableLibrary or by the code that creates the instance -- TODO confirm).
    """

    id = 'au'
    name = 'Audible'
    formats_supported = {FORMAT_AUDIBLE_AUDIOBOOK}

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        """Return *library_id* unchanged if it is "store".

        :param library_id: the library id to check
        :param migrate: not used by this library
        :param config: not used by this library
        :raises ValueError: if *library_id* is anything other than "store"
        """
        if library_id != "store":
            raise ValueError('Audible library id may only be "store", found: "%s"' % library_id)

        return library_id

    @staticmethod
    def validate_book_id(book_id, library_id):
        """Return *book_id* unchanged if it is exactly 10 ASCII letters/digits.

        :param book_id: the book id to check
        :param library_id: not used by this library
        :raises ValueError: if *book_id* does not have the required form
        """
        # \Z instead of $: "$" would also accept an id followed by a newline.
        if not re.match(r'^[0-9A-Za-z]{10}\Z', book_id):
            raise ValueError('Audible book id must be 10 alphanumeric characters: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        """Return the product page URL for *book_id* (*library_id* is not used)."""
        return 'http://www.audible.com/pd/%s' % book_id

    @staticmethod
    def supports_purchase(library_id):
        """Return True for every *library_id*."""
        return True

    def __init__(self):
        """Create the cookie jar that is passed to every request made by this instance."""
        # having cookies enabled causes Amazon to give more consistent results
        self.cookiejar = cookielib.CookieJar()

    @staticmethod
    def _encode_query(params):
        """Return the dict *params* as a URL query string.

        Unicode values are encoded as UTF-8 first: Python 2's
        ``urllib.urlencode`` applies ``str()`` to each value, which raises
        UnicodeEncodeError for text containing non-ASCII characters (for
        example an accented author name).
        """
        return urllib.urlencode({
            key: value.encode('utf-8') if isinstance(value, unicode) else value
            for key, value in params.items()})

    @classmethod
    def _search_url(cls, search_author, search_title, keyword_search, language_code, page_num):
        """Return the search URL for one page of results.

        Example of a full query as produced by the site's advanced search form:
        http://www.audible.com/search?advsearchKeywords=&searchTitle=titlexx&searchAuthor=authorxx&
        searchNarrator=&searchProvider=&field_subjectbin=&field_content_type-bin=&field_format-bin=&
        field_publication_date=&field_runtime=&field_language=9178177011&x=59&y=3
        """
        data = {}

        if search_author:
            data['searchAuthor'] = search_author

        if keyword_search:
            # a keyword search sends the title text as general keywords
            data['advsearchKeywords'] = search_title
        elif search_title:
            data['searchTitle'] = search_title

        if language_code:
            data['field_language'] = language_code

        if page_num > 1:
            # http://www.audible.com/search/ref=a_search_c8_2_srchPg?searchAuthor=john&field_language=9178177011&searchPage=2
            data['searchPage'] = unicode(page_num)

        return 'http://www.audible.com/search?%s' % cls._encode_query(data)

    @staticmethod
    def _result_count(results_index, css_class):
        """Return the integer shown in the span of class *css_class* within *results_index*.

        The span is looked up with must_find, like the other required page
        elements, instead of a plain find() that would hand None to text_only.
        """
        count_text = text_only(must_find(results_index, 'span', attrs={'class': css_class}))
        # tolerate a thousands separator ("1,504") in case the site emits one
        return int(count_text.replace(',', ''))

    @staticmethod
    def _book_id_from_href(href):
        """Return the path segment that immediately precedes the "ref=..." segment of *href*.

        :raises LibraryError: if *href* has no "ref=" segment, or nothing precedes it
        """
        book_id = None

        for segment in href.split('/'):
            if segment.startswith('ref='):
                break

            book_id = segment
        else:
            raise LibraryError('Missing "ref=" in title href')

        if not book_id:
            raise LibraryError('Missing book id in title href')

        return book_id

    @staticmethod
    def _result_authors(book_elem):
        """Return the normalized names labelled "By" in the metadata list of *book_elem*."""
        authors = []

        meta_elem = book_elem.find('div', attrs={'class': "adbl-prod-meta"})
        if meta_elem:
            for li in meta_elem.findAll('li'):
                label_an = li.find('span', attrs={'class': "adbl-label-an"})
                if label_an and text_only(label_an) == 'By':
                    author_elem = must_find(li, 'span', attrs={'class': "adbl-prod-author"})
                    authors.append(normalize_author(text_only(author_elem)))

        return authors

    def _parse_search_result(self, book_elem, search_language, search_author):
        """Turn one search result element into a LibraryBook.

        Returns the LibraryBook, or None (after logging the result as ignored)
        when the product type of the result is not "Audiobook".
        """
        title_elem = must_find(book_elem, 'div', attrs={'class': "adbl-prod-title"})
        title = normalize_title(text_only(title_elem))
        book_id = self._book_id_from_href(must_find(title_elem, 'a')['href'])
        authors = self._result_authors(book_elem)

        prod_type = text_only(must_find(book_elem, 'div', attrs=class_contains("adbl-prod-type")))

        formats = set()
        if prod_type == "Audiobook":
            formats.add(FORMAT_AUDIBLE_AUDIOBOOK)

        lbook = LibraryBook(
            authors=authors, title=title, formats=formats,
            language=search_language, purchasable=True, lib=self, book_id=book_id,
            search_author=search_author)

        if not formats:
            self.log.info('Ignoring %s: %s' % (prod_type, repr(lbook)))
            return None

        return lbook

    def find_books(self, books, search_author, search_title, keyword_search, find_recommendable):
        """Search the store page by page and add every audiobook found to *books*.

        :param books: collection the matching LibraryBook objects are added to (via ``add``)
        :param search_author: author text to search for; omitted from the query if empty
        :param search_title: title text, or the keywords when *keyword_search* is true
        :param keyword_search: if true, *search_title* is sent as general keywords
        :param find_recommendable: not used by this library
        :return: True if the site reports more than MAX_RESULTS_ALLOWED results
            (that response is not processed), otherwise False
        :raises LibraryError: if a result page does not have the expected
            numbering, number of results or structure
        """
        RESULTS_PER_PAGE = 20
        MAX_RESULTS_ALLOWED = 500

        # Only filter by (and report) the language if the configured one is known to the site
        language_code = LANGUAGE_CODES.get(self.config.search_language)
        search_language = self.config.search_language if language_code else ''

        page_num = 1

        while True:
            url = self._search_url(search_author, search_title, keyword_search, language_code, page_num)
            response = open_url(self.log, url, cookiejar=self.cookiejar)

            # Parse the html results for analysis
            soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

            if 'No results for the keyword.' in text_only(soup):
                break

            # 1 - 20 of 504 results
            results_index = must_find(soup, 'span', attrs={'class': "adbl-results-index"})
            first_result = self._result_count(results_index, "adbl-results-from")
            total_results = self._result_count(results_index, "adbl-results-total")

            total_pages = ((total_results - 1) // RESULTS_PER_PAGE) + 1  # floor division
            self.log.info('Response: page %d of %d. %d total results' % (page_num, total_pages, total_results))

            expected_first_result = ((page_num - 1) * RESULTS_PER_PAGE) + 1
            if first_result != expected_first_result:
                raise LibraryError('Unexpected first result %d instead of %d' % (first_result, expected_first_result))

            if total_results > MAX_RESULTS_ALLOWED:
                return True

            page_results = 0

            for book_elem in must_findAll(soup, 'li', attrs=class_contains('adbl-result-item')):
                lbook = self._parse_search_result(book_elem, search_language, search_author)

                if lbook is not None:
                    self.log.info('Found: %s' % repr(lbook))
                    books.add(lbook)

                # ignored results still count toward the page total
                page_results += 1

            if page_num < total_pages:
                expected_results = RESULTS_PER_PAGE
            else:
                expected_results = total_results - ((page_num - 1) * RESULTS_PER_PAGE)

            if page_results != expected_results:
                raise LibraryError('Expected %s but found %d' % (value_unit(expected_results, 'result'), page_results))

            if page_num >= total_pages:
                # Last page done: stop here instead of requesting a page past the end
                break

            page_num += 1

        return False

    def get_book_info(self, book_id):
        """Fetch the product page of *book_id* and return what it states as an InfoBook.

        Any of title, authors, publisher and publication date that is not
        found on the page keeps its empty default ('' / [] / '' / None).
        """
        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar)

        # Parse page

        authors = []
        title = ''
        publisher = ''
        pubdate = None
        formats = {FORMAT_AUDIBLE_AUDIOBOOK}

        soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        title_elem = soup.find('h1', attrs={'itemprop': 'name'})  # class="adbl-prod-h1-title"
        if title_elem:
            title = normalize_title(text_only(title_elem))

        prod_data = soup.find('div', attrs={'class': 'adbl-prod-data-column'})
        if prod_data:
            # each list item reads "<key>: <value>"
            for li in prod_data.findAll('li'):
                prod_key, _sep, prod_value = text_only(li).partition(':')
                prod_key = prod_key.strip()
                prod_value = prod_value.strip()

                if prod_key == 'Written by':
                    authors.append(normalize_author(prod_value))

                elif prod_key == 'Release Date':
                    pubdate = parse_only_date(prod_value, assume_utc=True)

                elif prod_key == 'Publisher':
                    publisher = prod_value

        return InfoBook(
            authors=authors, title=title, publisher=publisher, pubdate=pubdate,
            formats=formats, lib=self, book_id=book_id)
