﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, John Howell <jhowell@acm.org>'
__docformat__ = 'restructuredtext en'

import re
import cookielib
import urllib
import json
import urlparse
import math
import os
import traceback
import urllib2
from collections import defaultdict

from calibre.constants import config_dir
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.utils.date import parse_only_date
from calibre.utils.config import JSONConfig
from calibre.utils.config_base import tweaks

from calibre_plugins.overdrive_link.json import js_value
from calibre_plugins.overdrive_link.recipe_81611_1 import roman_to_int
from calibre_plugins.overdrive_link.numbers import (value_unit, numeric_rank)
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.formats import (FORMAT_ADOBE_EPUB, FORMAT_ADOBE_PDF,
    FORMAT_KINDLE_BOOK, FORMAT_MOBI_EBOOK, FORMAT_OD_MP3, 
    FORMAT_OD_MUSIC, FORMAT_OD_READ, FORMAT_NOOK_PERIODICALS,
    FORMAT_OD_WMA, FORMAT_OD_VIDEO, FORMAT_OD_VIDEO_MOBILE, FORMAT_OPEN_EPUB, FORMAT_OPEN_PDF, 
    FORMAT_STREAMING_VIDEO, FORMAT_MEDIADO_READER, FORMAT_OD_LISTEN)
from calibre_plugins.overdrive_link.language import LANGUAGE_CODE2
from calibre_plugins.overdrive_link.library import (SearchableLibrary, LendingLibrary)
from calibre_plugins.overdrive_link.log import html_to_text
from calibre_plugins.overdrive_link.net import (open_url, hostname_from_url, isValidHostname)
from calibre_plugins.overdrive_link.match import (normalize_author, normalize_title, Object)
from calibre_plugins.overdrive_link.overdrive_legacy import OverDriveLegacy
from calibre_plugins.overdrive_link.overdrive_sites import known_new_overdrive_sites
from calibre_plugins.overdrive_link.parseweb import (
    LibraryError, must_find, must_findAll, text_only, class_contains, class_contains_all, strip_html)
from calibre_plugins.overdrive_link.tweak import (TWEAK_TRACK_NEEDED_BOOKS, TWEAK_OD_FIRST_ID, TWEAK_OD_LAST_ID)

# Host name of the generic OverDrive search site. It is rejected as a library id and a
# redirect to it is reported as a non-working library configuration.
GENERIC_SEARCH_SITE = 'search.overdrive.com'

# Special branch id value that selects sign in with an OverDrive account instead of a
# library card.
OVERDRIVE_SIGNIN_BRANCH_ID = 'overdrive'

# File name of the legacy-to-new book id translation table. Only the disabled migration
# code refers to it in the part of the module reviewed here.
OVERDRIVE_BOOKS_FILE = 'overdrive_books.json'


# Maps the format names displayed by OverDrive library sites to the plugin's FORMAT_*
# constants. The trailing comment on each entry is the corresponding OverDrive format code.
OVERDRIVE_FORMAT_NAMES = {
    # format names used at OverDrive libraries
    'EPUB eBook': FORMAT_ADOBE_EPUB,                    # ebook-epub-adobe
    'Kindle Book': FORMAT_KINDLE_BOOK,                  # ebook-kindle
    'MediaDo eBook': FORMAT_MEDIADO_READER,             # ebook-media-do
    'MP3 audiobook': FORMAT_OD_MP3,                     # audiobook-mp3
    'NOOK Periodical': FORMAT_NOOK_PERIODICALS,         # periodicals-nook
    'Open EPUB eBook': FORMAT_OPEN_EPUB,                # ebook-epub-open
    'Open PDF eBook': FORMAT_OPEN_PDF,                  # ebook-pdf-open
    'OverDrive Listen audiobook': FORMAT_OD_LISTEN,     # audiobook-overdrive
    'OverDrive Read': FORMAT_OD_READ,                   # ebook-overdrive
    'PDF eBook': FORMAT_ADOBE_PDF,                      # ebook-pdf-adobe
    'Streaming video': FORMAT_STREAMING_VIDEO,          # video-streaming

    # unused?
    'Downloadable Music': FORMAT_OD_MUSIC,
    'Digital Music': FORMAT_OD_MUSIC,
    'Downloadable Video': FORMAT_OD_VIDEO,
    'Digital Video': FORMAT_OD_VIDEO,

    }

#discovered_new_overdrive_sites = {}

#--------------------------------
# Using a JSON file to keep track of which libraries have been upgraded is not the safest
# approach due to file contention, but it is only temporary.

# Location of the JSONConfig store and the key under which the list of library ids
# detected as new-style sites is kept.
ConfigStoreLocation = 'plugins/Overdrive Link Temp'
NewOverDriveLibraries = 'NewOverDriveLibraries'

# In-memory copy of the persisted set of new-style library ids; None until first loaded.
new_overdrive_sites = None
# Cache for the legacy-to-new book id table; stays None unless some code loads it.
new_book_id_from_legacy = None


def is_new_overdrive_site(library_id):
    """Report whether *library_id* is treated as a new-style OverDrive site.

    A library id listed in ``known_new_overdrive_sites`` is always new. Otherwise the
    answer comes from the persisted set of detected sites, which is loaded from the
    JSON config store on first need and cached in the module-level
    ``new_overdrive_sites``.

    Returns True if the site is considered new-style, False otherwise. If the
    persistent store cannot be read, the traceback is printed and True is returned
    (the site is assumed to be new); the cache is left unloaded so that a later call
    retries.
    """
    global new_overdrive_sites

    #print('is_new_site %s' % library_id)

    if library_id in known_new_overdrive_sites:
        return True

    if new_overdrive_sites is None:
        #print('load persistent NewOverDriveLibraries')

        try:
            persistent = JSONConfig(ConfigStoreLocation)    # load at first need
            persistent.defaults[NewOverDriveLibraries] = []
            new_overdrive_sites = set(persistent[NewOverDriveLibraries])
        except Exception:
            # Best effort: report the problem but do not swallow KeyboardInterrupt/SystemExit.
            traceback.print_exc()
            return True

    return library_id in new_overdrive_sites
    

def set_new_overdrive_site(library_id, is_new):
    """Record whether *library_id* was detected as a new-style OverDrive site.

    Nothing happens when the recorded status already matches *is_new*. Otherwise the
    in-memory set ``new_overdrive_sites`` is updated and written back to the JSON
    config store. A failure to save is printed and otherwise ignored.

    A library can be reported as new without being a member of the persisted set: when
    it is listed in ``known_new_overdrive_sites`` or when the store could not be loaded.
    In those cases there is no persisted entry to remove, so a request to mark the site
    as legacy is ignored instead of failing.
    """
    global new_overdrive_sites

    if bool(is_new_overdrive_site(library_id)) == bool(is_new):
        return

    if new_overdrive_sites is None:
        # The persisted set was never loaded (known site or load failure); nothing to change.
        return

    if is_new:
        new_overdrive_sites.add(library_id)
    elif library_id in new_overdrive_sites:
        new_overdrive_sites.remove(library_id)
    else:
        # Reported as new only because it is in known_new_overdrive_sites.
        return

    #print('save persistent NewOverDriveLibraries')

    try:
        persistent = JSONConfig(ConfigStoreLocation)
        persistent[NewOverDriveLibraries] = sorted(new_overdrive_sites)   # save on change
    except Exception:
        traceback.print_exc()
        
#--------------------------------

        
def fix_title(odtitle, subtitle, edition=''):
    """Combine an OverDrive title, subtitle and edition into title and series information.

    The subtitle is sometimes just a subtitle and sometimes holds series information,
    so this tries to guess which:

    * A subtitle of the form "<series> Series, Book <n>" (also "Volume"/"Vol.", with or
      without the word "Series") is taken as series name and index. The index may be a
      number or a Roman numeral (converted by ``roman_to_int``, which is assumed to
      raise ValueError for text it cannot convert); otherwise the index is 0.0.
    * Subtitles that only distinguish books sharing one title (edition/volume markers,
      annual collections and a few hard-coded titles) are appended as "title, subtitle".
    * Any other subtitle that is not series information is appended as "title: subtitle".
      The subtitle "A Novel" is ignored.

    An edition, unless it is one of a few uninformative values, is appended to the
    title as well.

    Returns a tuple ``(title, series, series_index)`` with ``series`` empty and
    ``series_index`` 0.0 when no series was recognized.
    """
    title = odtitle
    series = ''
    series_index = 0.0

    if subtitle and (subtitle != 'A Novel'):
        # Try the most specific separators first; stop at the first one present.
        sep = ''
        for separator in (' Series, Book ', ' Series, Volume ', ' Series, Vol. ',
                          ', Book ', ', Volume ', ', Vol. '):
            series_str, sep, series_index_str = subtitle.partition(separator)
            if sep:
                break

        if sep:
            # Subtitle is actually series info
            series = series_str
            index_text = series_index_str.strip()
            try:
                series_index = float(index_text)    # plain number
            except ValueError:
                try:
                    series_index = roman_to_int(index_text.upper())     # Roman numeral
                except ValueError:
                    series_index = 0.0

        # Some book series all have the same title and need book/volume information added to be made unique
        # Note: re.match is anchored at the beginning of the string, re.search is not
        if (re.match(r'^[0-9]+ Edition$', subtitle) or
                re.match(r'^Volume [0-9IVXLCDM]+$', subtitle) or
                re.match(r'^Vol\. [0-9IVXLCDM]+$', subtitle) or     # the period is literal
                re.search(r' Annual Collection$', subtitle) or
                (re.search(r' Collected Stories of ', odtitle) and re.search(r'Volume [0-9IVXLCDM]+$', subtitle)) or
                odtitle == 'Kurt Vonnegut' or        # Letters -and- The Last Interview and Other Conversations
                odtitle == 'Fantasy' or odtitle == 'Science Fiction'): # The Best of the Year (Rich Horton)
            title = '%s, %s'%(odtitle, subtitle)
        elif not series:
            title = '%s: %s'%(odtitle, subtitle)

    if edition:
        edition = re.sub(r'[()]', '', edition)
        edition = re.sub(r' Edition$', '', edition)

        if re.match(r'^[0-9]+$', edition):
            edition_num = int(edition)
            if 1 <= edition_num <= 99:
                edition = numeric_rank(edition_num)     # 1-> 1st, 2 -> 2nd, etc.

        #  DGO = Digital original = digital exclusive
        if edition not in ('1st', '2nd', 'Abridged', 'Unabridged', 'Unabridged Selections', 'ebook', 'Illustrated',
                            'Hemingway Library', 'DGO'):
            split_edition = edition.split()
            if len(split_edition) >= 2 and split_edition[1].lower() == 'anniversary':
                title = '%s (%s Anniversary Edition)'%(title, split_edition[0])
            else:
                title = '%s, %s Edition'%(title, edition)

    return (title, series, series_index)

    


class OverDrive(SearchableLibrary):

    # Provider id; passed as the first argument to config.library() lookups.
    id = ''
    name = 'OverDrive'

    # Formats handled by this provider, grouped by media category. The e-book and
    # audiobook groups decide whether a search can be restricted to one media type, and
    # the e-book, audiobook and video groups decide which search results are wanted.
    ebook_formats_supported = {FORMAT_ADOBE_EPUB, FORMAT_OPEN_EPUB, FORMAT_OPEN_PDF,
        FORMAT_KINDLE_BOOK, FORMAT_MOBI_EBOOK, FORMAT_ADOBE_PDF, FORMAT_OD_READ,
        FORMAT_MEDIADO_READER}
    audiobook_formats_supported = {FORMAT_OD_LISTEN, FORMAT_OD_MP3, FORMAT_OD_WMA}
    video_formats_supported = {FORMAT_OD_VIDEO, FORMAT_OD_VIDEO_MOBILE, FORMAT_STREAMING_VIDEO}
    music_formats_supported = {FORMAT_OD_MUSIC, }
    periodical_formats_supported = {FORMAT_NOOK_PERIODICALS, }

    # Union of all of the per-category format sets above.
    formats_supported = (ebook_formats_supported | audiobook_formats_supported |
                         video_formats_supported | music_formats_supported |
                         periodical_formats_supported)

    supports_recommendation = True              # detected per library
    sign_in_affects_check_obtainable = True     # sign in needed to detect holds and Advantage titles

    
    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        """Normalize a user-supplied library id to a lower case host name.

        A value containing ':' or '/' is treated as a URL and reduced to its host name.
        When *config* is given and does not already hold the resulting id, the
        alternate spelling of the host (with or without the ".lib" label before
        "overdrive"/"libraryreserve") is used instead if that one is configured.
        *migrate* is accepted but not used here.

        Returns the normalized library id.
        Raises ValueError if the host name is invalid, is the generic OverDrive search
        site, or contains no period.
        """
        entered_id = library_id

        looks_like_url = (':' in library_id) or ('/' in library_id)
        if looks_like_url:
            library_id = hostname_from_url(library_id)

        library_id = library_id.lower()

        if not isValidHostname(library_id):
            raise ValueError('OverDrive library hostname invalid: "%s"' % entered_id)

        if library_id == GENERIC_SEARCH_SITE:
            raise ValueError('OverDrive library hostname invalid: "%s"' % entered_id)

        if '.' not in library_id:
            raise ValueError('OverDrive library hostname must contain a period: "%s"' % entered_id)

        if config is None:
            return library_id

        if config.library(OverDrive.id, library_id) is not None:
            return library_id   # this exact id is already configured

        # detect cases where an alternate form of the same library id has already been configured and use it if so
        alternate_forms = (
            ('.lib.overdrive.', '.overdrive.'),
            ('.overdrive.', '.lib.overdrive.'),
            ('.lib.libraryreserve.', '.libraryreserve.'),
            ('.libraryreserve.', '.lib.libraryreserve.'),
            )

        alt_library_id = None
        for present_form, other_form in alternate_forms:
            if present_form in library_id:
                alt_library_id = library_id.replace(present_form, other_form)
                break   # only the first matching form is considered

        if alt_library_id is None:
            return library_id

        if config.library(OverDrive.id, alt_library_id) is None:
            return library_id

        return alt_library_id
         
        
    @staticmethod
    def validate_branch_id(branch_id):
        """Return *branch_id* in lower case after checking that it is acceptable.

        Accepted values are the empty string, the OverDrive sign in branch id and any
        string of decimal digits. Raises ValueError for anything else.
        """
        branch_id = branch_id.lower()

        if branch_id in ('', OVERDRIVE_SIGNIN_BRANCH_ID):
            return branch_id

        if re.match(r'^[0-9]+$', branch_id):
            return branch_id

        raise ValueError('OverDrive branch id must be a numeric or "%s": "%s"' % (OVERDRIVE_SIGNIN_BRANCH_ID, branch_id))


    @staticmethod
    def validate_book_id(book_id, library_id):
        if not (re.match(r'^[0-9]+$', book_id) or
                re.match(r'^([0-9a-f]{8})\-([0-9a-f]{4})\-([0-9a-f]{4})\-([0-9a-f]{4})\-([0-9a-f]{12})$', book_id)):
            raise ValueError('OverDrive book id must be a numeric or a UUID: "%s"' % book_id)
            
        # temporary migration of old style OverDrive book ids
        '''
        if '-' in book_id and is_new_overdrive_site(library_id):
            global new_book_id_from_legacy
            if new_book_id_from_legacy is None:
                try:
                    # table is large so import only as needed
                    json_filename = os.path.join(os.path.join(config_dir, 'plugins'), OVERDRIVE_BOOKS_FILE)
                    with open(json_filename, 'rb') as of:
                        new_book_id_from_legacy = json.loads(of.read())
                except:
                    pass    # ignore missing translation table
                
            if new_book_id_from_legacy is not None:
                if book_id in new_book_id_from_legacy:
                    book_id = unicode(new_book_id_from_legacy[book_id])             # translate to new id, if known
                elif tweaks.get(TWEAK_OD_LAST_ID, False):
                    print("**** no new id for %s (%s)" % (book_id, library_id))     # TEMP for debug???
            '''
            
        return book_id
        
        
    @staticmethod
    def book_url(library_id, book_id):
        """Return the URL of a book's page at a library, for either site generation.

        This also acts as a temporary check of the site version: a book id of at most
        10 characters marks the library as a new-style site before the URL is chosen.
        """
        short_id = len(book_id) <= 10
        if short_id:
            set_new_overdrive_site(library_id, True)

        if not is_new_overdrive_site(library_id):
            return OverDriveLegacy.book_url(library_id, book_id)

        return OverDrive.new_book_url(library_id, book_id)
    
        
    @staticmethod
    def new_book_url(library_id, book_id):
        return 'https://%s/media/%s' % (library_id, book_id.upper())    # works for old or new style ID at new style library
    
        
    def __init__(self):
        """Set up the initial (signed out, not ready) state and the format name lookup."""
        # sign in and search state
        self.signin_required = False
        self.signed_in = False
        self.ready_for_search = False
        self.recommendation_allowed = False
        self.legacy = None
        self.cookiejar = cookielib.CookieJar()

        # holds state
        self.holds_checked = False
        self.holds = {}

        # translate formats supported by this library to standard format names
        self.library_formats_names = {fmt.lower(): fmt for fmt in self.formats_supported}

        for display_name, fmt in OVERDRIVE_FORMAT_NAMES.items():
            key = display_name.lower()
            self.library_formats_names[key] = fmt
            self.library_formats_names[key + 's'] = fmt     # plural form of the same name
        

    def sign_in(self, use_credentials, allow_redirect=False):
        """Detect the site layout of this library, optionally sign in, and prepare for searching.

        The library home page is fetched to determine which site generation is in use:

        * If the host redirects to a different host, an error is logged. With
          *allow_redirect* the new host is adopted and checked in turn; without it the
          method returns with the library not ready for search.
        * A legacy layout is recorded as such and all work is delegated to a newly
          created OverDriveLegacy instance kept in ``self.legacy``.
        * A new layout is recorded as such and handling continues here.

        When a card number is configured and *use_credentials* is true, a sign in is
        attempted using either OverDrive account authentication (branch id equal to
        OVERDRIVE_SIGNIN_BRANCH_ID) or library card authentication for the configured
        branch. A failed sign in is logged and the method returns with the library not
        ready for search.

        Finally the advanced search form is inspected to find out whether searching
        for recommendable books is available.

        On success ``self.ready_for_search`` is True; ``self.signed_in``,
        ``self.signin_required`` and ``self.recommendation_allowed`` are set as detected.

        Raises LibraryError for configuration problems (redirect loop, redirect to the
        generic OverDrive site, missing or unknown branch id, unsupported branch), for
        an unknown site layout and when the library requires sign in to search.
        """
        tried_ids = set()

        while True:
            if self.library_id in tried_ids:
                raise LibraryError('Incorrect library id in configuration! '
                        'Host name %s is redirected to itself. Configuration update required.' % self.library_id)

            tried_ids.add(self.library_id)

            response = open_url(self.log, 'http://%s' % self.library_id, cookiejar=self.cookiejar)
            soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

            redirect_url = response.geturl()
            redirect_host = hostname_from_url(redirect_url).lower()

            # Make sure that the host name entered by the user matches the host name returned.

            if redirect_host == GENERIC_SEARCH_SITE:
                raise LibraryError('Non-working library id in configuration! '
                        'Host name %s is redirected to the generic Overdrive site (%s).' % (self.library_id, redirect_host))

            if redirect_url.startswith('https:') and redirect_host.endswith('libraryreserve.com'):
                self.log.info('Cannot determine site layout. Library requires sign-in before searching.')
                break

            if redirect_host != self.library_id:
                self.log.error('Incorrect library id in configuration! '
                        'Host name %s is redirected to %s. Configuration change required. See Help for details.' % (
                            self.library_id, redirect_host))

                if not allow_redirect:
                    return

                # follow the redirect and examine the new host on the next pass
                self.library_id = redirect_host
                continue

            # temporary check needed until all OverDrive sites have been upgraded to new format
            if soup.find('section', id='mainContent'):
                self.log.info('legacy OverDrive site layout detected for %s' % self.name)
                set_new_overdrive_site(self.library_id, False)

                # (Disabled: a "thunderPop" pop up on legacy pages may link to the new style
                # website; such links used to be collected for discovery of upgraded sites.)

                # create an old-style library instance to use
                self.legacy = OverDriveLegacy()
                self.legacy.enabled = self.enabled
                self.legacy.provider = OverDriveLegacy
                self.legacy.provider_id = self.provider_id
                self.legacy.library_id = self.library_id
                self.legacy.branch_id = self.branch_id
                self.legacy.name = self.name
                self.legacy.card_number = self.card_number
                self.legacy.card_pin = self.card_pin
                self.legacy.log = self.log
                self.legacy.config = self.config

                self.legacy.signin_required = False
                self.legacy.signed_in = False
                self.legacy.did_sign_in = False

                self.legacy.sign_in(use_credentials)
                self.legacy.did_sign_in = True
                self.ready_for_search = self.legacy.ready_for_search
                self.recommendation_allowed = self.legacy.recommendation_allowed
                return

            toaster = soup.find('div', attrs={'class':'toaster'})
            if not toaster:
                raise LibraryError('Unknown OverDrive library website layout. Cannot access.')

            set_new_overdrive_site(self.library_id, True)

            if tweaks.get(TWEAK_TRACK_NEEDED_BOOKS, False):
                self.log.warn('new OverDrive site layout detected for %s' % self.name)
            else:
                self.log.info('new OverDrive site layout detected for %s' % self.name)

            break

        if self.card_number and use_credentials:
            self.signin_required = True

            response = open_url(self.log, 'https://%s/account/sign-in' % self.library_id, cookiejar=self.cookiejar)
            login_url = response.geturl()

            # Library card, OverDrive account or Facebook sign in

            soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

            # the page embeds its data as a script element with id "model"
            model_script = must_find(soup, 'script', attrs={'id': 'model'})
            model = js_value(self.log, html_to_text(model_script.string), "")

            if self.branch_id == OVERDRIVE_SIGNIN_BRANCH_ID:
                # OverDrive authentication
                self.log.info('Signing in to OverDrive for %s' % self.name)

                data = {}
                data[model["tokenName"]] = model["tokenValue"]
                data["username"] = self.card_number
                data["password"] = self.card_pin

                response = open_url(self.log, login_url, urllib.urlencode(data), cookiejar=self.cookiejar)

            else:
                # Library authentication
                response = open_url(self.log, model["libraryAuthUrl"], cookiejar=self.cookiejar)
                login_url = response.geturl()

                soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

                model_script = must_find(soup, 'script', attrs={'id': 'model'})
                model2 = js_value(self.log, html_to_text(model_script.string), "")

                login_forms = model2["loginForms"]

                if model2["displayLoginFormSelector"]:
                    # branch selection required

                    for login_form in login_forms:
                        if unicode(login_form["id"]) == self.branch_id:
                            self.log.info("Signing in to %s branch ID %s (%s)" % (self.name, self.branch_id, login_form["displayName"]))
                            break
                    else:
                        # no form matches the configured branch: list the valid ones, then fail
                        self.log.warn('It appears that a branch ID is required for %s. Configure this library'
                                ' in the plugin with the appropriate branch ID:' % self.library_id)

                        for login_form in login_forms:
                            # %s rather than %d so that a non-integer id cannot mask the error below
                            self.log.info('%s = %s' % (login_form["displayName"], login_form["id"]))

                        if self.branch_id:
                            raise LibraryError("Failed to find branch ID %s - View log for a list of valid IDs" % self.branch_id)

                        raise LibraryError('Branch ID required for sign in - View log for a list of valid IDs')

                elif len(login_forms) != 1:
                    raise LibraryError('Single OverDrive site has %d login forms' % len(login_forms))

                else:
                    login_form = login_forms[0]     # single branch
                    self.log.info('Signing in to %s (%s)' % (self.name, login_form["displayName"]))


                if login_form["isUnsupported"]:
                    raise LibraryError('Branch "%s" is unsupported' % login_form["displayName"])

                if login_form["isRedirect"]:
                    raise LibraryError('Branch "%s" is a redirect' % login_form["displayName"])

                # model2["cardAcquisitionUrl"] ???

                data = {}
                data[model2["tokenName"]] = model2["tokenValue"]
                data["websiteId"] = login_form["id"]
                data["ilsName"] = login_form["ilsName"]
                data["username"] = self.card_number
                if login_form["requiresPin"]: data["password"] = self.card_pin

                login_url = urlparse.urljoin(login_url, model2["loginUrl"])
                response = open_url(self.log, login_url, urllib.urlencode(data), cookiejar=self.cookiejar)


            # examine the page that results from the sign in attempt
            login_url = response.geturl()
            page = response.data
            soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)

            model_script = soup.find('script', attrs={'id': 'model'})
            if model_script:
                model3 = js_value(self.log, html_to_text(model_script.string), "")
                if "errorMessage" in model3 or "libraryLoginErrorText" in model3:
                    self.log.error('Sign in error: %s: %s' % (model3.get("libraryLoginErrorText", ""), model3.get("errorMessage", "")))
                    return

            if "/login?" in login_url:
                self.log.info('URL after sign in: %s' % login_url)
                self.log.error('Sign in failed. Check card number and PIN.')
                return

            if not('Log Out' in page or 'Sign Out' in page):
                # unknown problem
                self.log.info('Sign in failure: %s' % text_only(soup))
                self.log.error('Sign in unsuccessful.')
                return

            self.log.info('Sign in successful')
            self.signed_in = True

        elif ('/SignIn.htm' in redirect_url) or ('/SignIn2.htm' in redirect_url):
            self.signin_required = True
            raise LibraryError('Library requires sign in to perform searches.')


        # check advanced search form for options at this library
        response = open_url(self.log, 'https://%s/advanced-search' % self.library_id, cookiejar=self.cookiejar)
        soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)
        search_form = must_find(soup, 'form', attrs={'id': 'advanced-search-form'})
        availability = must_find(search_form, 'select', attrs={'name': 'availability'})
        if availability.find('option', attrs={'data-parameter-name': 'showAlsoRecommendable'}):
            self.log.info('Search for recommendable books is supported by %s' % self.name)
            self.recommendation_allowed = True

        self.ready_for_search = True
        
    

    def find_books(self, books, search_author, search_title, keyword_search, find_recommendable):
        """Search this library and add the matching books to *books*.

        For a legacy site the search is delegated to ``self.legacy``. Otherwise the
        search result pages are fetched one by one and every listed title is turned
        into a LibraryBook. A book is added to *books* only if its media type is
        among the configured search formats and its availability matches the kind of
        search (owned titles for a normal search, not-owned titles when
        *find_recommendable* is true).

        books -- collection receiving the results through its ``add`` method
        search_author -- author text to search for; may be empty
        search_title -- title text to search for; may be empty
        keyword_search -- true for a keyword search (not combined with recommendable)
        find_recommendable -- true to look for titles that can be recommended

        Returns True when the search matched more results than are allowed to be
        processed, in which case no books are added. Returns False otherwise,
        including when the search is not performed at all.

        Raises LibraryError when the site reports an error or when the result pages
        do not have the expected content.
        """
        if self.legacy:
            return self.legacy.find_books(books, search_author, search_title, keyword_search, find_recommendable)

        if not self.ready_for_search:
            return False

        if len(search_author) < 4 and not search_title:
            return False    # very short author names return bad results

        if keyword_search and find_recommendable:
            return False    # combination not supported for OverDrive

        RESULTS_PER_PAGE = 24
        MAX_RESULTS_ALLOWED = 500

        # Query parameters that are the same for every page of results.
        query = {}

        # These require ' ' to be replaced by '+' in urlencode for proper operation
        if search_title: query['q'] = ' '.join(search_title.lower().split())                 # defaults to 'AND'
        if search_author: query['creator'] = ' AND '.join(search_author.lower().split())     # defaults to 'OR'

        language_code = LANGUAGE_CODE2.get(self.config.search_language)
        if language_code: query['language'] = language_code

        # restrict the media type when all of the wanted formats belong to a single type
        if len((self.config.search_formats - self.ebook_formats_supported) & self.formats_supported) == 0:
            query['mediaType'] = 'ebook'         # only e-books
        elif len((self.config.search_formats - self.audiobook_formats_supported) & self.formats_supported) == 0:
            query['mediaType'] = 'audiobook'     # only audiobooks

        # showOnlyAvailable=true
        # showOnlyPrerelease=true

        if find_recommendable:
            query['showAlsoRecommendable'] = 'true'

        query['sortBy'] = 'relevancy'

        page_num = 1
        total_pages = 1
        total_results = 0
        results_processed = 0

        while (page_num <= total_pages):
            data = dict(query)
            if page_num != 1: data['page'] = unicode(page_num)

            response = open_url(self.log, 'https://%s/search?%s' % (self.library_id, urllib.urlencode(data)),
                            cookiejar=self.cookiejar)

            soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

            error_container = soup.find('div', attrs=class_contains('error-container'))
            if error_container:
                raise LibraryError(text_only(error_container))

            # <h2 class="header-small text-center" id="noresults" tabindex="0">We couldn't find any matches for ...</h2>
            no_results_h2 = soup.find('h2', attrs={'id':'noresults'})
            if no_results_h2:
                break

            # <h2 class="search-text">Showing 97-120 of 125 results for ...</h2>
            search_text_h2 = must_find(soup, 'h2', attrs=class_contains('search-text'))
            search_text = text_only(search_text_h2)
            search_info = search_text.split()

            # A header that is too short or has non-numeric counts is reported as a
            # LibraryError rather than surfacing as an IndexError/ValueError.
            if (len(search_info) < 5 or search_info[0] != "Showing" or search_info[2] != "of" or
                    search_info[4] != "results"):
                raise LibraryError('Unexpected search-text %s' % search_text)

            try:
                first_result = int(search_info[1].partition("-")[0])
                new_total_results = int(search_info[3])
            except ValueError:
                raise LibraryError('Unexpected search-text %s' % search_text)

            # pages must arrive in sequence and the total must not change between pages
            if first_result != results_processed + 1:
                raise LibraryError('Unexpected first result %d instead of %d' % (first_result, results_processed + 1))

            if total_results and (new_total_results != total_results):
                raise LibraryError('Total results changed from %d to %d' % (total_results, new_total_results))

            total_results = new_total_results
            total_pages = ((total_results - 1) // RESULTS_PER_PAGE) + 1  # floor division

            self.log.info('Response: page %d of %d. %d total results' % (page_num, total_pages, total_results))

            if total_results > MAX_RESULTS_ALLOWED:
                return True

            loaded_media_container = must_find(soup, 'div', attrs=class_contains_all("media-container loaded"))

            for title_container in must_findAll(loaded_media_container, 'div', attrs=class_contains('title-container'), recursive=True):
                authors = []
                title = ''
                desired_format = False
                available = True

                href = must_find(title_container, 'a')['href']

                if not href.startswith("/media/"):
                    raise LibraryError('Unexpected anchor href: %s' % href)

                book_id = self.validate_book_id(href.rpartition("/")[2], self.library_id)

                # <span class="title-format-badge"><i class="icon-ebook"></i>eBook</span>

                title_format = text_only(must_find(title_container, 'span', attrs=class_contains('title-format-badge')))

                if title_format == 'eBook':
                    if len(self.config.search_formats & self.ebook_formats_supported) > 0:
                        desired_format = True
                elif title_format == 'Audiobook':
                    if len(self.config.search_formats & self.audiobook_formats_supported) > 0:
                        desired_format = True
                elif title_format == 'Video':
                    if len(self.config.search_formats & self.video_formats_supported) > 0:
                        desired_format = True
                elif title_format == 'Music':
                    pass    # music results are never wanted
                else:
                    raise LibraryError('Unknown title-format %s' % title_format)

                if title_container.find('div', attrs=class_contains('not-owned-title')):
                    available = False

                title_p = title_container.find('p', attrs=class_contains('title-name'))
                if title_p:
                    title = normalize_title(text_only(title_p))

                # Only the first author is shown in results list.
                author_p = title_container.find('p', attrs=class_contains('title-author'))
                if author_p:
                    authors.append(normalize_author(text_only(author_p), unreverse=False))

                lbook = LibraryBook(authors=authors, title=title, lib=self, book_id=book_id,
                                available=available, recommendable=find_recommendable,
                                search_author=search_author)

                if not desired_format:
                    self.log.info('Ignoring %s: %s' % (title_format, repr(lbook)))
                elif available and find_recommendable:
                    self.log.info('Ignoring available %s: %s'%(title_format, repr(lbook)))
                elif not (available or find_recommendable):
                    self.log.info('Ignoring unavailable %s: %s'%(title_format, repr(lbook)))
                else:
                    self.log.info('Found %s: %s' % (title_format, repr(lbook)))
                    books.add(lbook)

                results_processed += 1

            page_num += 1

        # every result announced by the site must have been seen
        if results_processed != total_results:
            raise LibraryError('Expected %s but found %d' % (value_unit(total_results,'result'), results_processed))

        return False
        
        
    def get_book_info(self, book_id):
        '''
        Fetch the details page of book_id at this library and return an InfoBook.

        Legacy sites delegate to self.legacy.get_book_info. Otherwise the page is
        scraped for title, subtitle, authors, formats, language, publisher,
        release date and ISBN; any field missing from the page keeps its empty
        default. Raises LibraryError carrying the page's error text when the
        page contains an error container.
        '''
        if self.legacy:
            return self.legacy.get_book_info(book_id)

        authors = []
        title = ''
        subtitle = ''
        edition = ''
        publisher = ''
        pubdate = None
        isbn = ''
        # Must be initialised here: a page without a languages panel would
        # otherwise leave the name unbound when the InfoBook is built.
        language = ''
        formats = set()

        response = open_url(self.log, self.new_book_url(self.library_id, book_id), cookiejar=self.cookiejar)
        soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        error_container = soup.find('div', attrs=class_contains('error-container'))
        if error_container:
            raise LibraryError(text_only(error_container))

        title_header = must_find(soup, 'div', attrs=class_contains('title-header-container'))

        title_h = title_header.find('h1', attrs=class_contains('media-title'))
        if title_h:
            title = text_only(title_h)

        subtitle_h = title_header.find('h2', attrs=class_contains('subtitle-series'))
        if subtitle_h:
            subtitle = text_only(subtitle_h)

        author_h = title_header.find('h2', attrs=class_contains('author'))
        if author_h:
            for author_link in author_h.findAll('a'):
                author = normalize_author(text_only(author_link), unreverse=False)
                if author and (author not in authors):
                    authors.append(author)

        # The formats panel lists one format per '<br />' separated segment.
        formats_panel = soup.find('div', attrs={'id':'formats-panel'})
        if formats_panel:
            for format_text in unicode(formats_panel).split('<br />'):
                format_name = strip_html(format_text).replace('®', '')
                standard_format = self.library_formats_names.get(format_name.lower())
                if standard_format:
                    formats.add(standard_format)
                elif format_name not in ['', 'Device restrictions', 'Kindle restrictions']:
                    self.log.warn('Unknown book format: "%s"' % format_name)

        languages_panel = soup.find('div', attrs={'id':'languages-panel'})
        if languages_panel:
            for language_text in unicode(languages_panel).split('<br />'):
                language = strip_html(language_text)
                if language:
                    break   # take first found

        details_panel = soup.find('div', attrs={'id':'details-panel'})
        if details_panel:
            for detail in details_panel.findAll('p'):
                category = text_only(must_find(detail, 'b'))
                if category == "Publisher:":
                    publisher = text_only(must_find(detail, 'a'))
                elif (category in self.formats_supported) or (category in OVERDRIVE_FORMAT_NAMES):
                    # Per-format details are "name: value" segments separated by '<br />'.
                    for format_info in unicode(detail).split('<br />'):
                        format_info = strip_html(format_info)
                        info_type, sep, info_value = format_info.partition(": ")

                        if info_type == "Release date":
                            pubdate = parse_only_date(info_value, assume_utc=True)
                        elif info_type == "ISBN":
                            isbn = info_value
                else:
                    self.log.info("ignoring category: %s" % category)

        publisher = re.sub(r'\[Start\]','',publisher).strip()    # Handle: Night Shade Books[Start]

        title, series, series_index = fix_title(title, subtitle, edition)
        title = normalize_title(title)

        return InfoBook(authors=authors, title=title, series=series, series_index=series_index, isbn=isbn,
                        language=language, publisher=publisher, pubdate=pubdate, formats=formats,
                        lib=self, book_id=book_id)
                        
                        
    def check_book_obtainable(self, book_id):
        '''
        Estimate when book_id can be obtained from this library.

        Legacy sites delegate to self.legacy.check_book_obtainable. Otherwise the
        holds list is consulted, the title page is scraped for copy counts,
        waiting list size and pre-release date, and the values are passed to
        self.when_obtainable, whose result is returned. Raises LibraryError for
        an error page or for an availability heading that cannot be parsed.
        '''
        if self.legacy:
            return self.legacy.check_book_obtainable(book_id)

        self.check_current_holds()

        # Open questions from the original author:
        #consortium & advantage: copies owned, copies available, patrons on hold???
        #general: release date (for pre-release titles), on hold by user, checked out by user, always available y/n???

        # Values used when the page does not say otherwise.
        library_copies, available_copies = 1, 0
        number_waiting_per_copy = None
        hold_position_per_copy = None
        release_date = None

        if book_id in self.holds:
            self.log.info('Using availability info from %s holds list for %s' % (self.name, book_id))
            hold_position_per_copy, release_date = self.holds[book_id]

        page = open_url(self.log, self.new_book_url(self.library_id, book_id), cookiejar=self.cookiejar)
        soup = BeautifulSoup(page.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        page_error = soup.find('div', attrs=class_contains('error-container'))
        if page_error:
            raise LibraryError(text_only(page_error))

        container = must_find(soup, 'div', attrs={'class':'availability-container'})

        for heading in container.findAll('h3', recursive=True):
            text = text_only(heading)
            words = text.split()
            word_count = len(words)

            # "3 of 5 copies available"
            if (word_count >= 5 and words[1] == 'of' and words[3] in ('copy', 'copies')
                    and words[4] == 'available'):
                available_copies = int(words[0])
                library_copies = int(words[2])
                continue

            # "10 people waiting per copy"
            if word_count >= 3 and words[1] in ('person', 'people') and words[2] == 'waiting':
                number_waiting_per_copy = int(words[0])
                continue

            # "Pre-release: Expected May. 24, 2016"
            if word_count >= 3 and words[0] == 'Pre-release:' and words[1] == 'Expected':
                release_date = parse_only_date(' '.join(words[2:]).replace('.', ''), assume_utc=True)
                continue

            # Anything else that is not blank is unexpected.
            if text:
                raise LibraryError('Could not parse availability: %s' % text)

        button_area = must_find(soup, 'div', attrs=class_contains('action-button-top'))
        first_button = must_find(button_area, 'a', attrs={'role':'button'})
        button_label = text_only(first_button).lower()    # : PLACE A HOLD, BORROW, ?

        # The main button overrides or completes the scraped copy counts.
        have_checked_out = (button_label == 'go to bookshelf')
        if button_label == 'place a hold':
            available_copies = 0
        elif available_copies == 0 and button_label in ('borrow', 'listen now'):
            available_copies = 1

        # estimate availability
        return self.when_obtainable(
            library_copies=library_copies, available_copies=available_copies,
            number_waiting_per_copy=number_waiting_per_copy, release_date=release_date,
            have_checked_out=have_checked_out, hold_position_per_copy=hold_position_per_copy)
        
 
    def check_current_holds(self):
        '''
        Mark the holds list as checked, at most once and only when signed in.

        Retrieval of the actual hold data is not implemented yet; the body of
        the try block is a placeholder.
        '''
        if not self.holds_checked and self.signed_in:
            self.holds_checked = True

            try:
                # Placeholder for fetching hold info, intended to fill in:
                #self.holds[book_id] = (hold_position_overall, release_date)
                pass

            except Exception as err:
                self.log.exception('', err)

            


def inventory_overdrive_sites(abort, log, status, config):
    '''
    Build the list of OverDrive sites, inventory each one and log a summary.

    The site list is read from overdrive_sites.json in the plugins
    configuration directory when that file exists. Otherwise it is discovered
    by querying the OverDrive library map over a grid of points and then saved
    to that file. Every site is passed to inventory_library and the collected
    statistics are written to the log. The abort argument is not used.
    '''
    QUERY_RADIUS_MILES = 500    # Bigger is better, but OverDrive may have a limit
    RADIANS_IN_DEGREES = 0.0174533
    MILES_PER_DEGREE_LATITUDE = 69.172

    # (lat_min, lat_max, long_min, long_max) of each area covered by the search
    SEARCH_REGIONS = (
        (-50.0, -10.0, 110.0, 179.0),   # Australia and NZ
        (20.0, 75.0, -180.0, 45.0),     # USA and Europe
        )

    branches = set()
    sites_filename = os.path.join(config_dir, 'plugins', 'overdrive_sites.json')

    if not os.path.isfile(sites_filename):
        log.info('Searching map for OverDrive sites')

        # Cover each region in overlapping circles, one map query per circle.
        sites = {}
        lat_step = QUERY_RADIUS_MILES / MILES_PER_DEGREE_LATITUDE

        for lat_min, lat_max, long_min, long_max in SEARCH_REGIONS:
            lat = lat_min
            while lat <= lat_max:
                # NOTE(review): multiplying by cos(latitude) makes the step smaller
                # toward the poles; a constant distance in miles would need the
                # step divided by cos(latitude). Confirm the denser grid is intended.
                long_step = math.cos(lat * RADIANS_IN_DEGREES) * lat_step
                lng = long_min
                while lng <= long_max:
                    query = urllib.urlencode({
                        'latLng': "%0.6f,%0.6f" % (lat, lng),
                        'radius': unicode(QUERY_RADIUS_MILES),
                        })
                    response = open_url(log, 'https://www.overdrive.com/mapbox/find-libraries-by-location?%s' % query)
                    found = json.loads(response.data)      # Parse the json results

                    new_site_count = 0
                    for consortium in found:
                        site_url = consortium["consortiumUrl"]
                        if site_url in sites:
                            continue

                        new_site_count += 1
                        sites[site_url] = consortium["consortiumName"]
                        branches.update(branch["properties"]["id"] for branch in consortium["geoJson"])

                    log.info('%d results with %d added sites' % (len(found), new_site_count))
                    lng += long_step

                lat += lat_step

        with open(sites_filename, 'wb') as of:
            of.write(json.dumps(sites, sort_keys=True, indent=2, separators=(',', ': ')))

    else:
        log.info('loading OverDrive site list from %s' % sites_filename)

        with open(sites_filename, 'rb') as of:
            sites = json.loads(of.read())

        # A saved file holding a plain list of urls is mapped url -> url.
        if isinstance(sites, list):
            sites = dict(zip(sites, sites))

    log.info('Found %d sites and %d branches.' % (len(sites), len(branches)))

    # Accumulator filled in by inventory_library for every site.
    stats = Object()
    stats.actual_site_count = 0
    stats.new_sites = set()
    stats.legacy_sites = set()
    stats.signin_required_sites = set()
    stats.formats = {}
    stats.ebook_count = set()
    stats.audiobook_count = set()
    stats.search_params = defaultdict(set)

    site_total = len(sites)
    for index, (library_url, library_name) in enumerate(sorted(sites.items())):
        status.update(index / site_total, library_name)
        inventory_library(library_url, library_name, log, config, stats)

    # Disabled: inventory of discovered optional new library sites.
    #log.info('')
    #log.info('Processing %d discovered optional new library sites' % len(discovered_new_overdrive_sites))
    #
    #for i,(library_url, library_name) in enumerate(sorted(discovered_new_overdrive_sites.items())):
    #    status.update(i / len(sites), library_name)
    #    inventory_library(library_url, library_name, log, config, stats)

    log.info('')
    log.info('new sites:')
    log.info(json.dumps(sorted(stats.new_sites), sort_keys=True, indent=2, separators=(',', ': ')))

    log.info('')
    log.info('search parameters:')
    for param_name in sorted(stats.search_params):
        log.info('%s (%d)' % (param_name, len(stats.search_params[param_name])))

    log.info('')
    log.info('formats:')
    for format_name, format_id in stats.formats.items():
        log.info('%s (%s)' % (format_name, format_id))

    # The 50 libraries with the largest collection of each kind.
    for heading, counts in (
            ('top EPUB eBook counts:', stats.ebook_count),
            ('top MP3 audiobook counts:', stats.audiobook_count),
            ):
        log.info('')
        log.info(heading)
        for book_count, lib_name in sorted(counts, key=lambda entry: -entry[0])[:50]:
            log.info('%s (%d)' % (lib_name, book_count))

    log.info('')
    log.summary('Found: %d branches, %d sites, %d actual sites, %d new sites, %d legacy sites, %d sites require sign in' % (
            len(branches), len(sites), stats.actual_site_count, len(stats.new_sites),
            len(stats.legacy_sites), len(stats.signin_required_sites)))
            
            
            

def _inventory_library_formats(lib, log, stats):
    '''Search the library for everything and record the eBook formats and title counts listed.'''
    response = open_url(log, 'https://%s/search?s=' % lib.library_id, cookiejar=lib.cookiejar)
    soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

    ebook_filters = must_find(soup, 'div', attrs={'data-filter-content': 'eBook'})
    for script in ebook_filters.findAll('script', recursive=True):
        # Each script holds the format id and writes the filter link html.
        format_id = js_value(log, script.string, "var format = ")
        written = re.search(r"document\.write\('(.+)'\);", script.string)
        format_soup = BeautifulSoup(written.group(1), convertEntities=BeautifulSoup.HTML_ENTITIES)
        format_name = text_only(must_find(format_soup, 'a'))
        format_count = int(text_only(must_find(format_soup, 'span')).replace(',',''))

        if format_name in stats.formats:
            if stats.formats[format_name] != format_id:
                log.error('format %s has multiple ids %s and %s' % (format_name, format_id, stats.formats[format_name]))
        else:
            log.info('detected format: %s (%s)' % (format_name, format_id))
            stats.formats[format_name] = format_id

        if format_name in ("EPUB eBook", "MP3 audiobook"):
            counts = stats.ebook_count if format_name == "EPUB eBook" else stats.audiobook_count
            counts.add((format_count, lib.library_id + ' ' + lib.name))


def _inventory_library_search_params(lib, log, stats):
    '''Record which advanced search inputs and select options this library offers.'''
    response = open_url(log, 'https://%s/advanced-search' % lib.library_id, cookiejar=lib.cookiejar)
    soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

    form = must_find(soup, 'form', attrs={'id': 'advanced-search-form'})
    for input_tag in form.findAll('input', recursive=True):
        stats.search_params[input_tag.get('name', 'unknown')].add(lib.library_id)

    for select in form.findAll('select', recursive=True):
        select_name = select.get('name', 'unknown')

        for option in select.findAll('option', recursive=True):
            param_key = '%s/%s/%s/%s' % (
                select_name, option.get('value', ''),
                option.get('data-parameter-name', ''), text_only(option))
            stats.search_params[param_key].add(lib.library_id)


def inventory_library(library_url, library_name, log, config, stats):
    '''
    Inventory one OverDrive site and add the findings to stats.

    The url is validated into a library id, a library object is created and
    signed in, and the site is classified as new, legacy and/or sign-in
    required. New sites that do not require sign in are also scraped for their
    formats, title counts and advanced search options. Failures in each step
    are logged and do not propagate. Nothing is recorded when the url cannot
    be validated.
    '''
    log.context('Check library id from url "%s"' % library_url)
    try:
        library_id = OverDrive.validate_library_id(library_url, config=None)
    except Exception as e:
        log.exception('', e)
        library_id = ''
    log.context(None)

    if not library_id:
        return

    # see LibraryConfig for kwargs
    lending_lib = LendingLibrary(
        library_id=library_id, name=library_name, enabled=True,
        card_number='', card_pin='', branch_id='', provider_id=OverDrive.id)
    lib = SearchableLibrary.create(log, config, lending_lib)

    log.context('Sign in to %s' % lib.library_id)
    try:
        lib.sign_in(True, allow_redirect=True)
    except Exception as e:
        log.exception('', e)
    log.context(None)

    library_id = lib.library_id     # pick up any redirect
    stats.actual_site_count += 1

    is_new_site = lib.ready_for_search and not lib.legacy
    if is_new_site:
        stats.new_sites.add(library_id)

    if lib.legacy is not None:
        stats.legacy_sites.add(library_id)

    if lib.signin_required:
        stats.signin_required_sites.add(library_id)

    # Only new sites that can be searched anonymously are scraped further.
    if not (is_new_site and not lib.signin_required):
        return

    # Each pass is independent: a failure in one is logged and the other still runs.
    for collect in (_inventory_library_formats, _inventory_library_search_params):
        try:
            collect(lib, log, stats)
        except Exception as e:
            log.exception('', e)
            
                
def inventory_overdrive_books(abort, log, status, config):
    '''
    Extend the saved mapping between OverDrive reserve IDs and numeric book IDs.

    The mapping (reserve id -> book id) is loaded from OVERDRIVE_BOOKS_FILE in
    the plugins configuration directory when that file exists; otherwise the
    inventory starts empty. Every book id in the range given by the
    TWEAK_OD_FIRST_ID and TWEAK_OD_LAST_ID tweaks that is not yet mapped is
    looked up on www.overdrive.com. A 404 response is logged as not found; any
    other failure for a book is logged and the scan continues. The mapping is
    written back to the file at the end. The abort and config arguments are
    not used.
    '''
    # book id range appears to be approximately 1-300,000
    first_id = tweaks.get(TWEAK_OD_FIRST_ID, 0)
    last_id = tweaks.get(TWEAK_OD_LAST_ID, 300000)

    json_filename = os.path.join(config_dir, 'plugins', OVERDRIVE_BOOKS_FILE)

    # Start from an empty mapping on the first run, when nothing has been saved yet.
    books = {}
    if os.path.isfile(json_filename):
        with open(json_filename, 'rb') as of:
            books = json.loads(of.read())

    # book id -> reserve id, used to skip ids that were resolved previously
    reverse_books = {book_id: reserve_id for reserve_id, book_id in books.items()}

    count = last_id - first_id + 1
    for i, book_id in enumerate(range(first_id, last_id + 1)):
        status.update(i / count, "book id %d" % book_id)

        if book_id in reverse_books:
            continue

        log.context('Get book %d' % book_id)

        try:
            try:
                response = open_url(log, 'https://www.overdrive.com/media/%d' % book_id, save=None)
            except urllib2.HTTPError as e:
                if e.code != 404:
                    raise   # report other errors
                log.info("not found %d" % book_id)
                response = None

            if response is not None:
                soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

                # The reserve id is taken from the script that assigns dataLayer.
                data_script = None
                for script in soup.findAll('script'):
                    if "dataLayer =" in unicode(script):
                        data_script = script
                        break

                if data_script is None:
                    raise LibraryError('Missing dataLayer script')

                data_layer = js_value(log, unicode(data_script), "dataLayer =")

                if len(data_layer) != 1:
                    log.error("Data layer len=%d" % len(data_layer))

                reserve_id = data_layer[0]["content"]["reserveID"]

                log.info("found %d = %s" % (book_id, reserve_id))

                if reserve_id in books:
                    log.warn("Reserve ID %s matches multiple books: %d and %d" % (reserve_id, books[reserve_id], book_id))

                books[reserve_id] = book_id
                reverse_books[book_id] = reserve_id

        except Exception as e:
            # A failure for one book must not stop the scan of the rest.
            log.exception('', e)

        log.context(None)

    with open(json_filename, 'wb') as of:
        of.write(json.dumps(books, sort_keys=True, indent=4, separators=(',', ': ')))
        of.write(b'\n')

    log.info('Completed')
