# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-

__license__ = 'GPL 3'
__copyright__ = '2011-2012, Jesse Chisholm <jesse.chisholm@gmail.com>'
__docformat__ = 'restructuredtext en'

# The plugin name still says "MP3" even though it now reads more than MP3 files.
#
AUDIOBOOKREADER_NAME      = 'Read MP3 AudioBook metadata'
AUDIOBOOKREADER_VERSION   = (1, 0, 79)
                                # intentional   accidental
AUDIOBOOKREADER_FILETYPES = set(['mp3',
                                 'm4b',         'm4a','mp4','3gp','3g2','qt',
                                 'aa',          'aax',
                                 'f4b',         'f4a','flv',
                                 'm3u',         'm3u8','ram','pls','kpl'])

from common import DEBUG
from common import VERBOSE
from common import log

if VERBOSE:
    log.debug("AudioBookReader: __file__   : %s" % (__file__))
    log.debug("AudioBookReader: __package__: %s" % (__package__))
    log.debug("AudioBookReader: our.version: %s" % (str(AUDIOBOOKREADER_VERSION)))

import os, sys, re, mimetypes, glob
from calibre.customize import MetadataReaderPlugin
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.logging import default_log

from common import AUDIOBOOK_KNOWN_GENRE
from common import AudioBookException

#######################################################################
# the metadata reader derived class
class AudioBookMetadataReader(MetadataReaderPlugin):
    name                = AUDIOBOOKREADER_NAME
    version             = AUDIOBOOKREADER_VERSION
    file_types          = AUDIOBOOKREADER_FILETYPES
     #
    description         = """
Reads metadata from an MP3, M4B, AA and F4B AudioBook file, or M3U playlist.
Uses eyeD3-0.6.17j; mp4file-0.2j; pyaudibletags-1.0.1; flvlib-0.1.12 for parsing files.
"""
    supported_platforms = ['windows', 'osx', 'linux']
    author              = 'Jesse Chisholm'
    on_import           = True
    minimum_calibre_version = (0, 7, 53)
    
    def get_metadata(self, stream, ftype):
        log.debug("AudioBookReader: [%s] : %s" % (ftype, _get_name(stream)))
        return _get_metadata(stream, ftype)

######################################################

def _get_metadata(stream, ftype):
    ftype = ftype.lower()    # paranoia
    ostream = stream
    streamname = _get_name(stream)
    if isinstance(stream, basestring):
        if -1 != stream.find('://'):
            log.debug("'stream' is URL: %s" % (stream))
            import urllib2
            ostream = urllib2.urlopen(stream)
        else:
            log.debug("'stream' is PATH: %s" % (stream))
            ostream = file(stream,'rb')
    elif not isinstance(stream, file):
        log.error("'stream' is neither a file nor the path to a file")
        log.debug("'stream' is type: %s" % (type(stream)))
        raise AudioBookException("'stream' is neither a file nor the path to a file")

    (dir, name) = os.path.split(streamname);
    if VERBOSE: print "debug: _get_metadata: [%s] : %s" % (ftype, streamname)
    log.debug("_get_metadata: [%s] : %s" % (ftype, streamname))

    mi = None;

    global _dispatch_table
    if ftype in _dispatch_table:
        func = _dispatch_table[ftype]
        mi = func(ostream,ftype)

    # if not handled by a specific handler,
    #   use our generic handler
    #
    if not mi:
        mi = _get_guess_metadata(streamname,ftype);
    #
    # ensure sanity, API requires these fields to have SOME value.
    #
    if mi:
        changed = 0

        if mi.series_index and not isinstance(mi.series_index, float):
            tmp = re.match("0*(\d+\.?\d*)", mi.series_index.strip())
            if tmp:
                changed = 1
                mi.series_index = float(tmp.group(1))
                if DEBUG: print ("SeriesIdx: %s" % (mi.series_index));
        if not mi.title:
            changed = 1
            mi.title = _("Unknown")
            if DEBUG: print ("Title:     %s" % (mi.title));
        if mi.series and mi.series_index and mi.title == mi.series:
            changed = 1
            mi.title = "%s (Part %.f)" % (mi.series, mi.series_index)
            if DEBUG: print ("Title:     %s" % (mi.title));
        if not mi.title_sort:
            changed = 1
            mi.title_sort = mi.title
            if mi.series and mi.series_index:
                mi.title_sort = "%#03.f %s" % (m_series_index, mi.title)
            if DEBUG: print ("TitleSort: %s" % (mi.title_sort));
        if not mi.authors:
            changed = 1
            mi.authors = (_("Unknown"),)
            if DEBUG: print ("Authors:   %s" % (mi.authors));
        if not mi.tags:
            changed = 1
            mi.tags = [ AUDIOBOOK_KNOWN_GENRE[0] ]
            if DEBUG: print ("Genre:     %s" % (mi.tags));
        if mi.pubdate and isinstance(mi.pubdate, basestring):
            changed = 1
            from calibre.utils.date import parse_date
            mi.pubdate = parse_date(mi.pubdate, True, True)
            if DEBUG: print ("PubDate:   %s" % (mi.pubdate));
        #
        # and we require something here
        #
        if not mi.mime:
            changed = 1
            mi.mime = "audio/mpeg";
            if DEBUG: print ("Mime:      %s" % (mi.mime));
        #
        # and we require something here
        #   but, in theory, the cover _could_ be a URL with no local cover_data
        #   so, leave mi.cover alone
        #
        if mi.is_null('cover_data'):
            (ignore, cover_data) = _get_RESOURCE_art()
            if cover_data:
                changed = 1
                mi.cover_data = cover_data
                if DEBUG: print ("CoverData: %s" % (mi.cover_data and mi.cover_data[0] or None));
        #
        if changed:
            if DEBUG: print ("====================");

    # if we opened it, we should close it, too.
    if isinstance(stream, basestring):
        ostream.close()

    return mi;

######################################################

# get the metadata from an M3U playlist file
#    calls _get_metadata for each entry
# @param:stream - an open file object to a file of M3U format
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
def _get_m3u_metadata(stream,ftype):
    #if not isinstance(stream,file): return None
    if VERBOSE: print "debug: _get_m3u_metadata: [%s] : %s" % (ftype, _get_name(stream))
    log.debug(".get_m3u: [%s] : %s" % (ftype,_get_name(stream)))
    title = None
    mi = None
    (dir, name) = os.path.split(_get_name(stream));
    log.debug("..dir : %s" % (dir))
    log.debug("..name: %s" % (name))
    try:
        while 1:
            line = stream.readline()
            if None == line or "" == line:
                break
            line = line.strip()
            if "" == line:
                continue
            log.debug("line: %s" % (line))
            if line.startswith('#'):
                continue
            if "kpl" == ftype:
                # entry1=url
                if not line.lower().startswith("entry"):
                    continue
                line = line[line.find("="):]
                log.debug("line= %s" % (line))
            elif "pls" == ftype:
                # File1=url
                if not line.lower().startswith("file"):
                    continue
                line = line[line.find("="):]
                log.debug("line= %s" % (line))
            ext = os.path.splitext(line)[1].strip()
            ext = ext[1:].lower()   # don't need the lead '.'
            try:
                if -1 == line.find("://") and -1 == line.find(":\\"):
                    line = os.path.join(dir, line).strip()
                    log.debug("line= %s" % (line))
                miN = _get_metadata(line, ext);
                title = title or miN.series
                if not mi:
                    mi = miN
                else:
                    _clone_metadata(mi, miN)
                mi.series = None
                mi.series_index = 0.0
                mi.title = title
                mi.title_sort = title
            except Exception, ex:
                if VERBOSE: print "exception: %s\n         : [%s] : %s" % (str(ex),ext, line)
                log.debug("exception: %s : [%s] : %s" % (str(ex),ext, line))
                pass
    except Exception, err:
        if VERBOSE: print "exception: %s\n         : [%s] : %s" % (str(err),ftype, _get_name(stream))
        log.debug("exception: %s : [%s] : %s" % (str(err),ftype, _get_name(stream)))
        pass
    #
    if mi and not mi.title:
        mi.title = _get_name(stream)
    if DEBUG and mi:
        (dir, name) = os.path.split(_get_name(stream));
        print ("File:      %s" % (name));
        print ("Genre:     %s" % (mi.tags));
        print ("Title:     %s" % (mi.title));
        print ("TitleSort: %s" % (mi.title_sort));
        print ("Album:     %s" % (mi.series));
        print ("Chapter:   %s" % (mi.series_index));
        print ("Authors:   %s" % (mi.authors));
        print ("Publisher: %s" % (mi.publisher));
        print ("PubDate:   %s" % (mi.pubdate));
        print ("Cover:     %s" % (mi.cover));
        cvd = mi.cover_data
        print ("CoverData: %s" % (cvd and cvd[0] or None));
        print ("Comments:  %s" % (mi.comments));
        print ("====================");
    #
    if not mi:
        mi = _get_guess_metadata(_get_name(stream),ftype)
    return mi

######################################################

# get the metadata from an MP3 file using the ID3 container format.
# @param:stream - an open file object to a file of ID3v1 or ID3v2 format
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
def _get_mp3_metadata(stream,ftype):
    #if not isinstance(stream,file): return None
    if VERBOSE: print "debug: _get_mp3_metadata: [%s] : %s" % (ftype, _get_name(stream))
    log.debug(".get_mp3: [%s] : %s" % (ftype,_get_name(stream)))
    import id3Wrapper
    mp3 = id3Wrapper.id3Wrapper(stream)
    #
    # start of with the best we can from the file name alone
    #
    mi = _get_guess_metadata(_get_name(stream),ftype)
    #
    # then fill in with what we found in the tags
    #
    _clone_tags(mi, mp3)
    #
    if DEBUG:
        (dir, name) = os.path.split(_get_name(stream));
        print ("File:      %s" % (name));
        print ("Genre:     %s" % (mi.tags));
        print ("Title:     %s" % (mi.title));
        print ("TitleSort: %s" % (mi.title_sort));
        print ("Album:     %s" % (mi.series));
        print ("Chapter:   %s" % (mi.series_index));
        print ("Authors:   %s" % (mi.authors));
        print ("Publisher: %s" % (mi.publisher));
        print ("PubDate:   %s" % (mi.pubdate));
        print ("Cover:     %s" % (mi.cover));
        cvd = mi.cover_data
        print ("CoverData: %s" % (cvd and cvd[0] or None));
        print ("Comments:  %s" % (mi.comments));
        print ("====================");
    #
    return mi

######################################################

# get the metadata from an MP4 file (really an M4A or M4B, but it all really the same)
# @param:stream - an open file object with MPEG-4 Part 14 format
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
def _get_mp4_metadata(stream,ftype):
    #if not isinstance(stream,file): return None
    if VERBOSE: print "debug: _get_mp4_metadata: [%s] : %s" % (ftype, _get_name(stream))
    log.debug(".get_mp4: [%s] : %s" % (ftype,_get_name(stream)))
    import mp4Wrapper
    mp4 = mp4Wrapper.mp4Wrapper(stream)
    #
    #   check for special 'ID32' tag set that may override normal tags
    #
    if mp4.has('ID32'):
        import cStringIO
        memFile = cStringIO.cStringIO(mp4.get('ID32'))
        import mp3Wrapper
        mp3 = mp3Wrapper.mp3Wrapper(memFile)
        for t in mp3.tags:
            log.warning("ID3v2 tag override: [%s]='%s' replaces '%s'" % (t, mp3.get(t), mp4.get(t)))
            mp4.set(t, mp3.get(t))
    #
    # start of with the best we can from the file name alone
    #
    mi = _get_guess_metadata(_get_name(stream),ftype)
    #
    # then fill in with what we found in the tags
    #
    _clone_tags(mi, mp4)
    #
    if DEBUG:
        (dir, name) = os.path.split(_get_name(stream));
        print ("File:      %s" % (name));
        print ("Genre:     %s" % (mi.tags));
        print ("Title:     %s" % (mi.title));
        print ("TitleSort: %s" % (mi.title_sort));
        print ("Album:     %s" % (mi.series));
        print ("Chapter:   %s" % (mi.series_index));
        print ("Authors:   %s" % (mi.authors));
        print ("Publisher: %s" % (mi.publisher));
        print ("PubDate:   %s" % (mi.pubdate));
        print ("Cover:     %s" % (mi.cover));
        cvd = mi.cover_data
        print ("CoverData: %s" % (cvd and cvd[0] or None));
        print ("Comments:  %s" % (mi.comments));
        print ("====================");
    #
    return mi

######################################################

# get the metadata from an AA file
# @param:stream - an open file object with Audible.com format
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
def _get_aa_metadata(stream,ftype):
    #if not isinstance(stream,file): return None
    if VERBOSE: print "debug: _get_aa_metadata: [%s] : %s" % (ftype, _get_name(stream))
    log.debug(".get_aa: [%s] : %s" % (ftype,_get_name(stream)))
    import aaWrapper
    aa = aaWrapper.aaWrapper(stream)
    #
    # start of with the best we can from the file name alone
    #
    mi = _get_guess_metadata(_get_name(stream),ftype)
    #
    # then fill in with what we found in the tags
    #
    _clone_tags(mi, aa)
    #
    if DEBUG:
        (dir, name) = os.path.split(_get_name(stream));
        print ("File:      %s" % (name));
        print ("Genre:     %s" % (mi.tags));
        print ("Title:     %s" % (mi.title));
        print ("TitleSort: %s" % (mi.title_sort));
        print ("Album:     %s" % (mi.series));
        print ("Chapter:   %s" % (mi.series_index));
        print ("Authors:   %s" % (mi.authors));
        print ("Publisher: %s" % (mi.publisher));
        print ("PubDate:   %s" % (mi.pubdate));
        print ("Cover:     %s" % (mi.cover));
        cvd = mi.cover_data
        print ("CoverData: %s" % (cvd and cvd[0] or None));
        print ("Comments:  %s" % (mi.comments));
        print ("====================");
    #
    return mi

######################################################

# get the metadata from an F4B file
# @param:stream - an open file object with Flash format
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
def _get_flv_metadata(stream,ftype):
    #if not isinstance(stream,file): return None
    if VERBOSE: print "debug: _get_flv_metadata: [%s] : %s" % (ftype, _get_name(stream))
    log.debug(".get_flv: [%s] : %s" % (ftype,_get_name(stream)))
    import flvWrapper
    flv = flvWrapper.flvWrapper(stream)
    #
    # start of with the best we can from the file name alone
    #
    mi = _get_guess_metadata(_get_name(stream),ftype)
    #
    # then fill in with what we found in the tags
    #
    _clone_tags(mi, flv)
    #
    if DEBUG:
        (dir, name) = os.path.split(_get_name(stream));
        print ("File:      %s" % (name));
        print ("Genre:     %s" % (mi.tags));
        print ("Title:     %s" % (mi.title));
        print ("TitleSort: %s" % (mi.title_sort));
        print ("Album:     %s" % (mi.series));
        print ("Chapter:   %s" % (mi.series_index));
        print ("Authors:   %s" % (mi.authors));
        print ("Publisher: %s" % (mi.publisher));
        print ("PubDate:   %s" % (mi.pubdate));
        print ("Cover:     %s" % (mi.cover));
        cvd = mi.cover_data
        print ("CoverData: %s" % (cvd and cvd[0] or None));
        print ("Comments:  %s" % (mi.comments));
        print ("====================");
    #
    return mi

######################################################

# patterns to extract title, chapter and author, ignoring compression and ftype
#    from a librivox style MP3 file name.
#
# Typically, from Librivox, the pieces are:
#    (?P<title>[^_\d]+?)
#    (?P<title>[^\d]+?)
#    (?P<title>[^_]+?)
#    (?P<title>.+?)
#    (?P<chapter>\d+[\._]?\d*)
#    (?P<author>[^\d]+?)
#
# If title and author end up containing underscore, they should be converted to space.
# If chapter ends up containing underscore, it should be converted to period.
#
# Typically the file name format is:
#    title_chapter_author_compression.mp3
#
# But may also be one of these:
#    title_chapter.mp3
#    titlechapter.mp3
#    authorchapter_title_unknown_compression.mp3
#    author_title.mp3
#    author-titlechapterofcount_compression.mp3
#    chapter_title.mp3
#
# But I have seen:
#    chapter.mp3
#    TtlChapter.mp3        #    where the title is an acronym
#
# calibre has utility method: calibre.ebooks.metadata.metadata_from_filename(name,pattern=None)
#
#   2nd parm is pattern that can have named fields:
#       (?P<title> ...)
#       (?P<author> ...)
#       (?P<series> ...)
#       (?P<series_index> ...)
#       (?P<isbn> ...)
#       (?P<publisher> ...)
#       (?P<published> ...)
#   NOTE: metadata_from_filename will replace all '_' with ' ' before pattern matching!
#   NOTE: metadata_from_filename will remove the extension from the name, so do pass it in.
#
# chapter
#    one or more digits
#        optionally followed by a period or underscore and zero or more digits
#        optionally followed by total number of chapter
RE_CHAPTER = "(?P<series_index>\d+[\._]?\d*)(of\d+)?"
#
#    and sometimes it is "(Part N)" or just "PartN"
#
RE_CHAPTER_PART = "\(Part[ _-]?(?P<series_index>\d+[\._]?\d*)\)"

# title with and without underscore
RE_TITLE  = "(?P<title>[^_]+)"
RE_TITLE_ = "(?P<title>.+?)"

# author with and without underscore
RE_AUTHOR  = "(?P<author>[^_]+)"
RE_AUTHOR_ = "(?P<author>.+?)"
RE_AUTHORS = "( ?by ?)?(?P<author>.+?)( ?& ?)(?P<other>.+?)"

###############################################################################
#eg: "All About Dragons(Part-1) by Ed Penner & Charles Wolcott on 1966 Walt Disney-Disneyland LP._mpeg4"
#     ttttttttttttttttt      i     aaaaaaaaa   aaaaaaaaaaaaaaa    dddd ppppppppppppppppppppppppp
#
#   RE_TITLE RE_CHAPTER_PART by RE_AUTHORS on (?P<year>\d+) (?P<publisher>[^\.]+)(?P<extension>\.\w+)?
RE_WORDY = "".join(["(?P<title>[^_]+)",
                    " ?\( ?Part[ _-]?(?P<series_index>\d+[\._]?\d*) ?\) ?",
                    "( ?by ?)(?P<author>[^_]+?)( ?& ?)(?P<other>[^_]+?)",
                    "( ?on ?)(?P<pubdate>\d+) ?(?P<publisher>[^_\.]+?)",
                    "(?P<extension>\.\w+)?"
                   ])
RE_WORDY2 = "".join([RE_TITLE, RE_CHAPTER_PART, RE_AUTHORS,
                    "( ?on ?)(?P<pubdate>\d+) ?(?P<publisher>[^_\.]+?)",
                    "(?P<extension>\.\w+)?"
                   ])
# {
#   'title'         :   'All About Dragons'
#   'series_index'  :   '1'
#   'author'        :   'Ed Penner'
#   'other'         :   'Charles Wolcott'
#   'pubdate'       :   '1966'
#   'publisher'     :   'Walt Disney-Disneyland LP'
#   'extension'     :   "._mpeg'
# }
#   in theory, anyway; barring some spaces that need strip()
###############################################################################

RE_TRAILING = "([_-]\d+\w+)?"

# just learn what we can from the file name.
# @param:filename - the name of a file, typically: stream.name
# @param:ftype - the file extension, sans period
# @return:Metadata - filled as best we can; or None if we can't
#
# from calibre.ebooks.metadata.meta import metadata_from_filename
# --- I COULD have used metadata_from_filename but I didn't like how it parsed.
#
def _get_guess_metadata(filename,ftype):
    if VERBOSE: print "debug: _get_guess_metadata: [%s] : %s" % (ftype, filename)
    (dir, name) = os.path.split(filename)
    (sname, ext) = os.path.splitext(name)
    if DEBUG: print ("File:      %s" % (name))
    # use various RE to try and parse the file name into useful chunks.
    # The first pattern that matches, wins; so put them in a good order.
    #
    test = [RE_WORDY, RE_WORDY2,
            "^%s[ _]%s[ _]%s$" % (RE_TITLE,   RE_CHAPTER, RE_AUTHOR),
            "^%s[ _]%s$"       % (RE_AUTHOR,  RE_TITLE),
            "^%s[ _]?%s %s$"   % (RE_AUTHOR,  RE_CHAPTER, RE_TITLE),
            "^%s[ _]?%s %s$"   % (RE_AUTHOR_, RE_CHAPTER, RE_TITLE_),
            "^%sPart%s$"       % ("(?P<title>\w+?)",  "(?P<series_index>\d+)"),
            "^%s[ _]?%s$"      % (RE_TITLE_,  RE_CHAPTER),
            "^%s[ _]?%s$"      % (RE_CHAPTER, RE_TITLE_),
            "^%s[ _]?%s$"      % (RE_TITLE,   RE_CHAPTER),
            "^%s[ _]?%s$"      % (RE_CHAPTER, RE_TITLE),
            "^%s$"            % (RE_TITLE_),
           ]
    # first remove any "-64kb" or the like!
    match = re.match("^(.*)([ _-]\d+\w+)$", sname)
    if match:
        if VERBOSE: print "debug: name ended in '%s'" % (match.group(2))
        sname = match.group(1)
    # next, turn '+' into ' ' as if it was HTTP query string.
    sname = sname.replace('+',' ')
    # get a copy with underscore also turned to space
    sname_ = sname.replace('_', ' ')

    # from calibre.ebooks.metadata.meta import metadata_from_filename
    #      COULD have used metadata_from_filename but I didn't like how it parsed.
    #
    title, chapter, author, series = None, None, None, None
    match = None
    mi = MetaInformation("", None)
    def _safe(m,k):
        try:
            if k in m.groupdict():
                return m.group(k).replace('_',' ').strip()
        except:
            pass
        return None
    for item in test:
        match = re.match(item, sname)
        if match:
            title = _safe(match, 'title')
            if title: mi.title = title
            author = _safe(match, 'author')
            other = _safe(match, 'other')
            if author or other:
                mi.authors = []
                if author: mi.authors.append(author)
                if other: mi.authors.append(other)
            if not mi.authors:mi.authors=[_("Unknown")]
            pubdate = _safe(match, 'pubdate')
            if pubdate: mi.pubdate = pubdate
            publisher = _safe(match, 'publisher')
            if publisher: mi.publisher = publisher
            series = _safe(match, 'series')
            chapter = _safe(match, 'series_index')
            # used later to set series, index, and such
            break

    # see if the title was in CamelBack notation
    #
    if title and author is None and series is None:
        if not ' ' in title:
            # add spaces in camel back notation of title?
            temp = re.sub(r"([A-Z])([a-z]*)", r" \1\2", title).strip()
            if temp and temp != title:
                title = temp
                mi.title = temp
    
    # if no matched series,  then series is title
    # if no matched chapter, then no series
    # if matched chapter,    then title_sort is chapter and title

    if title and not series:
        series = title
    if series and not title:
        title = series

    ugly = None
    if not chapter: # and not series:
        # first available sequence of digits,
        #   possible in floating point, possible with delimiter goofed
        #   possibly with lead "Part-"
        #   possible wrapped in parens
        #
        temp = re.search(RE_CHAPTER_PART, sname)
        if temp:
            ugly = temp.group()
            chapter = _safe(temp,'chapter').replace(' ','.').strip()

    # assert: if mi.title:  then it is pretty reasonable
    # assert: if mi.series: then it is pretty reasonable
    # assert: if chapter:   then it is pretty reasonable
    # assert: if series:    then it is pretty reasonable
    
    if chapter:
        try:
            mi.series_index = float(chapter)
        except:
            pass
        if series:
            if VERBOSE and mi.series:
                print "debug: changed series from '%s' to '%s'" % (mi.series, series)
            mi.series = series
        else:
            # getting here means both 'title' and 'series' are None :(
            #   then get first sequence that seems reasonable
            #
            title = sname_
            series = title
            temp = re.search("(?P<title>[^\.\&:;-]+)", title)
            if temp:
                title = _safe(temp, 'title')
                series = title
                mi.title = title
                mi.series = series

    # ensure conformity
    title = (mi.title != _("Unknown")) and mi.title or mi.series or _("Unknown")
    if title != mi.title:
        mi.title = title.replace('_',' ').strip()
    if mi.series_index:
        title_sort = "%#03.0f %s" % (mi.series_index, mi.title)
        mi.title_sort = title_sort.replace('_',' ').strip()
 
    #
    (cover, cover_data) = _get_folder_art(dir,[sname,mi.series,mi.title]);
    #
    mi.tags = [ AUDIOBOOK_KNOWN_GENRE[0] ];
    mi.cover = cover or None;
    mi.cover_data = cover_data or None;
    if not mi.series:
        mi.series = mi.title
    if mi.series and not mi.title:
        if mi.series_index:
            mi.title = "%#03.0f %s" % (mi.series_index, mi.series)
        else:
            mi.title = mi.series
    #
    if mi.series == mi.title and mi.series_index:
            mi.title_sort = "%#03.0f %s" % (mi.series_index, mi.series)
    #
    if DEBUG:
        print ("Title?     %s" % (mi.get('title',None)))
        print ("TitleSort? %s" % (mi.get('title_sort',None)))
        print ("Chapter?   %s" % (mi.get('series_index',None)))
        print ("Author?    %s" % (mi.get('authors',None)))
        print ("Album?     %s" % (mi.get('series',None)))
        print ("Publisher? %s" % (mi.get('publisher',None)))
        print ("PubDate?   %s" % (mi.get('pubdate',None)))
        print ("Cover?     %s" % (cover))
        print ("CoverData? %s" % (cover_data and cover_data[0] or None))
        print ("--------------------")
    #
    return mi;

######################################################

# get artwork from the plugin resources
# @returns -  tuple (cover, coverdata)
#    where either could be None.
#    but where coverdata should be a tuple (mime,byte[])
#
def _get_RESOURCE_art():
    cover_data = None
    try:
        # get the generic audio book image.
        # cover_data = ("image/svg", get_resources("images/AudioBook.svg"));
        cover_data = ("image/png", get_resources("images/AudioBook.png"));
        # cover_data = ("image/jpeg", get_resources("images/AudioBook.jpg"));
        # cover_data = ("image/gif", get_resources("images/AudioBook.gif"));
        if not cover_data[1]:
            cover_data = None
    except:
        pass
    return (None, cover_data);

######################################################

# fetch an image from the same directory as the audio file
# @param:dir - the directory to look in
# @param:nameList - a list of potential file names (sans extension)
# @return:
#     on failure: tuple (None, None)
#     on success: tuple (filepath, (mimetype, imagedata))
#
def _get_folder_art(dir,nameList):
    # check for "cover" art in same directory as file.
    exts = ('.svg','.png','.jpg','.jpeg','.gif','.bmp');
    fils = nameList or []
    fils.extend(("FRONT_COVER","ICON","OTHER","OTHER_ICON","AlbumArt","AlbumArtSmall","Folder","*"))
    choices = [];
    for f in fils:
        if not f:
            continue    # some in nameList could be None
        for e in exts:
            temp = os.path.join(dir,f + e);
            if "*" in f: temp = glob.glob(temp);
            else:        temp = os.path.exists(temp) and [temp] or None;
            if temp: choices.extend(temp);
    if VERBOSE:
        log.debug("folder art choices? %s" % (choices));
    cover, cover_data = None, None
    # FORNOW: just use the first one found!
    cover = choices and choices[0] or None;
    if cover:
        # Load img
        try:
            (imgType, imgEnc) = mimetypes.guess_type(cover)
            fp = file(cover, "rb")
            imgData = fp.read()
            fp.close()
            if imgData:
                if not imgType: imgType = _get_mime_from_data(imgData)
                cover_data = (imgType, imgData)
        except:
            pass
    return (cover, cover_data)  # (None, None) or (filename, (mime, data))

######################################################

# decide what kind of image this is from lead few bytes
# @param:data - the image bytes as read from disk
# @return: None on failure, the "image/type" on success.
#
def _get_mime_from_data(data):
    # TODO: image/tiff starts with one of ['M','M',0,42] or ['l','l',42,0]
    magic = data[:16]   # if we can't tell in the first 16 bytes, then we are hosed.
    if "<?xml" in magic: return "image/svg"
    if "PNG"   in magic: return "image/png"
    if "JFIF"  in magic: return "image/jpeg"
    if "GIF"   in magic: return "image/gif"
    if "BM"    in magic: return "image/bmp"
    return None

######################################################

def _get_name(value):
    try:
        name = value.name
    except:
        try:
            name = value.geturl()
        except:
            name = str(value)
    if name and name.startswith("file:///"):
        name = name[8:]
    return name

######################################################

def _parse_date(value):
    if isinstance(value,int):
        value = "%04d-02-02" % (value)                  # pad naked year to Ground Hog's Day
    if isinstance(value,basestring):
        if len(value)==0: value = "2011-02-02"          # gotta have something
        if len(value)<=4: value = "%s-02-02" % (value)  # pad naked year to Ground Hog's Day
        from calibre.utils.date import parse_date
        value = parse_date(value, True, True)           # parse as UTC return in UTC
    return value

######################################################

def _clone_tags(mi, wrap):
    #
    # then fill in with what we found in the tags
    #
    for t in wrap.tags:
        #
        # some special cases are handeled by the caller, not by this level
        #
        if t in ["ID32"]:
            # already handled by caller, ignore at this level
            continue
        #
        # special case tags that start with a hash,
        #   which means they are user defined custom tags
        #
        if t[:1]=='#':
            log.warning("custom column not handeled yet: [%s]='%s'" % (t, wrap.get(t)))
            continue
        #
        # 'pubdate' is a special case
        #   in that it wants to be the earliest of available values
        #
        if t == "pubdate" and mi.pubdate:
            tpub = _parse_date(wrap.get(t))     # converts to a datetime or returns unchanged
            wrap.set(t, tpub)
            mpub = _parse_date(mi.pubdate)
            mi.pubdate = mpub
            if mpub <= tpub:
                continue    # do NOT replace older pubdate with younger
            log.debug("pubdate: replacing '%s' with '%s'" % (mpub, tpub))
            # drop through to "not a special case"
        #
        # not a special case? then just copy it over
        #   wrapper tag names have been canonicalized to Metadata tag names and data types.
        #
        vN = wrap.get(t)
        if isinstance(vN, (list, tuple)):
            v = mi.get(t)
            for i in vN:
                if not i in v:
                    v.append(i)
            if _("Unknown") in v and len(v)>1:
                v.remove(_("Unknown"))
            mi.set(t,v)
        elif isinstance(vN, (dict)):
            v = mi.get(t)
            for i in vN.keys():
                if not i in v.keys():
                    v[i] = vN[i]
            mi.set(t,v)
        else:
            mi.set(t, vN)

######################################################

def _clone_metadata(mi, miN):
    """Fold the fields of another metadata object into ``mi``.

    mi  -- metadata object being filled in; accessed through ``get``/``set``
           and its ``pubdate``/``comments`` attributes.
    miN -- metadata object supplying new values; iterated for its field
           names and read through ``get``, ``pubdate`` and ``comments``.

    Returns nothing.  ``mi`` is updated in place; when both sides carry a
    pubdate, both are also rewritten with their parsed form.
    """
    for t in miN:
        #
        # special case tags that start with a hash,
        #   which means they are user defined custom tags
        #
        if t[:1] == '#':
            log.warning("custom column not handeled yet: [%s]='%s'" % (t, miN.get(t)))
            continue
        #
        # 'pubdate' is a special case
        #   in that it wants to be the earliest of available values
        #
        if t == "pubdate" and mi.pubdate:
            tpub = _parse_date(miN.pubdate)     # converts to a datetime or returns unchanged
            miN.pubdate = tpub
            mpub = _parse_date(mi.pubdate)
            mi.pubdate = mpub
            if mpub <= tpub:
                continue    # do NOT replace older pubdate with younger
            log.debug("pubdate: replacing '%s' with '%s'" % (mpub, tpub))
            # drop through to "not a special case"
        #
        # 'comments' is a special case
        #    in that they accumulate
        #
        if t == 'comments' and mi.comments:
            # only append when there really is something to append;
            # otherwise the literal text "None" would end up in the comments
            if miN.comments:
                mi.comments = "%s\n<br/>\n%s" % (mi.comments, miN.comments)
            continue
        #
        # not a special case? then just copy over NEW values
        #
        vN = miN.get(t)
        if not vN:
            continue    # nothing new for this field
        if isinstance(vN, (list, tuple)):
            # merge: keep what mi already has, append only the new entries
            v = mi.get(t)
            if v is None:
                v = []          # mi has nothing for this field yet
            elif isinstance(v, tuple):
                v = list(v)     # tuples cannot be appended to
            for i in vN:
                if i not in v:
                    v.append(i)
            # the "Unknown" placeholder is pointless once a real value exists
            if _("Unknown") in v and len(v) > 1:
                v.remove(_("Unknown"))
            mi.set(t, v)
        elif isinstance(vN, dict):
            # merge: existing keys in mi win, only missing keys are added
            v = mi.get(t)
            if v is None:
                v = {}          # mi has nothing for this field yet
            for i in vN:
                if i not in v:
                    v[i] = vN[i]
            mi.set(t, v)
        else:
            mi.set(t, vN)

######################################################

# debugging method to dump a dictionary to the console
# @param:name - the name of this dictionary, for display purposes
# @param:dset - the dictionary set to dump
# @param:w    - an optional width for justifying field names
#               will determine if not set
#
# Uses the single-argument parenthesised print form, which produces the
# same output under the print statement and the print function.
#
def dumpDict(name,dset,w=0):
    if not dset:
        print("%s : None" % (name,))
        return
    if not w:
        # justify to the longest key
        w = max(len(d) for d in dset)
    for d in dset:
        try:
            print("%s[%s] = %s" % (name,d.ljust(w),dset[d]))
        except Exception:
            # the value could not be fetched or formatted; still show the key
            print("%s[%s] = ***** exception *****" % (name,d.ljust(w)))

# debugging method to dump a dictionary to the log file
# @param:name - the name of this dictionary, for display purposes
# @param:dset - the dictionary set to dump
# @param:w    - an optional width for justifying field names
#               will determine if not set
#
def logDict(name,dset,w=0):
    if not dset:
        # pre-format the message like every other log call in this file
        log.debug("%s: None" % (name,))
        return
    if not w:
        # justify to the longest key
        w = max(len(d) for d in dset)
    for d in dset:
        try:
            log.debug("%s[%s] = %s" % (name,d.ljust(w),dset[d]))
        except Exception:
            # the value could not be fetched or formatted; still log the key
            log.debug("%s[%s] = ***** exception *****" % (name,d.ljust(w)))

######################################################

# make it convenient to remember which file extensions
# go with which parsing methods.
#
_dispatch_table = {}
# one row per parser: (parsing function, file extensions it handles)
for _parser, _extensions in (
        (_get_mp3_metadata, ['mp3']),
        (_get_aa_metadata,  ['aa','aax']),
        (_get_mp4_metadata, ['m4a','m4b','mp4','3gp','3g2','qt']),
        (_get_flv_metadata, ['f4a','f4b','flv']),
        (_get_m3u_metadata, ['m3u','m3u8','ram','pls','kpl']),
    ):
    for e in _extensions:
        _dispatch_table[e] = _parser

######################################################
######################################################

def main(argv):
    """Debugging entry point: print the metadata read from each file.

    argv -- list of file paths to inspect; when empty, the hard-coded
            developer sample list below is used instead.

    Turns DEBUG/VERBOSE on for this module, attaches a DEBUG-level handler
    to the shared logger, then prints every metadata field of every file.
    """
    global DEBUG, VERBOSE, log
    DEBUG = 1
    VERBOSE = 1
    # FOR DEBUGGING:
    import logging
    try:
        log_h = logging.FileHandler("C:/Users/Jesse/calibre_hold/dev/AudioBookReader/_debug.log")
    except (IOError, OSError):
        # the developer's log directory is not available on this machine;
        # keep the debug output by sending it to stderr instead
        log_h = logging.StreamHandler()
    log_f = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    log_h.setFormatter(log_f)
    log.addHandler(log_h)
    log.setLevel(logging.DEBUG)

    sample_dir = "C:/Users/jesse/calibre_hold/dev/Temp"
    files = [
#"01_Welcome_to_Audible.aa",
#"02_The_Lion_the_Witch_and_the_Wardrobe.aa",
#"03_Frommers_Best_of_Rome_Audio_Tour.aa",
#"04_The_Dilbert_Principle.aa",
#"05_By_the_Light_of_the_Moon.aa",
#"All+About+Dragons(Part-1)+by+Ed+Penner+&+Charles+Wolcott+on+1966+Walt+Disney-Disneyland+LP._mpeg4.mp4",
#"All+About+Dragons(Part-2)+by+Ed+Penner+&+Charles+Wolcott+on+1966+Walt+Disney-Disneyland+LP._mpeg4.mp4",
#"arthurconandoyle_thecrookedman.mp3",
#"Dragons_Secret-48kb.m4b",
#"LordOfTheWorldPart1.m4b",
#"LordOfTheWorldPart2.m4b",
#"secondlatchkey_02_williamson_64kb.mp3",
#"TheReluctantDragon.mp4",
#"The Reluctant Dragon.flv",
#"AllAboutDragons.m3u",
#"LordOfTheWorld.m3u",
#"Example.ram",
"WebExample.m3u",
#"RadioMozart.m3u",
            ]
    #
    # no files on the command line? fall back to the sample list
    #
    if not argv:
        argv = [os.path.join(sample_dir, f) for f in files]
    #
    for f in argv:
        (n, e) = os.path.splitext(f)
        #
        # dispatch on the lower-cased extension without its leading dot
        #
        mi = _get_metadata(f, e[1:].lower())
        #

        print ("File:      %s" % (f))
        # width of the longest field name, used to align the output
        longest = 0
        for field in mi:
            if len(field) > longest:
                longest = len(field)
        for field in mi:
            if field == 'cover_data':
                # the field may be present but empty (None); treat as no cover
                raw = mi.get(field, None) or (None, [])
                cvd = [raw[0], raw[1]]
                # do not flood the console with the raw image bytes
                if cvd[1] and len(cvd[1]) > 16:
                    cvd = [cvd[0], "byte[...]"]
                print ("%s : %s" % (field.ljust(longest), cvd))
            else:
                print ("%s : %s" % (field.ljust(longest), mi.get(field,None)))
        print ("====================")

######################################################

# when run as a script, hand the command line (minus the program name) to main()
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
