#!/usr/bin/env python

import re
import sys
import tempfile
import subprocess
import os
import shutil
import argparse

from xml.etree import ElementTree as ET

def symlink (src, dst):
  """Make dst refer to the file src.

  On Windows (os.name == "nt") a hard link is created with the external
  ``fsutil hardlink create`` command; everywhere else a symbolic link is
  created with os.symlink.

  Raises subprocess.CalledProcessError if fsutil fails, or OSError if
  os.symlink fails (for instance when dst already exists).
  """
  if os.name != "nt":
    os.symlink (src, dst)
    return
  # fsutil expects the new link name first and the existing file second.
  fsutil_cmd = ["fsutil", "hardlink", "create", dst, src]
  subprocess.check_call (fsutil_cmd)

def _tag_options (basename, rules):
  """Turn --match/--tag rules into ``--KEY=VALUE`` options for one file name.

  rules is a flat list ``[option, argument, option, argument, ...]``.  A
  "--match" rule applies its regular expression to basename with re.match; a
  "--tag" rule uses the most recent match and is ignored when that match
  failed or no "--match" came before it.
  """
  options = []
  m = None
  for (opt, arg) in zip (rules [0::2], rules [1::2]):
    if opt == "--match":
      m = re.match (arg, basename)
    elif opt == "--tag" and m:
      # Split on the first "=" only: a lambda expression may contain "=" itself.
      (key, val) = arg.split ("=", 1)
      if re.search ("[^0-9]", val):
        # SECURITY: val is executed as Python code.  Only pass rules from a
        # trusted source (here: the user's own command line).
        val = eval (val) ((m.group (0),) + m.groups ())
      else:
        # A purely numeric value is a match group reference.
        val = m.group (int (val))
      options.append ("--{0}={1}".format (key, val))
  return options

def import_files (files, tmpd, rules0):
  """Add files to Calibre with ``calibredb add`` and remember their origin.

  files  -- paths to import; entries that are not existing files are skipped.
  tmpd   -- directory receiving one ``<id>.txt`` per imported book, holding
            the path the book was imported from.
  rules0 -- flat list of "--match"/"--tag" rules (see _tag_options); None or
            an empty list means no metadata options.  The list is not
            modified.

  Returns the list of book ids (as strings) parsed from the calibredb output.
  A file whose output has no line starting with "Added" followed by a number
  is left out.  Raises subprocess.CalledProcessError if calibredb fails.
  """
  ids = []
  rules = list (rules0 or [])
  for f in files:
    if not os.path.isfile (f): continue
    args = ["calibredb", "add"] + _tag_options (os.path.basename (f), rules)

    sys.stdout.write (" ".join (args) + "\n")
    # universal_newlines makes the output text (not bytes) on Python 3 as well.
    out = subprocess.check_output (args + [f], universal_newlines = True)
    added = re.search (r"^Added[^0-9]+(\d+)", out, re.M)
    if not added: continue
    book_id = added.group (1)
    ids.append (book_id)
    with open (os.path.join (tmpd, book_id + ".txt"), "w") as idf:
      idf.write ("{0}".format (f))
  sys.stdout.write ("Finished importing files.\n")
  return ids

def list_ids (tmpd):
  """Return the book ids recorded in tmpd.

  An id is recorded as a directory entry named ``<digits>.txt``.  The whole
  name has to match, so leftovers such as ``12.txt.bak`` or ``12.txt~`` are
  not mistaken for ids.  Ids are returned as strings, in os.listdir order.
  """
  ids = []
  for name in os.listdir (tmpd):
    m = re.match (r"([0-9]+)[.]txt\Z", name)
    if m: ids.append (m.group (1))
  return ids

def get_catalog (tmpd):
  """Export the Calibre catalog to XML and return it parsed.

  Runs ``calibredb catalog`` with ``catalog.xml`` inside tmpd as the output
  file, then parses that file.  The file is left in place afterwards.

  Returns the parsed xml.etree ElementTree.  Raises
  subprocess.CalledProcessError if calibredb exits with a non-zero status.
  """
  xml_path = os.path.join (tmpd, "catalog.xml")
  export_cmd = ["calibredb", "catalog", xml_path]
  subprocess.check_call (export_cmd)
  return ET.parse (xml_path)

def catalog2dict (catalog_root):
  """Map each book id in a catalog to the path of its first format file.

  catalog_root -- root element holding ``record`` children; each record is
                  read for the text of ``./id`` and ``./formats/format``.

  Records that lack an id or have no format entry (or an empty one) are left
  out instead of raising AttributeError.  When a record lists several
  formats only the first one is used.
  """
  id_file_dict = {}
  for rec in catalog_root.findall ("./record"):
    book_id = rec.findtext ("./id")
    path = rec.findtext ("./formats/format")
    # findtext gives None for a missing element and "" for an empty one.
    if not book_id or not path: continue
    id_file_dict [book_id] = path
  return id_file_dict

def create_links (ids, id_file_dict, tmpd):
  """Replace Calibre's copy of each book with a link to the original file.

  ids          -- book ids (strings); ``<id>.txt`` in tmpd holds the path
                  the book was imported from.
  id_file_dict -- maps book id to the path of the file inside the library.
  tmpd         -- directory holding the ``<id>.txt`` files.

  A book is skipped, with a message on stderr, when it is missing from
  id_file_dict or when its original file no longer exists; in the latter case
  removing the library copy would leave only a dangling link.  A book whose
  library file already resolves to the original is skipped silently.
  """
  for book_id in ids:
    with open (os.path.join (tmpd, book_id + ".txt")) as idf: orig = idf.read ()
    lib_file = id_file_dict.get (book_id)
    if lib_file is None:
      sys.stderr.write ("Skipping id {0}: not in the Calibre catalog.\n".format (book_id))
      continue
    if not os.path.isfile (orig):
      sys.stderr.write ("Skipping id {0}: original file {1} not found.\n".format (book_id, orig))
      continue
    # Already linked, or the original lives inside the library itself:
    # unlinking would destroy the only copy.
    if os.path.realpath (orig) == os.path.realpath (lib_file): continue
    os.unlink (lib_file); symlink (orig, lib_file)

def get_cli_parser ():
  """Build the argparse parser for the command line.

  The parsed namespace has the attributes dir, links, rebuild, from, match
  and file.  "--match" and "--tag" share the destination ``match``: every
  occurrence appends the option name followed by its argument, so the result
  is a flat, ordered list such as ``["--match", RE, "--tag", "KEY=1"]``.  It
  is an empty list (never None) when neither option is given, so it can be
  iterated or sliced directly.
  """
  class AppendWithArg (argparse.Action):
    """Append the option string and its argument to the destination list."""
    def __call__ (self, parser, namespace, values, option_string = None):
      collected = getattr (namespace, self.dest) or []
      # Build a new list so the shared default list is never modified.
      setattr (namespace, self.dest, collected + [option_string, values])

  p = argparse.ArgumentParser ()
  p.add_argument ("--dir", help = "folder for original filename database")
  p.add_argument ("--links", action = "store_true",
      help = "replace Calibre database files with links")
  p.add_argument ("--rebuild", action = "store_true",
      help = "re-process all original filenames (needs --dir)")
  p.add_argument ("--from", metavar = "FILELIST", help = "read filenames from a list")
  p.add_argument ("--match", "--tag", action = AppendWithArg, metavar = 'ARG', default = [],
      help = "match regexp or assign tag using previous match. Tag ARG format: TAG=NNN to reference match group or TAG='lambda g: ...' for arbitrary expression (g will be a tuple of all match groups, g [0] = entire match)")
  p.add_argument ("file", nargs = "*", help = "files to import into Calibre")
  return p

def parse_args ():
  """Old parser. Deprecated.

  Hand-rolled parser over sys.argv [1:].  Returns a dict with the keys
  "dir" (None unless --dir is given), "links" and "rebuild" (False unless
  the flag is given), "match" (flat list of "--match"/"--tag" options, each
  followed by its argument) and "file".

  Option parsing stops at the first unrecognised argument; it and everything
  after it are taken as file names.  Names read with --from and names given
  on the command line are combined, in the order they appear.

  Raises IndexError when an option that needs an argument is the last word.
  """
  files = []; rules = []
  opts = { "dir": None, "links": False, "rebuild": False }
  args = sys.argv [1:]
  while args:
    arg = args.pop (0)
    if   arg == "--dir":
      opts ["dir"] = args.pop (0)
    elif arg == "--links":
      opts ["links"] = True
    elif arg == "--rebuild":
      opts ["rebuild"] = True
    elif arg == "--from":
      with open (args.pop (0)) as flist:
        files += [l.rstrip ('\n') for l in flist]
    elif arg == "--match" or arg == "--tag":
      rules += [arg, args.pop (0)]
    else:
      # First non-option word: the rest of the command line is file names.
      files += [arg] + args; args = []
  opts ["file"] = files; opts ["match"] = rules
  return opts

# Script body: parse the command line, import the files (or re-read the ids
# recorded by an earlier run), fetch the Calibre catalog and, if requested,
# replace the library copies with links to the originals.
parser = get_cli_parser ()
opts = vars (parser.parse_args (sys.argv [1:]))
# Without --dir a fresh, empty temporary folder would be used and --rebuild
# would silently find nothing to re-process.
if opts ["rebuild"] and not opts ["dir"]: parser.error ("--rebuild needs --dir")

files = list (opts ["file"])
# No --match/--tag on the command line may leave this as None.
rules = opts ["match"] or []
if opts.get ("from"):
  # One file name per line; blank lines are ignored.  Listed names come
  # before the names given directly on the command line.
  with open (opts ["from"]) as flist:
    files = [l.rstrip ("\n") for l in flist if l.strip ()] + files
files = [os.path.abspath (p) for p in files]

TMPD = opts ["dir"]
if   not TMPD: TMPD = tempfile.mkdtemp ()
elif not os.path.isdir (TMPD): os.makedirs (TMPD)
if opts ["rebuild"]: ids = list_ids (TMPD)
else: ids = import_files (files, TMPD, rules)

id_file_dict = catalog2dict (get_catalog (TMPD).getroot ())
if opts ["links"]: create_links (ids, id_file_dict, TMPD)
sys.stdout.write ("Finished. ID <-> filename map in {0}\n".format (TMPD))

