#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2021, Fabien Carrion <fabien@carrion.mx>'
__docformat__ = 'restructuredtext en'

import errno, os, subprocess, shutil
from contextlib import closing
from zipfile import ZipFile

import numpy as np
from pyzbar import pyzbar
import imutils
import cv2

# calibre Python 3 compatibility.
import six
from six import text_type as unicode

from calibre import prints, CurrentDir
from calibre.constants import iswindows, isbsd, filesystem_encoding
from calibre.customize import numeric_version
from calibre.ebooks import ConversionError, DRMError
from calibre.ptempfile import TemporaryDirectory, PersistentTemporaryFile
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.utils.logging import GUILog

from calibre_plugins.extract_isbn.scan import BookScanner

# Upper bound of the leading page range that get_isbn() scans on long
# archives (it passes the range 1..FRONT_PAGES). Together with BACK_PAGES it
# also forms the threshold (FRONT_PAGES + BACK_PAGES) at or below which the
# whole archive is scanned in a single pass instead of in ranges.
FRONT_PAGES = 10
# Size of the trailing page range (total_pages - BACK_PAGES .. total_pages)
# that get_isbn() falls back to when the leading range yields no identifier.
BACK_PAGES = 5


def get_isbn_from_cbz(log, cbz_path):
    '''
    Look for an ISBN in the comic archive at ``cbz_path``.

    The archive is copied to ``src.cbz`` inside a temporary directory and that
    copy is handed to :func:`get_isbn`, which tries barcode detection and OCR
    on selected pages. The scan normally runs in a forked calibre worker; on
    calibre versions older than 0.8.55 it runs in-process instead (a warning
    is logged because that path may crash the caller).

    :param log: calibre log object; it is called directly and its ``error()``
        method is used.
    :param cbz_path: path of the CBZ file to scan. The file is only read.
    :return: the value produced by :func:`get_isbn` for the copied archive.
    :raises RuntimeError: if the forked worker fails (the worker traceback is
        printed first).
    '''
    with TemporaryDirectory('_isbn_cbz') as output_dir:
        cbz_copy = os.path.join(output_dir, u'src.cbz')
        with open(cbz_path, 'rb') as src, open(cbz_copy, 'wb') as dest:
            shutil.copyfileobj(src, dest)

        try:
            # We want to run the scanning of the CBZ on a fork_job, however
            # that will only be "fixed" in calibre 0.8.55 to allow calling
            # a calibre plugin from such a job. In the meantime, do it the
            # risky way of calling from in-process.
            if numeric_version < (0, 8, 55):
                log.error('Warning: CBZ analysis may crash, upgrade to calibre 0.8.55 when possible')
                return get_isbn(output_dir, 'src.cbz', log)

            res = fork_job('calibre_plugins.extract_isbn.cbz', 'get_isbn',
                    (output_dir, 'src.cbz'))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to extract cbz and run tesseract')
        finally:
            # Best-effort removal of the copy; the temporary directory is
            # cleaned up on exit from the ``with`` block anyway. Only
            # filesystem errors are ignored so that interrupts still propagate.
            try:
                os.remove(cbz_copy)
            except OSError:
                pass

    info = res['result']
    # The worker printed its log to stdout/stderr; replay it into our log.
    with open(res['stdout_stderr'], 'rb') as f:
        raw = f.read().strip()
        if raw:
            log(raw)
    return info


def get_isbn(output_dir, cbz_name, log=None):
    '''
    Scan the archive ``cbz_name`` located in ``output_dir`` for an ISBN.

    Strategy, in order:

    1. Barcode detection (:func:`call_opencv`) on the range
       ``total_pages-2 .. total_pages``.
    2. If the archive has at most ``FRONT_PAGES + BACK_PAGES`` entries, scan
       all of it: barcode detection first, then OCR (:func:`call_tesseract`)
       fed into a ``BookScanner``.
    3. Otherwise scan the range ``1 .. FRONT_PAGES`` the same way and, if the
       scanner still has no identifier, the range
       ``total_pages-BACK_PAGES .. total_pages``.

    :param output_dir: directory containing the archive; also used as the
        extraction directory by the helpers.
    :param cbz_name: file name of the archive inside ``output_dir``.
    :param log: log object. When ``None`` the function assumes it is running
        in a forked worker: it logs to a fresh ``GUILog`` and prints that
        log's HTML on exit so the parent process can re-log it.
    :return: the decoded barcode text as soon as a barcode is found, otherwise
        ``scanner.get_isbn_result()``.
    '''
    is_running_on_fork = log is None
    if is_running_on_fork:
        log = GUILog()
    try:
        total_pages = get_page_count(log, output_dir, cbz_name)
        # Created before any early exit so that every path below can return
        # the scanner's (possibly empty) result instead of hitting an
        # unbound name when the page count could not be determined.
        scanner = BookScanner(log)

        if total_pages is None:
            # get_page_count() has already logged the reason.
            return scanner.get_isbn_result()
        if total_pages == 0:
            log.error('CBZ archive contains no pages')
            return scanner.get_isbn_result()

        def scan_range(first=None, last=None):
            # Barcode first; only when that finds nothing run OCR on the same
            # range and feed the text to the scanner. Returns the barcode
            # text, or None when OCR was used instead.
            barcode = call_opencv(log, output_dir, cbz_name, first, last)
            if barcode is None:
                ocr_text = call_tesseract(log, output_dir, cbz_name, first, last)
                scanner.look_for_identifiers_in_text([ocr_text])
            return barcode

        # Quick barcode-only pass over the end of the archive.
        barcode = call_opencv(log, output_dir, cbz_name, total_pages-2, total_pages)
        if barcode is not None:
            return barcode

        if total_pages <= FRONT_PAGES + BACK_PAGES:
            # No point in doing all the complexity of ranges
            barcode = scan_range()
        else:
            # NOTE(review): call_opencv/call_tesseract compare these bounds
            # against 0-based entry indices, so the very first archive entry
            # is not part of this range - confirm that skipping it is intended.
            barcode = scan_range(1, FRONT_PAGES)
            if barcode is None and not scanner.has_identifier():
                barcode = scan_range(total_pages-BACK_PAGES, total_pages)

        if barcode is not None:
            return barcode
        return scanner.get_isbn_result()
    finally:
        if is_running_on_fork:
            # We need to print our log out so the parent process can re-log it.
            print(log.html)


def get_page_count(log, output_dir, cbz_name):
    '''
    Return the number of entries in the archive ``cbz_name`` in ``output_dir``.

    Every entry listed by the zip directory is counted; no filtering by file
    type is done. The archive is opened through its joined path, so the
    current working directory of the process is never changed.

    :param log: log object; its ``error()`` method is called on failure.
    :param output_dir: directory that contains the archive.
    :param cbz_name: file name of the archive inside ``output_dir``.
    :return: the entry count, or ``None`` if the archive is missing,
        unreadable or not a valid zip file (the reason is logged).
    '''
    import zipfile

    # Python 2 only has ``BadZipfile``; Python 3 spells it ``BadZipFile``.
    bad_zip_error = getattr(zipfile, 'BadZipFile', None) or zipfile.BadZipfile

    cbz_path = os.path.join(output_dir, cbz_name)
    try:
        with closing(ZipFile(cbz_path, 'r')) as archive:
            return len(archive.infolist())
    except (bad_zip_error, EnvironmentError) as e:
        log.error('Failed to read CBZ archive %s: %s' % (cbz_name, e))
        return None


def _find_ocr_text_files(root):
    '''Return the sorted paths of all ``.txt`` files at or below ``root``.'''
    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.endswith('.txt'):
                found.append(os.path.join(dirpath, name))
    return sorted(found)


def call_tesseract(log, output_dir, cbz_name, first=None, last=None):
    '''
    OCR entries of the archive ``cbz_name`` with the ``tesseract`` program and
    return the recognised text.

    Each selected entry is extracted into ``output_dir`` and tesseract is run
    as ``tesseract <entry> <entry>`` with ``output_dir`` as working directory.
    Afterwards every ``.txt`` file found at or below ``output_dir`` is read
    (presumably tesseract writes ``<entry>.txt`` - verify against the
    installed version), decoded as UTF-8 and concatenated.

    :param log: log object; called with the tesseract output, if any.
    :param output_dir: directory containing the archive; entries are
        extracted here and stale ``.txt`` files in it are deleted first.
    :param cbz_name: file name of the archive inside ``output_dir``.
    :param first: lowest entry index to process.
    :param last: highest entry index to process. Indices are 0-based positions
        in the zip directory and both bounds are inclusive. The range is only
        applied when ``first`` and ``last`` are both given; otherwise every
        entry is processed.
    :return: the concatenated OCR text (empty string if nothing was produced).
    :raises ConversionError: if tesseract cannot be found, or if the last
        tesseract run exited with a non-zero status and the captured output
        does not contain the "Image file  cannot be read" marker.
    '''
    from calibre.ebooks.pdf.pdftohtml import popen

    cbzsrc = os.path.join(output_dir, cbz_name)

    # Remove text left behind by an earlier pass so it is not read again.
    for stale in _find_ocr_text_files(output_dir):
        os.remove(stale)

    use_range = first is not None and last is not None

    # Work relative to output_dir so that only the (relative) entry names
    # have to be passed to the tesseract subprocess.
    with CurrentDir(output_dir):
        logf = PersistentTemporaryFile(u'tesseract_log')
        # Exit status of the most recent tesseract run. Starts at 0 so that
        # an empty archive or an empty range is not treated as a failure.
        ret = 0

        with ZipFile(cbzsrc, 'r') as archive:
            for index, zipinfo in enumerate(archive.infolist()):
                if use_range:
                    if index > last:
                        break
                    if index < first:
                        continue

                archive.extract(zipinfo, output_dir)
                cmd = [b'tesseract', zipinfo.filename, zipinfo.filename]

                try:
                    p = popen(cmd, stderr=logf._fd, stdout=logf._fd,
                              stdin=subprocess.PIPE)
                except OSError as err:
                    if err.errno == errno.ENOENT:
                        raise ConversionError(
                            _('Could not find tesseract, check it is in your PATH'))
                    raise

                # Retry the wait if it is interrupted by a signal.
                while True:
                    try:
                        ret = p.wait()
                        break
                    except OSError as e:
                        if e.errno != errno.EINTR:
                            raise

        logf.flush()
        logf.close()
        with open(logf.name, 'rb') as log_stream:
            out = log_stream.read().strip()
        # Compare bytes with bytes: the captured output is raw and may not be
        # ASCII, so no implicit text conversion is attempted here.
        if ret != 0 and b"Image file  cannot be read" not in out:
            raise ConversionError(out)
        if out:
            log('tesseract log:')
            log(out)

        pages = []
        for txt_path in _find_ocr_text_files(output_dir):
            with open(txt_path, 'rb') as page:
                raw = page.read()
            # Decode explicitly; undecodable bytes are replaced rather than
            # aborting the whole scan.
            pages.append(clean_ascii_chars(raw.decode('utf-8', 'replace')))
        return ''.join(pages)



def _find_barcode_in_image(image_path):
    '''
    Locate the most barcode-like region of the image at ``image_path`` and
    decode it with pyzbar.

    Returns the data of the first decoded barcode as text, or ``None`` if the
    file is not a readable image, no candidate region is found or nothing
    can be decoded from the region.
    '''
    # load the image and convert it to grayscale
    image = cv2.imread(image_path)
    if image is None:
        # Not an image OpenCV can read (directory entry, metadata file, ...).
        return None
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # compute the Scharr gradient magnitude representation of the image in
    # both the x and y direction (ksize=-1 selects the Scharr kernel)
    ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
    grad_x = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)
    grad_y = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
    # subtract the y-gradient from the x-gradient
    gradient = cv2.convertScaleAbs(cv2.subtract(grad_x, grad_y))

    # blur and threshold the image
    blurred = cv2.blur(gradient, (9, 9))
    thresh = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)[1]

    # construct a closing kernel and apply it to the thresholded image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    # perform a series of erosions and dilations
    closed = cv2.erode(closed, None, iterations=4)
    closed = cv2.dilate(closed, None, iterations=4)

    # find the contours in the thresholded image and keep the largest one
    contours = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    if not contours:
        return None
    largest = max(contours, key=cv2.contourArea)

    # compute the rotated bounding box of the largest contour
    rect = cv2.minAreaRect(largest)
    box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
    box = np.asarray(box).astype(np.intp)

    x, y, w, h = cv2.boundingRect(box)
    # The rotated box may stick out past the top/left edge. Clamp the origin,
    # because a negative start index would slice from the opposite end.
    top, left = max(y, 0), max(x, 0)
    barcode_img = image[top:y + h, left:x + w]
    if barcode_img.size == 0:
        return None

    barcodes = pyzbar.decode(barcode_img)
    if barcodes:
        return barcodes[0].data.decode("utf-8")
    return None


def call_opencv(log, output_dir, cbz_name, first=None, last=None):
    '''
    Look for a barcode on entries of the archive ``cbz_name`` using OpenCV
    region detection and pyzbar decoding.

    Each selected entry is extracted into ``output_dir`` and analysed in
    archive order; the scan stops at the first entry that yields a barcode.
    An entry that cannot be processed is logged and skipped, and the scan
    continues with the next one.

    :param log: log object; called with one line per processed entry.
    :param output_dir: directory containing the archive; entries are
        extracted here. ``.txt`` files directly inside it are deleted first.
    :param cbz_name: file name of the archive inside ``output_dir``.
    :param first: lowest entry index to process.
    :param last: highest entry index to process. Indices are 0-based positions
        in the zip directory and both bounds are inclusive. The range is only
        applied when ``first`` and ``last`` are both given; otherwise every
        entry is processed.
    :return: the text of the first decoded barcode, or ``None`` if no barcode
        was found. The value is not checked for being an ISBN.
    '''
    cbzsrc = os.path.join(output_dir, cbz_name)

    # Retained from the original implementation: clear text files in the
    # working directory before scanning.
    for name in os.listdir(output_dir):
        if name.endswith(".txt"):
            os.remove(os.path.join(output_dir, name))

    use_range = first is not None and last is not None

    # Entry names are relative, so read the extracted files from output_dir.
    with CurrentDir(output_dir):
        with ZipFile(cbzsrc, 'r') as archive:
            for index, zipinfo in enumerate(archive.infolist()):
                if use_range:
                    if index > last:
                        break
                    if index < first:
                        continue

                archive.extract(zipinfo, output_dir)

                log('opencv log: ' + zipinfo.filename)
                try:
                    decoded = _find_barcode_in_image(zipinfo.filename)
                except Exception as err:
                    # One bad entry must not abort the scan of the others.
                    log('opencv log: skipped %s (%s)' % (zipinfo.filename, err))
                    continue
                if decoded is not None:
                    return decoded
    return None
