#!/usr/bin/env python
'''A script to prepare the pdf file for CyBook with brute force.

usage:  prepare_pdf file.pdf

then answer the questions.

The images will be stored in a directory named <file>_img next to the input file (for book.pdf this is book_img).
One can compress all the images to a pdf file afterwards. This pdf could be read by Cybook.

This is a very ugly script, feel free to change anything if you like.
'''

# Module metadata: author, date of this revision and revision number.
__author__='Luyang Han'
__date__ = "$Date: 2008/07/23 $"
__version__ = "$Revision: 0.3 $"

import subprocess
import os.path
import glob
import sys

# Scratch-file name. NOTE(review): no code visible in this file reads this
# module-level value (functions that use the name assign their own local).
tmpfile = 'tmpfile'
# PDF points per inch; used to turn point coordinates into pixels for a dpi.
pts_unit = 72
# Viewing orientations offered to the user. The two "two columns" values are
# only used by the interactive script, which maps each column onto a plain
# portrait/landscape method with its own crop box.
ORI_PORTRAIT = 0
ORI_LANDSCAPE = 1
ORI_TWOCOLUMNS_PORTRAIT = 2
ORI_TWOCOLUMNS_LANDSCAPE = 3
# Which page dimension is scaled to fill the screen.
ZOOM_WIDTH = 0
ZOOM_HEIGHT = 1
# The resolution of CyBook Gen3, in pixels. The names are misspelt ("SCEEN")
# but are referenced throughout the file, so they are kept as they are.
SCEEN_WIDTH = 600
SCEEN_HEIGHT = 800
# Image file prefix (final PNG slices).
IMG_PRE='img'
# temp file prefix (intermediate pdftoppm renderings).
TMP_PRE='tmp'
# Which viewer command to call for the page preview?
VIEWER = 'kview'
# Alternative viewer (ImageMagick):
#VIEWER = 'display'

# a simple interface for CLI
def prompt(msgWelcome = '', msgError = '', default = None, trans = None, check = None):
    """Ask a question on stdout and read answers from stdin until one is valid.

    msgWelcome -- text printed once before the first answer is read.
    msgError   -- text printed once for every rejected answer.
    default    -- value used when the user just presses enter. It is passed
                  through ``trans`` exactly like typed text, so it should be
                  a string whenever ``trans`` expects one.
    trans      -- optional callable converting the raw string; if it raises,
                  the answer is rejected.
    check      -- optional callable validating the converted value; a falsy
                  result or an exception rejects the answer.

    Returns the first accepted value. A converted value of None is never
    returned: the question is simply asked again.

    Raises EOFError when stdin is exhausted before a valid answer was given
    (previously this case looped forever).
    """
    print(msgWelcome)
    if default is not None:
        print("default value is " + str(default))
    answer = None
    while answer is None:
        line = sys.stdin.readline()
        # readline() returns '' only at end of input; an empty line is '\n'.
        at_eof = (line == '')
        value = line.strip()
        if default is not None and value == '':
            value = default

        accepted = True
        candidate = value
        if trans is not None:
            try:
                candidate = trans(value)
            except Exception:
                accepted = False
        # Skip the check after a failed conversion so the error message is
        # printed once per bad answer rather than twice.
        if accepted and check is not None:
            try:
                accepted = bool(check(candidate))
            except Exception:
                accepted = False

        if accepted:
            answer = candidate
        else:
            print(msgError)

        if answer is None and at_eof:
            raise EOFError('end of input reached before a valid answer was given')
    return answer

def y_nconv(y_or_n):
    """Translate a yes/no answer into a boolean.

    'y' or 'Y' gives True, 'n' or 'N' gives False and anything else gives
    None, which callers treat as "not a valid answer".
    """
    if y_or_n in ('y', 'Y'):
        return True
    if y_or_n in ('n', 'N'):
        return False
    return None

def y_ncheck(y_or_n):
    """Tell whether the value is a real boolean (True or False)."""
    # bool cannot be subclassed, so isinstance is an exact type test here.
    return isinstance(y_or_n, bool)

###########################################

def pdfinfo(filename):
    """Read pdf file info using ``pdfinfo -box``.

    filename -- path of the PDF file.

    Returns a dict mapping every "key: value" line of the tool's output to
    its value; keys and values are stripped strings.

    Raises IOError when the tool cannot be started or writes anything to
    stderr (the stderr text becomes the exception message).
    """
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact.
    try:
        p = subprocess.Popen(['pdfinfo', '-box', filename],
                             stdout = subprocess.PIPE, stderr = subprocess.PIPE,
                             universal_newlines = True)
    except OSError as e:
        raise IOError('cannot run pdfinfo: %s' % e)
    # communicate() drains both pipes together, so neither one can fill up
    # and block the child.
    result, err = p.communicate()
    if err:
        raise IOError(err)
    out = {}
    for line in result.splitlines():
        if ':' not in line:
            # Not a "key: value" line; nothing to record.
            continue
        key, value = line.split(':', 1)
        out[key.strip()] = value.strip()
    return out

def imginfo(filename):
    """Read image file info with ImageMagick ``identify``.

    filename -- path of the image file.

    Returns a dict holding the first seven whitespace-separated fields of the
    first output line under the keys 'filename', 'type', 'size', 'carvans',
    'colorspace', 'bit' and 'filesize' (in that order). The misspelt key
    'carvans' is kept for compatibility.

    Raises IOError when the tool cannot be started, writes anything to
    stderr, or prints fewer than seven fields.
    """
    try:
        p = subprocess.Popen(['identify', filename],
                             stdout = subprocess.PIPE, stderr = subprocess.PIPE,
                             universal_newlines = True)
    except OSError as e:
        raise IOError('cannot run identify: %s' % e)
    result, err = p.communicate()
    if err:
        raise IOError(err)
    first_line = result.splitlines()[0] if result else ''
    fields = first_line.strip().split()
    keys = ('filename', 'type', 'size', 'carvans', 'colorspace', 'bit', 'filesize')
    if len(fields) < len(keys):
        raise IOError('unexpected identify output: %r' % first_line)
    # zip() stops after the last key, so extra trailing fields are ignored.
    return dict(zip(keys, fields))

def pdfinfo_onepage(filename, pageno):
    """Read ``pdfinfo -box`` output restricted to a single page.

    filename -- path of the PDF file.
    pageno   -- page number, passed to the tool as both -f and -l.

    Returns a dict of all "key: value" lines plus the extra key 'geometry',
    which holds the value of the first key containing both 'Page' and 'size'.

    Raises IOError when the tool cannot be started, writes anything to
    stderr, or reports no page size.
    """
    cmd = ['pdfinfo', '-box', '-f', '%d' % pageno, '-l', '%d' % pageno, filename]
    try:
        p = subprocess.Popen(cmd, stdout = subprocess.PIPE, stderr = subprocess.PIPE,
                             universal_newlines = True)
    except OSError as e:
        raise IOError('cannot run pdfinfo: %s' % e)
    result, err = p.communicate()
    if err:
        raise IOError(err)
    out = {}
    page_geometry = None
    for line in result.splitlines():
        if ':' not in line:
            continue
        key, value = line.split(':', 1)
        key = key.strip()
        value = value.strip()
        out[key] = value
        if page_geometry is None and 'Page' in key and 'size' in key:
            page_geometry = value
    if page_geometry is None:
        # Previously this surfaced as an unbound-variable error.
        raise IOError('pdfinfo reported no page size for page %d of %s' % (pageno, filename))
    out['geometry'] = page_geometry
    return out

def pdf_convert(filename, prefix, pageno, dpi, x=0, y=0, w=0, h=0):
    """Render one page of a pdf file to a grayscale image with ``pdftoppm``.

    Every time the function is called again for the same prefix and page, the
    previous file is overwritten. The function does not remove the file; that
    is left to the caller.

    filename   -- path of the PDF file.
    prefix     -- output path prefix handed to pdftoppm.
    pageno     -- page to render.
    dpi        -- rendering resolution.
    x, y, w, h -- crop box in points; converted to pixels for the given dpi
                  before being passed as -x/-y/-W/-H.

    Returns the path of the generated .pgm file.

    Raises IOError when the tool cannot be started, writes anything to
    stderr, or no output file for the page can be found.
    """
    x = int(dpi * x /pts_unit)
    y = int(dpi * y /pts_unit)
    w = int(dpi * w /pts_unit)
    h = int(dpi * h /pts_unit)
    cmd = ['pdftoppm', '-gray',
           '-f', '%d' % pageno, '-l', '%d' % pageno,
           '-r', '%.3f' % dpi,
           '-x', '%d' % x, '-y', '%d' % y,
           '-H', '%d' % h, '-W', '%d' % w,
           filename, prefix]
    try:
        p = subprocess.Popen(cmd, stdout = subprocess.PIPE, stderr = subprocess.PIPE,
                             universal_newlines = True)
    except OSError as e:
        raise IOError('cannot run pdftoppm: %s' % e)
    result, err = p.communicate()
    if err:
        raise IOError(err)
    # Output files are expected to be named <prefix>-<page number>.pgm, with
    # the number possibly zero padded. Compare the number itself rather than
    # searching for its digits anywhere in the path, which also matched
    # directory names and other page numbers.
    base = os.path.basename(prefix)
    for path in glob.glob(prefix+'*.pgm'):
        name = os.path.basename(path)
        digits = name[len(base):-len('.pgm')].lstrip('-')
        if digits.isdigit() and int(digits) == int(pageno):
            return path
    raise IOError('cannot create file')

def display_onepage(filename):
    """Show an image in the external viewer (VIEWER) without waiting for it.

    Used to display a page so the user can read off cropping coordinates.
    The viewer keeps running in the background. If it cannot be started, a
    message is written to stderr and the function returns normally.
    """
    import shlex
    # Give the viewer /dev/null instead of pipes: nobody ever reads its
    # output, and an undrained pipe would eventually block a chatty viewer.
    devnull = open(os.devnull, 'w')
    try:
        # VIEWER may carry its own arguments; split it the way a shell would
        # and append the file name as a single argument.
        subprocess.Popen(shlex.split(VIEWER) + [filename],
                         stdout = devnull, stderr = devnull)
    except OSError as e:
        sys.stderr.write('Cannot start viewer %r: %s\n' % (VIEWER, e))
    finally:
        # The child holds its own copy of the descriptor.
        devnull.close()

def im_convert(inputfile, outputfile, opts=''):
    """Start ImageMagick ``convert inputfile <opts> outputfile``.

    inputfile  -- path of the source image.
    outputfile -- path of the image to write.
    opts       -- extra command line options as one shell-style string
                  (quotes are honoured, e.g. '-rotate "-90"').

    The process is not waited for: the Popen object is returned so the
    caller decides when to wait. Its stdout and stderr are pipes.

    Raises OSError when the convert program cannot be started.
    """
    import shlex
    # Pass the file names as single arguments so spaces in paths survive;
    # only the option string is split.
    cmd = ['convert', inputfile] + shlex.split(opts) + [outputfile]
    return subprocess.Popen(cmd, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

def pdflib(inputimages, outputfile, opts=''):
    """Run ``pdfimage -o outputfile <opts> inputimages`` and wait for it.

    The command goes through the shell, so wildcards in ``inputimages`` are
    expanded by the shell. The tool's output is not captured.

    Returns the exit status of the command.
    """
    command = 'pdfimage -o %s %s %s' % (outputfile, opts, inputimages)
    worker = subprocess.Popen(command, shell = True)
    status = worker.wait()
    return status

# Above are the functions which wrap around those command line tools.
def average_section(length, section):
    """Cover a total length with evenly spread sections of a fixed size.

    length  -- total length to cover.
    section -- size of one section.

    Returns a list of (start, end) tuples. When the length fits into one
    section the single tuple (0, length) is returned. Otherwise the smallest
    number of sections that covers the length is used; the first starts at 0,
    the last ends at ``length`` and neighbours overlap evenly.
    """
    # Ceiling of length/section without relying on the division flavour
    # (integer division in Python 2, true division in Python 3).
    n = int(length / section)
    if n * section < length:
        n += 1
    # Always produce at least one section, also for a zero length. The old
    # "floor + 1" count produced a redundant section whenever length was an
    # exact multiple of section (e.g. two identical slices for 800/800).
    n = max(n, 1)
    result = []
    if n == 1:
        result.append((0,length))
    else:
        step = (length-section)*1.0/(n-1)
        for i in range(n):
            start = step*i
            result.append((start, start + section))
    return result

class MethodError(Exception):
    """Error type for problems with a page conversion method."""

class PDFPageMethod(object):
    """Recipe for cutting one PDF page into screen-sized images.

    A method fixes the viewing orientation, which page dimension is scaled to
    fill the screen, an optional crop box, and from those the rendering dpi
    and the list of sections (slices) each page is cut into.
    """

    def __str__(self):
        orientation_names = {ORI_PORTRAIT: 'Portrait', ORI_LANDSCAPE: 'Landscape'}
        zoom_names = {ZOOM_HEIGHT: 'fit to height', ZOOM_WIDTH: 'fit to width'}
        s1 = orientation_names.get(self.orientation, 'unknown')
        s2 = zoom_names.get(self.zoom, 'unknown')
        # "No crop" is stored as [0, 0, 0, 0], which is a non-empty list, so
        # look at the values rather than at the list's truthiness.
        if any(self.crop):
            s3 = 'Crop to %s' % str(self.crop)
        else:
            s3 = 'no crop'
        s4 = '%i images per page' %len(self.sections)
        s = 'Methods: for geometry %s, orientation: %s, fit: %s, %s, %s' % (self.geometry, s1, s2, s3, s4)
        return s

    def __init__(self, geometry, orientation = ORI_LANDSCAPE, zoom = ZOOM_WIDTH, crop=None):
        """Work out the dpi and the page sections for the given settings.

        geometry    -- page size text whose first and third whitespace
                       separated fields are width and height in points,
                       e.g. '595 x 842 pts'.
        orientation -- ORI_PORTRAIT or ORI_LANDSCAPE.
        zoom        -- ZOOM_WIDTH or ZOOM_HEIGHT.
        crop        -- optional [x, y, width, height] in points; when given,
                       its width and height replace the page size.

        Raises MethodError for an unsupported orientation or zoom, or for a
        non-positive page or crop size.
        """
        if orientation not in (ORI_PORTRAIT, ORI_LANDSCAPE):
            raise MethodError('unsupported orientation: %r' % (orientation,))
        if zoom not in (ZOOM_WIDTH, ZOOM_HEIGHT):
            raise MethodError('unsupported zoom: %r' % (zoom,))
        self.orientation = orientation
        self.zoom = zoom
        self.geometry = geometry
        f = geometry.split()
        self.width = float(f[0])
        self.height = float(f[2])
        if crop:
            self.crop = list(crop)
            self.width = self.crop[2]
            self.height = self.crop[3]
        else:
            self.crop = [0,0,0,0]
        if self.width <= 0 or self.height <= 0:
            raise MethodError('page or crop size must be positive, got %s x %s'
                              % (self.width, self.height))

        view_w, view_h = self._viewport()
        # The dpi makes the fitted dimension exactly fill the viewport; the
        # other dimension is then cut into viewport-sized sections.
        # 72. keeps the division floating point even for integer crop sizes.
        if self.zoom == ZOOM_WIDTH:
            self.dpi = view_w / (self.width/72.)
        else:
            self.dpi = view_h / (self.height/72.)

        image_w = int(self.width/72.*self.dpi)
        image_h = int(self.height/72.*self.dpi)
        if self.zoom == ZOOM_WIDTH:
            self.sections = average_section(image_h, view_h)
        else:
            # Landscape used to slice with SCEEN_WIDTH here although each
            # crop is SCEEN_HEIGHT pixels wide; the viewport width is right
            # for both orientations.
            self.sections = average_section(image_w, view_w)

    def _viewport(self):
        """Return (width, height) in pixels of one slice before rotation."""
        if self.orientation == ORI_PORTRAIT:
            return SCEEN_WIDTH, SCEEN_HEIGHT
        return SCEEN_HEIGHT, SCEEN_WIDTH

    def convert(self, filename, pageno, prefix, imgno):
        """Render one page and cut it into PNG slices.

        filename -- path of the PDF file.
        pageno   -- page to convert.
        prefix   -- directory that receives the temporary rendering and the
                    slices.
        imgno    -- text inserted into the slice names; slice i is written
                    to <prefix>/<IMG_PRE><imgno>.<i>.png.

        The temporary rendering is removed afterwards, also when starting a
        conversion process fails.
        """
        x,y,w,h = self.crop
        page_image = pdf_convert(filename, '%s/%s' %(prefix, TMP_PRE), pageno, self.dpi,x,y,w,h)

        view_w, view_h = self._viewport()
        common_opts = '  -colorspace Gray -depth 8 -quality 90'
        if self.orientation == ORI_LANDSCAPE:
            # Landscape slices are rotated so they display upright on the
            # portrait screen.
            common_opts += ' -rotate "-90"'

        process_pool = []
        try:
            for i, section in enumerate(self.sections):
                # Slices advance along the dimension that was not fitted.
                if self.zoom == ZOOM_WIDTH:
                    off_x, off_y = 0, section[0]
                else:
                    off_x, off_y = section[0], 0
                opts = '-crop %dx%d+%d+%d' % (view_w, view_h, off_x, off_y) + common_opts
                target = '%s/%s%s.%d.png' % (prefix, IMG_PRE, imgno, i)
                process_pool.append(im_convert(page_image, target, opts))
        finally:
            # communicate() waits like wait() but also drains the pipes, so
            # a child can never block on a full pipe.
            for p in process_pool:
                p.communicate()
            os.remove(page_image)

if __name__=="__main__":
    # Interactive driver: ask how the pages should be cut, convert every page
    # of the chosen range into PNG slices and optionally bundle the slices
    # into a new PDF.
    if len(sys.argv) < 2:
        sys.exit('usage:  prepare_pdf file.pdf')
    filename = sys.argv[1]
    if not os.path.exists(filename):
        # A string cannot be raised as an exception; use a real one.
        raise IOError('No such file: %s' % filename)
    fileinfo = pdfinfo(filename)
    totalpage = int(fileinfo['Pages'])
    # Zero-padded image counter, one digit wider than the page count.
    format_str = """%0""" + str(len(str(totalpage))+1) + 'd'
    dirname = os.path.splitext(filename)[0]+'_img'
    if not os.path.isdir(dirname):
        # Let a real failure (permissions, a plain file in the way) surface
        # here instead of much later.
        os.mkdir(dirname)
    methods=[]
    img_count = 0
    startpage = 1
    endpage = totalpage

    def display(pageno):
        # NOTE(review): this prefix has no path separator, so the preview is
        # written next to the image directory and is never removed - confirm
        # whether that is intended.
        prefix = dirname + TMP_PRE
        preview = pdf_convert(filename, prefix, pageno, 72)
        display_onepage(preview)

    def count():
        # Return the next image number as zero-padded text.
        global img_count
        img_count = img_count + 1
        return format_str % img_count

    print('Input file is %s' % filename)
    print('Total page number: %i' % totalpage)
    display(1)

    if prompt('Do you want to save the first page as a cover? (y/n)','Pleae give y/n.',default = 'y', trans = y_nconv, check = y_ncheck):
        geo = pdfinfo_onepage(filename, 1)['geometry']
        covermethod = PDFPageMethod(geo,ORI_PORTRAIT,ZOOM_HEIGHT)
        covermethod.convert(filename,1,dirname,count())
        startpage = 2

    def trans_pagerange(s):
        return [int(part) for part in s.split('-')]
    def check_pagerange(s):
        # Exactly two numbers, in order, inside the allowed range.
        return len(s) == 2 and startpage <= s[0] <= s[1] <= endpage
    pagerange = prompt('Please give the page range to convert.','The format is wrong, try again', \
                       default = '%i-%i' %(startpage,endpage),trans = trans_pagerange, check = check_pagerange)
    startpage = pagerange[0]
    endpage = pagerange[1]

    print('A image of the first converting page is shown. If you want to crop the page please note down the cropping point on the upper-left conner and lower-right conner.')
    display(startpage)
    geo = pdfinfo_onepage(filename, startpage)['geometry']

    def check_orientation(s):
        # Compare by value; the old test used identity and listed
        # ORI_LANDSCAPE twice, so portrait could never be selected.
        return s in (ORI_PORTRAIT, ORI_LANDSCAPE, ORI_TWOCOLUMNS_PORTRAIT, ORI_TWOCOLUMNS_LANDSCAPE)
    orientation = prompt('View orientation (Portrait = 0, Landscape = 1, Two columes portrait= 2, Two columes landscape= 3)','give either 0, 1, 2, 3', \
                       default = '1',trans = int, check = check_orientation)

    def check_zoom(s):
        return s in (ZOOM_HEIGHT, ZOOM_WIDTH)
    zoom = prompt('Fit to width = 0, height = 1','give either 0 or 1', \
                       default = '0',trans = int, check = check_zoom)

    def trans_crop(s):
        # '' means "no crop"; otherwise turn two corner points into
        # [x, y, width, height].
        if not s:
            return []
        x1,y1,x2,y2 = [int(part) for part in s.split()]
        if x2 <= x1 or y2 <= y1:
            # An empty or inverted box would give a zero or negative size.
            raise ValueError('the lower-right corner must lie below and right of the upper-left corner')
        return [x1, y1, (x2-x1), (y2-y1)]
    def check_crop(s):
        return len(s) == 4

    if orientation in (ORI_PORTRAIT, ORI_LANDSCAPE):
        crop = prompt('Give the x y coorditates for cropping, for example 1 2 16 22, upperleft (1,2),lowerright (16,22). If no cropping, just press enter', 'error, input again', default = None,trans = trans_crop, check = None)
        methods.append(PDFPageMethod(geo, orientation, zoom, crop))
    else:
        # Two-column layouts: one method per column, each with its own
        # mandatory crop box.
        if orientation == ORI_TWOCOLUMNS_PORTRAIT:
            column_orientation = ORI_PORTRAIT
        else:
            column_orientation = ORI_LANDSCAPE
        for which in ('first', 'second'):
            crop = prompt('Give the x y coorditates for cropping of the %s column, for example 1 2 16 22, upperleft (1,2),lowerright (16,22). You must give the cropping point!' % which, 'error, input again', default = None,trans = trans_crop, check = check_crop)
            methods.append(PDFPageMethod(geo, column_orientation, zoom, crop))

    print('The pages are converted with the following method:')
    for pagemethod in methods:
        print(str(pagemethod))

    for i in range(startpage, endpage + 1):
        for pagemethod in methods:
            try:
                pagemethod.convert(filename,i,dirname,count())
            except MethodError as e:
                # Keep going with the remaining pages, but say what was skipped.
                print('Skipping page %i: %s' % (i, e))

    if prompt('Do you want to compress all the images into a single PDF file again? (y/n)','give y/n','y',y_nconv,y_ncheck):
        status = pdflib(dirname+'/*.png',dirname+'/'+os.path.basename(filename))
        if status == 0:
            for png in glob.glob(dirname+'/*.png'):
                os.remove(png)
        else:
            # Do not throw the images away when no PDF was produced.
            print('pdfimage failed (exit status %s); the images are kept in %s' % (status, dirname))
