#!/bin/bash
# Input PDF: the first command-line argument.
file="$1"
# The result is written alongside the input as FILENAME-stretched.pdf.
outfile="${file%.pdf}-stretched.pdf"
# Scratch files (bounding-box listing and the pre-stretched, uncropped PDF)
# live in a private directory created by mktemp inside the current directory,
# so concurrent runs cannot clobber each other's files.  The EXIT trap removes
# the directory on every exit path, including early error exits.
tmpdir=$(mktemp -d "tmp-stretch.XXXXXX") || {
    echo "Could not create a temporary directory." >&2
    exit 2
}
trap 'rm -rf -- "${tmpdir}"' EXIT
bounding="${tmpdir}/tmp-bounding.txt"
skewedfile="${tmpdir}/tmp-skewed.pdf"
# Locate the external programs this script drives.

#######################################
# Find an executable by name.
# Looks on PATH first; if that fails, consults the locate database and
# accepts only an entry whose basename is exactly NAME and which is an
# executable regular file (a bare `locate NAME` is a substring match and
# would also return man pages, data files, and unrelated paths).
# Arguments: $1 - program name
# Outputs:   the path of the program on stdout (nothing if not found)
# Returns:   0 if found, 1 otherwise
#######################################
find_tool() {
    local name="$1" candidate
    if candidate=$(command -v "$name" 2>/dev/null) && [ -n "$candidate" ] ; then
        printf '%s\n' "$candidate"
        return 0
    fi
    # stderr is discarded so a missing locate binary or database simply
    # counts as "not found".
    while IFS= read -r candidate ; do
        if [ "${candidate##*/}" = "$name" ] && [ -f "$candidate" ] && [ -x "$candidate" ] ; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done < <(locate "/${name}" 2>/dev/null)
    return 1
}

# ebook-meta is optional: without it the metadata steps are skipped, which
# later code detects through the sentinel value "notfound".
printf 'Looking for ebook-meta ...'
if emexec=$(find_tool "ebook-meta") ; then
    printf '%s\n' "$emexec"
else
    echo ""
    echo "Could not find ebook-meta calibre subprogram. Skipping metadata steps."
    emexec="notfound"
fi
echo ""

# pdfmanipulate is mandatory.
printf 'Looking for pdfmanipulate...'
if pdmexec=$(find_tool "pdfmanipulate") ; then
    printf '%s\n' "$pdmexec"
else
    echo ""
    echo "Error, Could not find calibre's pdfmanipulate subprogram." >&2
    echo "The script cannot work without it." >&2
    exit 3
fi
echo ""

# Ghostscript is mandatory.
printf 'Looking for ghostscript...'
if gsexec=$(find_tool "gs") ; then
    printf '%s\n' "$gsexec"
else
    echo ""
    echo "Error, Could not find ghostscript." >&2
    echo "The script cannot work without it." >&2
    exit 3
fi

# Main pipeline: read metadata, measure the page contents, stretch the pages
# vertically, crop them to their contents, and restore the metadata.
# Exit status: 0 on success, 1 if the input file does not exist, 2 if the
# scaling or cropping step fails.

# Report a fatal processing error, remove the scratch files, and exit 2.
die() {
    echo "$1" >&2
    rm -f -- "${skewedfile}" "${bounding}"
    exit 2
}

# what happens if the file cannot be found
if [ ! -f "$file" ] ; then
    echo 'The file was not found.' >&2
    exit 1
fi

if [ "$emexec" != "notfound" ] ; then
    echo ""
    echo "Reading metadata..."
    # Run ebook-meta once (through the path found earlier, which may not be
    # on PATH) and pick the individual fields out of the saved output.
    metadata="$("${emexec}" "${file}")"
    author="$(printf '%s\n' "$metadata" | grep 'Author' | sed 's/[^:]*: //')"
    title="$(printf '%s\n' "$metadata" | grep 'Title' | sed 's/[^:]*: //')"
    language="$(printf '%s\n' "$metadata" | grep 'Language' | sed 's/[^:]*: //')"
    echo "Author(s) recognized as ${author}."
    echo "Title recognized as ${title}."
    echo "Language recognized as ${language}."
fi

echo ""
echo "Analyzing page geometry..."
echo ""
# The bbox device's report is captured from stderr into the scratch file.
"${gsexec}" -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox "$file" 2> "${bounding}"

# Find the largest content width and height over all pages.  Only the
# integer "%%BoundingBox: x0 y0 x1 y1" lines are used; the HiRes variants
# and any other diagnostics in the capture are skipped.
maxh=1
maxv=1
while read -r tag llx lly urx ury _ ; do
    [ "$tag" = "%%BoundingBox:" ] || continue
    spreadh=$((urx - llx))
    spreadv=$((ury - lly))
    if [ "$spreadh" -gt "$maxh" ] ; then
        maxh=${spreadh}
    fi
    if [ "$spreadv" -gt "$maxv" ] ; then
        maxv=${spreadv}
    fi
done < "${bounding}"

# Vertical scale factor = (4/3) * maxh / maxv, i.e. the factor that turns a
# maxh-by-maxv area into a 3:4 (width:height) one.  Bash only has integer
# arithmetic, so the factor is computed in hundredths and then printed with
# exactly two decimals; zero-padding keeps small values such as 5 -> "0.05".
scalepct=$(( (maxh * 400) / (maxv * 3) ))
printf -v scaleamt '%d.%02d' "$((scalepct / 100))" "$((scalepct % 100))"

echo ""
echo "Now scaling pages..."
echo ""
# "1 <factor> scale" at the start of every page leaves x alone and stretches y.
"${gsexec}" -sOutputFile="${skewedfile}" -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -r300 -g6000x6000 -c "<</BeginPage{1 ${scaleamt} scale}>> setpagedevice" -f "${file}" \
    || die "Error: ghostscript failed while scaling the pages."

echo ""
echo "Now determining the crop region..."
echo ""
# This second bbox report is handed unmodified to pdfmanipulate below.
"${gsexec}" -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox "${skewedfile}" 2> "${bounding}"

echo ""
printf 'Now cropping the pages...'
"${pdmexec}" crop -o "${outfile}" -b "${bounding}" "${skewedfile}" \
    || die "Error: pdfmanipulate failed while cropping the pages."
echo " done."
rm -f -- "${skewedfile}" "${bounding}"

if [ "$emexec" != "notfound" ] ; then
    echo ""
    echo "Reinstating metadata..."
    echo ""
    "${emexec}" "${outfile}" --authors "$author" --title "${title}" --language "${language}"
    echo ""
fi
echo "Finished. Output saved as $outfile."
exit 0
