#!/bin/sh

# html2epub - Converts HTML or plain text files to EPUB format - public domain

VERSION="1.20"

# option values, filled in by the command line parser
EPUB=""
TITLE=""
AUTHOR=""
COVER=""
EPUBLANG="en"
SUBJECT=""
CREATOR=""
PUBLISHER=""
UUID=""
URI=""
BOOKID=""
INTERACTIVE=""
USAGE=""
TITLEPAGE="1"

# SRCLST is the manifest of source files: one path per line, in command
# line order. Prefer an unpredictable name from mktemp so nobody can
# pre-create (or symlink) the file in a shared temp directory.
SRCLST=""
if command -v mktemp > /dev/null ; then
    SRCLST=$(mktemp "${TMPDIR:-/tmp}/html2epub.manifest.$$.XXXXXX") || SRCLST=""
fi

if [ -z "${SRCLST}" ] ; then
    # no usable mktemp: fall back to the PID-based name
    SRCLST="/tmp/html2epub.manifest.$$"
    rm -f "${SRCLST}"
    touch "${SRCLST}"
fi

# overridden below if mktemp is available
WRKDIR="/tmp/html2epub.$(date +%Y%m%d%H%M%S).$$.d"

# parse_args ARG...
# Walks the command line and fills in the option globals (TITLE, AUTHOR,
# COVER, EPUBLANG, SUBJECT, CREATOR, PUBLISHER, UUID, URI, INTERACTIVE,
# TITLEPAGE, EPUB). Any argument that is neither an option nor a *.epub
# name is appended, one per line, to the manifest file ${SRCLST}.
# If a value-taking option is the last argument, USAGE is set to "1" and
# parsing stops, instead of shifting past the end of the argument list.
parse_args() {
    while [ $# -ne 0 ] ; do
        # these options consume the following argument; make sure it exists
        case $1 in
        -t|-a|-c|-l|-s|-w|-p|-u|-U)
            if [ $# -lt 2 ] ; then
                echo "html2epub: option '$1' requires an argument"
                USAGE="1"
                return 0
            fi
            ;;
        esac

        case $1 in
        -t)         TITLE="$2"                      ; shift ;;
        -a)         AUTHOR="$2"                     ; shift ;;
        -c)         COVER="$2"                      ; shift ;;
        -l)         EPUBLANG="$2"                   ; shift ;;
        -s)         SUBJECT="$2"                    ; shift ;;
        -w)         CREATOR="$2"                    ; shift ;;
        -p)         PUBLISHER="$2"                  ; shift ;;
        -u)         UUID="$2"                       ; shift ;;
        -U)         URI="$2"                        ; shift ;;
        -I)         INTERACTIVE="1"                         ;;
        -ntp)       TITLEPAGE=""                            ;;
        *.epub)     EPUB="$1"                               ;;
        # printf, not echo: echo may interpret backslashes or a leading -n
        *)          printf '%s\n' "$1" >> "${SRCLST}"       ;;
        esac

        shift
    done
}

# parse arguments
parse_args "$@"

# an output .epub name is mandatory; without it only the help is shown
[ -z "$EPUB" ] && USAGE="1"
#[ -z "$SRC" ] && USAGE="1"

# print the help text, drop the manifest temp file and leave with status 1
if [ -n "$USAGE" ] ; then
    # unquoted here-document so that $VERSION is expanded
    cat <<EOF
html2epub $VERSION - Converts HTML or txt files to EPUB - public domain

Usage: html2epub {epub file} [opts] {.html or .txt file(s)} [{other file(s)}]

Options:
-t "title"            Book title
-a "author"           Book author
-s "subject"          Book subject
-w "creator"          Book creator
-p "publisher"        Book publisher
-u "UUID"             Book UUID
-U "URI"              Book URI
-c cover_image.png    Cover image (PNG format)
-l lang               Language (en, es, fr...)
-I                    Interactive; opens a shell for manual changes
-ntp                  No Title Page (avoid generating one)

Files:
.html                 Filtered through 'tidy' or 'tidyp' if available
.txt                  Converted to basic xHTML paragraphs (split at blank lines)
.jpg, .png, ...       Copied as is
EOF

    rm -f "${SRCLST}"

    exit 1
fi

# zip is required to build the archive, so fail before doing any work;
# remove the manifest temp file first, like every other abort path does
if ! command -v zip > /dev/null ; then
    echo "html2epub: you don't have the 'zip' utility installed -- fix that"
    rm -f "${SRCLST}"
    exit 4
fi

# Choose the filter used to turn HTML into XHTML. tidyp is preferred over
# tidy when both are installed; with neither, the input is passed through
# unchanged by cat (after printing a recommendation).
if command -v tidyp > /dev/null ; then
    TOXHTML="tidyp -q -asxhtml -utf8"
elif command -v tidy > /dev/null ; then
    TOXHTML="tidy -q -asxhtml -utf8"
else
    echo "html2epub: recommendation: install 'tidy' or 'tidyp'"
    TOXHTML="cat"
fi

# a cover was requested: it must be an existing regular file
if [ -n "${COVER}" ] && [ ! -f "${COVER}" ] ; then
    echo "html2epub: cannot find cover file '${COVER}' -- aborting"
    rm -f "${SRCLST}"
    exit 2
fi

# every source listed in the manifest must exist before any work starts;
# IFS= and -r keep backslashes and leading/trailing blanks of the file
# name intact
while IFS= read -r F ; do
    if [ ! -f "${F}" ] ; then
        echo "html2epub: cannot find '${F}' -- aborting"
        rm -f "${SRCLST}"
        exit 2
    fi
done < "${SRCLST}"

# Create the private working directory where the book is assembled.
# With mktemp the name is random; otherwise the preset ${WRKDIR} name is
# used. A failed creation (for instance because the fallback path already
# exists) empties WRKDIR so the script never works in, or later deletes,
# a directory it did not create itself.
if command -v mktemp > /dev/null ; then
    WRKDIR=$(mktemp -d) || WRKDIR=""
else
    mkdir -m 700 "${WRKDIR}" || WRKDIR=""
fi

if [ -z "$WRKDIR" ] || [ ! -d "$WRKDIR" ] ; then
    echo "html2epub: cannot create working directory -- aborting"
    rm -f "${SRCLST}"
    exit 3
fi

# without an explicit title, the book is titled after the output file name
[ -z "$TITLE" ] && TITLE="${EPUB}"

# txt_to_xhtml FILE
# Prints on stdout a minimal XHTML document wrapping the plain text FILE.
# The document title is the base name of FILE. The characters & < > are
# escaped (& first, so the entities produced for < and > are not escaped
# twice) and every empty line becomes a paragraph break.
txt_to_xhtml() {
    TXTTITLE=$(basename "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g')

    echo "<!DOCTYPE html>"
    echo '<html xmlns="http://www.w3.org/1999/xhtml">'
    printf '%s\n' "<head><title>${TXTTITLE}</title></head><body><p>"
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/^$/<\/p><p>/' "$1"
    echo "</p></body></html>"
}

echo "Gathering data..."

# Walk the manifest in order. .txt and .html sources become numbered
# part-NNNN.html files in the work dir; anything else is copied untouched.
# ID advances for every entry, so part numbers follow manifest positions.
ID=1
while IFS= read -r F ; do
    # the directory goes through %s so a '%' in its name cannot be taken
    # as a printf conversion
    O=$(printf '%s/part-%04d.html' "${WRKDIR}" "$ID")

    case $F in
    *.txt)
        # ${TOXHTML} is left unquoted on purpose: it holds a command
        # followed by its options
        txt_to_xhtml "$F" | ${TOXHTML} > "${O}"
        ;;
    *.html)
        ${TOXHTML} < "${F}" > "${O}"
        ;;
    *)
        # copy as is
        cp "${F}" "${WRKDIR}"
        ;;
    esac

    ID=$((ID + 1))
done < "${SRCLST}"

# Settle the book identifier. An explicit URI takes precedence and clears
# any UUID; otherwise a UUID is used, generated on the spot when none was
# supplied on the command line.
if [ -n "$URI" ] ; then
    BOOKID="$URI"
    UUID=""
else
    if [ -z "$UUID" ] ; then
        # ask whichever UUID generator is installed
        if command -v uuidgen > /dev/null ; then
            UUID="$(uuidgen)"
        elif command -v uuid > /dev/null ; then
            UUID="$(uuid)"
        else
            # no generator available: settle for a fixed, made-up UUID
            UUID="c5be00f9-932e-4903-8cd9-b4b5939411a6"
        fi
    fi

    BOOKID="$UUID"
fi

# the cover image is copied flat into the work dir (no sub-directories)
[ -n "${COVER}" ] && cp "${COVER}" "${WRKDIR}"

# titlepage_xhtml
# Prints the title page document on stdout, built from the globals
# EPUBLANG, TITLE, AUTHOR and COVER: the cover image when there is one,
# otherwise the title and author as headings.
titlepage_xhtml() {
    echo "<?xml version='1.0' encoding='utf-8'?>"
    echo "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='${EPUBLANG}'>"
    echo "<head><title>${TITLE} - ${AUTHOR}</title></head><body>"

    if [ -n "${COVER}" ] ; then
        # reference the copy by its bare file name: the path given on the
        # command line does not exist inside the book
        echo "<img src='$(basename "${COVER}")'/>"
    else
        echo "<h1>$TITLE</h1><h2>$AUTHOR</h2>"
    fi

    echo "</body></html>"
}

if [ -n "$TITLEPAGE" ] ; then
    O="${WRKDIR}/titlepage.xhtml"
    titlepage_xhtml > "${O}"
fi


# META-INF/container.xml points the reader at content.opf, which sits at
# the top of the book
mkdir -p "${WRKDIR}/META-INF"
O="${WRKDIR}/META-INF/container.xml"
cat > "${O}" <<'EOF'
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
EOF


# the mimetype file is written without a trailing newline
O="${WRKDIR}/mimetype"
printf '%s' 'application/epub+zip' > "$O"


# content_opf
# Prints the OPF package document on stdout: the metadata taken from the
# option globals, a manifest and spine listing every *.html file found in
# ${WRKDIR} (plus the optional cover and title page), and the guide.
content_opf() {
    echo '<?xml version="1.0" encoding="utf-8"?>'
    echo '<package xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="BookID">'
    echo '  <metadata xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" xmlns:dc="http://purl.org/dc/elements/1.1/">'
    echo "    <dc:language>${EPUBLANG}</dc:language>"
    echo "    <dc:title>${TITLE}</dc:title>"
    echo "    <dc:creator opf:role='aut'>${AUTHOR}</dc:creator>"

    # exactly one of these is emitted when the identifier was settled as
    # either a UUID or a URI
    [ -n "$UUID" ] && echo "    <dc:identifier id='BookID' opf:scheme='UUID'>urn:uuid:${UUID}</dc:identifier>"
    [ -n "$URI" ]  && echo "    <dc:identifier id='BookID' opf:scheme='URI'>uri:${URI}</dc:identifier>"

    echo "    <dc:date>$(date +%Y-%m-%d)</dc:date>"

    [ -n "$SUBJECT" ]   && echo "    <dc:subject>${SUBJECT}</dc:subject>"
    [ -n "$CREATOR" ]   && echo "    <dc:creator>${CREATOR}</dc:creator>"
    [ -n "$PUBLISHER" ] && echo "    <dc:publisher>${PUBLISHER}</dc:publisher>"

    echo '  </metadata>'
    echo '  <manifest>'

    # the cover lives in the work dir under its bare file name, so the
    # href must not carry the directory part given on the command line
    [ -n "$COVER" ] &&  echo "    <item href='$(basename "$COVER")' id='cover' media-type='image/png'/>"

    [ -n "$TITLEPAGE" ] && echo "    <item href='titlepage.xhtml' id='titlepage.xhtml' media-type='application/xhtml+xml'/>"

    for f in "${WRKDIR}"/*.html ; do
        # an unmatched glob stays literal; do not list a bogus '*.html'
        [ -f "$f" ] || continue
        f=$(basename "$f")
        echo "    <item href='${f}' id='${f}' media-type='application/xhtml+xml'/>"
    done

    echo '    <item href="toc.ncx" media-type="application/x-dtbncx+xml" id="ncx"/>'
    echo '  </manifest>'
    echo '  <spine toc="ncx">'

    [ -n "$TITLEPAGE" ] && echo "    <itemref idref='titlepage.xhtml'/>"

    for f in "${WRKDIR}"/*.html ; do
        [ -f "$f" ] || continue
        f=$(basename "$f")
        echo "    <itemref idref='${f}'/>"
    done

    echo '  </spine>'
    echo '  <guide>'

    [ -n "$TITLEPAGE" ] && echo '    <reference href="titlepage.xhtml" type="cover" title="Cover"/>'

    echo '  </guide>'
    echo '</package>'
}

O="${WRKDIR}/content.opf"
content_opf > "${O}"


# html_title FILE
# Prints the text found between <title> and </title> in FILE (tag names
# matched in any letter case; a title spread over several lines is joined
# with spaces). Prints nothing when FILE has no such pair, thanks to
# sed -n with the p flag; a plain substitution would hand back the whole
# document in that case.
html_title() {
    paste -sd ' ' "$1" | \
        sed -n -e 's!^.*<[tT][iI][tT][lL][eE]>\(.*\)</[tT][iI][tT][lL][eE]>.*$!\1!p'
}

# toc.ncx: one navPoint per *.html part, labelled with the part's title
# or, when it has none, with its sequence number
O="${WRKDIR}/toc.ncx"
{
    echo '<?xml version="1.0" encoding="utf-8"?>'
    echo "<ncx xmlns='http://www.daisy.org/z3986/2005/ncx/' version='2005-1' xml:lang='${EPUBLANG}'>"
    echo '  <head>'
    echo "    <meta content='${BOOKID}' name='dtb:uid'/>"
    echo '    <meta content="2" name="dtb:depth"/>'
    echo '    <meta content="html2epub" name="dtb:generator"/>'
    echo '    <meta content="0" name="dtb:totalPageCount"/>'
    echo '    <meta content="0" name="dtb:maxPageNumber"/>'
    echo '  </head>'
    echo '  <docTitle>'
    echo "    <text>$TITLE - $AUTHOR</text>"
    echo '  </docTitle>'
    echo '  <navMap>'

    ID=1

    # the directory is quoted so a work dir path containing blanks does
    # not get split into several words
    for f in "${WRKDIR}"/*.html ; do
        # an unmatched glob stays literal; skip it
        [ -f "$f" ] || continue

        SRCTITLE=$(html_title "$f")
        [ -z "$SRCTITLE" ] && SRCTITLE="${ID}"

        f=$(basename "$f")
        echo "    <navPoint id='${f}' playOrder='${ID}'>"
        echo "      <navLabel><text>${SRCTITLE}</text></navLabel>"
        echo "      <content src='${f}'/>"
        echo "    </navPoint>"

        ID=$((ID + 1))
    done

    echo '  </navMap>'
    echo '</ncx>'
} > "${O}"

# the archive is built under a temporary name and moved into place at the end
WRKFILE="/tmp/html2epub.$$.zip"
rm -f "${WRKFILE}"

# open in a subshell for the cwd; any failure inside makes the subshell
# exit non-zero so it can be detected below
(
    cd "${WRKDIR}" || exit 1

    if [ -n "$INTERACTIVE" ] ; then
        echo
        echo "html2epub interactive mode"
        echo "--------------------------"
        echo
        echo "This is a subshell inside the folder that contains the files"
        echo "that will be part of the epub. Modify files or copy new ones"
        echo "as suited and exit the shell (Ctrl-D) to create the epub."
        # fall back to /bin/sh when SHELL is unset or empty
        "${SHELL:-/bin/sh}"
    fi

    echo "Building .epub file..."

    # mimetype is added first and stored uncompressed, then everything
    # else is added recursively
    zip "${WRKFILE}" -X -Z store mimetype || exit 1
    zip "${WRKFILE}" -X -r * -x mimetype || exit 1

    echo "Finished."
)
STATUS=$?

# deliver the book only if it was built
if [ "$STATUS" -eq 0 ] ; then
    mv "${WRKFILE}" "${EPUB}" || STATUS=1
fi

# clean up on success and on failure alike
rm -f "${WRKFILE}"
rm -rf "${WRKDIR}"
rm -f "${SRCLST}"

if [ "$STATUS" -ne 0 ] ; then
    echo "html2epub: could not build '${EPUB}' -- aborting"
    exit 5
fi

exit 0
