#!/bin/sh
## Run from the directory that contains this script: every relative path
## used further down (../config, ../results, manifest.xml, match.lst) is
## resolved against the current directory.
scriptdir=$(/usr/bin/dirname "$0")
## Quote the path so blanks in it survive, and stop if the directory cannot
## be entered rather than reading and writing files somewhere else.
cd "$scriptdir" || exit 1

## TODO
## provide search phrase!!!
## get filename of matched file
 
# 2008-04-28

## Work out where to search. The Description field of the manifest in the
## config directory holds a name; search.rc is a list of ";"-separated
## records in which the second field is such a name and the first field is
## the value stored in STARTDIR (used later as the root for find).
STRING=$(grep Description ../config/manifest.xml \
  | sed -e 's/ *<Description>\(.*\)<\/Description>/\1/')

## STRING is handed to awk as an operand assignment, so it is set before
## search.rc is read. Every record whose second field equals it prints its
## first field.
STARTDIR=$(awk -F ';' '$2 == STRING { print $1 }' "STRING=$STRING" ../config/search.rc)



snippetSearch () {
  ## Search the snippets file of one container for a phrase and write one
  ## result container (a directory holding a manifest.xml) per hit under
  ## ../results, relative to the current directory.
  ##   $1  path of the container directory; manifest.xml, snippets and the
  ##       PDF are looked up directly inside it
  ##   $2  search phrase, matched against each line as an awk regular
  ##       expression
  ## Sets the globals SEARCHPHRASE, MANIFEST, SNIPPETS, PDFFILE, TITLE and
  ## CONTAINERID. Returns the exit status of awk (0 for an empty phrase).
  SEARCHPHRASE="$2"
  MANIFEST="$1/manifest.xml"
  SNIPPETS="$1/snippets"

  ## An empty phrase matches every line, so there is nothing to report.
  [ -n "$SEARCHPHRASE" ] || return 0

  ## Full path of the PDF in the container (there should only be one).
  ## Take the first glob match instead of parsing ls output, so a container
  ## path with blanks in it still works. Stays empty when there is no PDF.
  PDFFILE=""
  for pdfcandidate in "$1"/*.pdf
  do
    if [ -e "$pdfcandidate" ]; then
      PDFFILE="$pdfcandidate"
      break
    fi
  done

  ## Get title from manifest file
  TITLE=$(sed -ne 's/.*<[Tt]itle>\(.*\)<\/[Tt]itle>.*/\1/p' "$MANIFEST")

  ## Checksum of the container path. It replaces the unseeded rand() in the
  ## result directory name: rand() produced the same numbers for every
  ## container, so hits on the same line number of two containers landed in
  ## the same directory and overwrote each other.
  CONTAINERID=$(printf '%s' "$1" | cksum)
  CONTAINERID=${CONTAINERID%% *}

  ## All values travel through the environment. Splicing the phrase into the
  ## program text broke on "/" and allowed arbitrary awk code, and operand
  ## assignments would rewrite backslash sequences in the values.
  SS_PHRASE="$SEARCHPHRASE" SS_TITLE="$TITLE" SS_PDF="$PDFFILE" \
  SS_ID="$CONTAINERID" awk '
## Create the result directory and its manifest for the hit held in the
## globals pageno and text. lineno keeps names unique within one snippets
## file. dir and out are local variables.
function writeResult(lineno,    dir, out) {
  dir = "../results/result-" id "-" lineno
  out = dir "/manifest.xml"
  ## dir consists of fixed text and digits only, so it needs no quoting.
  if (system("mkdir -p " dir) != 0) {
    return
  }

  ## NOTE(review): text comes straight from the snippets file and is not
  ## XML-escaped here; confirm whether snippets can contain "&" or "<".
  print "<?xml version=\"1.0\" encoding=\"utf-8\"?>" > out
  print "<package>" > out
  print "<metadata>" > out
  print "<dc-metadata>" > out
  print "<Title>" title "</Title>" > out
  print "<Description>[" pageno "] ... " text "</Description>" > out
  print "<Date>2007-09-10T17:05:26</Date>" > out
  print "</dc-metadata>" > out
  print "<y-metadata>" > out
  print "<startpage>" PDFFILE "</startpage>" > out
  print "<version>000</version>" > out
  print "<ItemSize>393216</ItemSize>" > out
  print "</y-metadata>" > out
  print "</metadata>" > out
  print "<last-location>" > out
  print "<pagenumber>" pageno "</pagenumber>" > out
  print "</last-location>" > out
  print "<viewer-settings>" > out
  print "<zoomfactor>100</zoomfactor>" > out
  print "<rotation>0</rotation>" > out
  print "<positionx>248</positionx>" > out
  print "<positiony>125</positiony>" > out
  print "<mode>column</mode>" > out
  print "<modefullscreen>1</modefullscreen>" > out
  print "<modetoolbar>0</modetoolbar>" > out
  print "<column>0</column></viewer-settings>" > out
  print "<pen-settings><size>3</size><color>3</color><linestyle/><attributes>0</attributes></pen-settings>" > out
  print "</package>" > out

  ## Release the file so a long list of hits cannot run out of open files.
  close(out)
}

BEGIN {
  phrase = ENVIRON["SS_PHRASE"]
  title = ENVIRON["SS_TITLE"]
  PDFFILE = ENVIRON["SS_PDF"]
  id = ENVIRON["SS_ID"]
  phrasefound = 0
}

## A new page of potentially matching data
/^Page/ {
  ## A hit on the last line of the previous page is still pending: report it
  ## now, while pageno and text still describe that page.
  if (phrasefound == 1) {
    writeResult(NR)
    phrasefound = 0
  }
  pageno = $2
  text = ""
}

## Lines that match the search phrase (dynamic regular expression)
$0 ~ phrase {
  text = $0
  phrasefound = 1
  ## Go to the next input line (ignore the rest of the script)
  next
}

## The line after a matching line: combine the two lines of text, write the
## result and reset for the next hit.
phrasefound == 1 {
  text = text " " "\n" $0
  writeResult(NR)
  phrasefound = 0
  text = ""
}

## A hit on the very last line has no following line; report it as it is.
END {
  if (phrasefound == 1) {
    writeResult(NR + 1)
  }
}
' "$SNIPPETS"
}


## Get the string to search for from the Description field of the manifest
## file in the current directory. This replaces the value STRING held while
## STARTDIR was being looked up.
STRING=$(grep Description manifest.xml | sed -e 's/ *<Description>\(.*\)<\/Description>/\1/')

## Find snippets: list every directory below STARTDIR that holds a file
## named "snippets", one path per line, in match.lst.
find "$STARTDIR" -type f -name 'snippets' | sed -e 's/\/snippets$//' | uniq > match.lst

## Search each listed container; the search phrase taken from the manifest
## file goes in as the second parameter.
## IFS= and -r make read hand the path over unchanged: without them,
## backslashes are eaten and leading/trailing blanks are stripped.
while IFS= read -r lineoftext
do
  snippetSearch "$lineoftext" "$STRING"
done < match.lst
