<?php
include_once('simple_html_dom.php');

//INPUT VARS
//$shelfariURL= 'http://www.shelfari.com/books/23080524/Steve-Jobs';
//$wikilink = 'http://en.wikipedia.org/wiki/Crime_and_punishment';
//$base_name = 'CR!9V9Z52PQXN26KCDE081TEPYGACQ6';
//$guid = '6e9075e5';
//$asin = '9f71ae3e-53e8-408a-96e5-3e8f3c323d25';

/*
 * Read the request fields.
 *
 * Every field is optional at this point: a field that is missing from the
 * POST body, or that was submitted as an array (e.g. "url[]=..."), is treated
 * as an empty string so that trim() always receives a string and no
 * undefined-index notices are raised.
 */
$fields = array();
foreach (array('url', 'wikilink', 'name', 'guid', 'asin') as $field)
{
   $fields[$field] = (isset($_POST[$field]) && is_string($_POST[$field])) ? trim($_POST[$field]) : '';
}
$shelfariURL = $fields['url'];
$wikilink = $fields['wikilink'];
$base_name = $fields['name'];
$guid = $fields['guid'];
$asin = $fields['asin'];

/*
 * Normalise the guid to upper-case hexadecimal.
 * A value containing at least one ASCII letter is taken to be hexadecimal
 * already and is only upper-cased; a value without letters is read as a
 * decimal integer and converted to hexadecimal (an empty value becomes "0").
 * NOTE(review): a hexadecimal guid made of digits only is indistinguishable
 * from a decimal one here and will be converted - confirm callers never send one.
 */
if(preg_match('/[a-zA-Z]/', $guid))
{
   $guid = strtoupper($guid);
}else {
   $guid = strtoupper(dechex(intval($guid)));
}

/*Storage Arrays*/
$aCharacters = array(); // character name => description
$aPhrases = array();    // topic name => description

/*
 * Replacement patterns applied to terms and descriptions.
 * $patterns[n] is replaced by $replacements[n], in index order.
 */
$patterns = array(
   0 => '/"/',        // double quote, escaped for embedding in a JSON string
   1 => '/\[\d+\]/',  // citation markers such as [3] or [12]
);
$replacements = array(
   0 => '\"',
   1 => '',
);

/*
 * Patterns that turn a term into the path segment appended to the Wikipedia
 * URL. $wikiPatterns[n] is replaced by $wikiReplace[n], in index order.
 *
 * All patterns carry the "u" modifier: pattern 2 lists the multi-byte curly
 * quotes in a character class, and without "u" PCRE treats each of their bytes
 * as a separate class member, which strips those bytes out of unrelated
 * characters (for example the apostrophe in O’Brien) and leaves invalid UTF-8.
 * With "u", preg_replace() returns null for a subject that is not valid UTF-8.
 * NOTE(review): assumes the scraped text is UTF-8 - confirm.
 */
$wikiPatterns = array(
   0 => '/\s?\([^)]+\)/u',            // remove everything in ()
   1 => '/\s?["\'(]\\w+["\')]/u',     // any single word in quotes or parentheses
   2 => '/[.,“”()\/\"?\':\[\]]/u',    // remaining punctuation
   3 => '/\s/u',                      // whitespace becomes an underscore
);
$wikiReplace = array(
   0 => '',
   1 => '',
   2 => '',
   3 => '_',
);
//$testString = 'Nastasya Petrovna (“Nastenka,” “Nastasyushka”):';
//echo preg_replace($wikiPatterns, $wikiReplace, $testString);
//return;

/*GET SHELFARI CONTENT*/
//$html = file_get_html('data.txt');

/**
 * Tell whether a value is an absolute http:// or https:// URL.
 *
 * The URLs fetched below come straight from the request. file_get_html() also
 * accepts local paths (see the commented-out 'data.txt' call above), so only
 * http(s) URLs are allowed through.
 *
 * @param string $url Candidate URL.
 * @return bool True when $url starts with "http://" or "https://".
 */
function xray_is_http_url($url)
{
   return preg_match('#^https?://#i', $url) === 1;
}

/**
 * Send a plain-text error response and stop the script.
 *
 * @param string $status  HTTP status line without the protocol, e.g. "400 Bad Request".
 * @param string $message Text sent as the response body.
 * @return void Never returns; the script exits.
 */
function xray_fail($status, $message)
{
   header('HTTP/1.1 ' . $status);
   header('Content-Type: text/plain; charset=utf-8');
   echo $message;
   exit;
}

/**
 * Remove citation markers from a piece of scraped text.
 *
 * @param string $text        Text to clean.
 * @param string $pattern     Regular expression matching a citation marker.
 * @param string $replacement Replacement for each marker.
 * @return string Cleaned text, or $text unchanged if the regex engine fails.
 */
function xray_strip_citations($text, $pattern, $replacement)
{
   $clean = preg_replace($pattern, $replacement, $text);
   return $clean === null ? $text : $clean;
}

/**
 * Add the list items of one Shelfari wiki module to a name => description map.
 *
 * Each item is split at its first colon into name and description. An item
 * with no colon, or with the colon as its first character, is used whole as
 * both name and description. Names and descriptions are trimmed, items whose
 * name is empty after trimming are skipped, and a name that already exists in
 * $entries is overwritten.
 *
 * @param object $html     Parsed page as returned by file_get_html().
 * @param string $moduleId Element id of the module, e.g. "WikiModule_Characters".
 * @param array  $entries  Map to extend.
 * @return array The extended map.
 */
function xray_collect_entries($html, $moduleId, $entries)
{
   $items = $html->find('#' . $moduleId . ' ul.li_6 li');
   if (!$items)
   {
      return $entries;
   }
   foreach ($items as $li)
   {
      $text = $li->plaintext;
      $colon = strpos($text, ':');
      if ($colon !== false && $colon > 0)
      {
         $name = substr($text, 0, $colon);
         $description = substr($text, $colon + 1);
      } else
      {
         $name = $text;
         $description = $text;
      }
      $name = trim($name);
      if ($name === '')
      {
         continue; // an entry without a term is of no use in the output
      }
      $entries[$name] = trim($description);
   }
   return $entries;
}

/**
 * Build one entry of the "terms" list.
 *
 * @param string $type    "character" or "topic".
 * @param string $term    Term text.
 * @param string $desc    Description text.
 * @param string $descUrl URL stored as the description source.
 * @param array  $loc     The four numbers stored as the single "locs" element.
 * @return array Associative array in the key order used by the output file.
 */
function xray_term($type, $term, $desc, $descUrl, $loc)
{
   return array(
      'type' => $type,
      'term' => $term,
      'desc' => $desc,
      'descSrc' => 'wiki',
      'descUrl' => $descUrl,
      'locs' => array($loc),
   );
}

if (!xray_is_http_url($shelfariURL))
{
   xray_fail('400 Bad Request', 'The "url" field must be an absolute http(s) URL.');
}
if ($wikilink != '' && !xray_is_http_url($wikilink))
{
   xray_fail('400 Bad Request', 'The "wikilink" field must be an absolute http(s) URL.');
}

$html = file_get_html($shelfariURL);
if (!$html)
{
   xray_fail('502 Bad Gateway', 'The Shelfari page could not be loaded or parsed.');
}

/*
 * Only the citation-marker rule (index 1) of $patterns/$replacements is used
 * here. The quote-escaping rule (index 0) is deliberately left out because
 * json_encode() below escapes every string, including backslashes, quotes
 * and control characters.
 */
$citationPattern = $patterns[1];
$citationReplacement = $replacements[1];

/*OUTPUT*/
$terms = array();

/*wikipedia link*/
if ($wikilink != '')
{
   // Best effort: when the article cannot be fetched or has no matching
   // paragraph, the "Wikipedia Info" term is simply left out.
   $wikihtml = file_get_html($wikilink);
   $firstParagraph = $wikihtml ? $wikihtml->find('.mw-content-ltr p', 0) : null;
   if ($firstParagraph)
   {
      $terms[] = xray_term(
         'topic',
         'Wikipedia Info',
         xray_strip_citations($firstParagraph->plaintext, $citationPattern, $citationReplacement),
         $wikilink,
         array(100, 100, 100, 5)
      );
   }
}

//Characters
$aCharacters = xray_collect_entries($html, 'WikiModule_Characters', $aCharacters);
ksort($aCharacters);
foreach ($aCharacters as $name => $description)
{
   $name = (string) $name; // PHP turns numeric-looking array keys into integers
   $terms[] = xray_term(
      'character',
      xray_strip_citations($name, $citationPattern, $citationReplacement),
      xray_strip_citations($description, $citationPattern, $citationReplacement),
      'http://m.wikipedia.org/wiki/' . preg_replace($wikiPatterns, $wikiReplace, $name),
      array(100, 100, 100, 6)
   );
}

//Organizations, Settings, Glossary, Themes & Symbolism
// Processed in this order; a name found in a later module replaces the same
// name from an earlier one.
$topicModules = array(
   'WikiModule_Organizations',
   'WikiModule_Settings',
   'WikiModule_Glossary',
   'WikiModule_Themes',
);
foreach ($topicModules as $moduleId)
{
   $aPhrases = xray_collect_entries($html, $moduleId, $aPhrases);
}

/*Sort topics and add them to the output*/
ksort($aPhrases);
foreach ($aPhrases as $name => $description)
{
   $name = (string) $name; // PHP turns numeric-looking array keys into integers
   $terms[] = xray_term(
      'topic',
      xray_strip_citations($name, $citationPattern, $citationReplacement),
      xray_strip_citations($description, $citationPattern, $citationReplacement),
      'http://m.wikipedia.org/wiki/' . preg_replace($wikiPatterns, $wikiReplace, $name),
      array(100, 100, 100, 6)
   );
}

$document = array(
   'asin' => $asin,
   'guid' => $base_name . ':' . $guid,
   'version' => '1',
   'terms' => $terms, // stays a valid empty list when nothing was found
   'chapters' => array(
      array('name' => null, 'start' => 1, 'end' => 9999999),
   ),
);

// The two flags keep "/" and non-ASCII characters literal in the output.
$out = json_encode($document, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
if ($out === false)
{
   // json_encode() refuses strings that are not valid UTF-8.
   xray_fail('500 Internal Server Error', 'The scraped text could not be encoded as JSON.');
}

// The asin comes from the request; keep only characters that are safe inside
// the quoted filename of the Content-Disposition header.
$fileName = 'XRAY.entities.' . preg_replace('/[^A-Za-z0-9._-]/', '', $asin) . '.asc';
header("Content-type: application/octet-stream");
header("Content-Disposition: attachment; filename=\"$fileName\"");
echo $out;

?>

