﻿using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;

using System.IO;

namespace ConvertIMPGUI
{
    /// <summary>
    /// Small streaming HTML reader. It scans an HTML file character by character and
    /// forwards the text found inside the body element, together with a handful of
    /// formatting control codes, to a <c>CBBeBFile</c> through <c>outAppend</c>.
    /// </summary>
    /// <remarks>
    /// Only a few tags are recognised: p, i, sup, sub, pre, br and body. Tag matching is
    /// deliberately simple: "p" and "i" must be the complete tag text (so a paragraph tag
    /// carrying attributes is ignored), the other tags match by prefix. Bold tags are
    /// ignored; <see cref="START_BOLD"/> and <see cref="END_BOLD"/> are declared but never
    /// emitted by this class.
    /// </remarks>
    public class HtmlParser
    {
        #region Constants
        public const char START_BOLD = (char)0xf5c1;
        public const char END_BOLD = (char)0xf5c2;

        public const char START_SUP = (char)0xf5b7;
        public const char END_SUP = (char)0xf5b8;
        public const char START_SUB = (char)0xf5b9;
        public const char END_SUB = (char)0xf5ba;

        // The codes below are kept as int constants because the original code passed
        // them to outAppend as int literals; this keeps overload resolution unchanged.
        private const int START_ITALIC = 0xf581;
        private const int END_ITALIC = 0xf582;
        private const int START_PAGE = 0xf5a1;
        private const int END_PAGE = 0xf5a2;
        private const int PARAGRAPH_INDENT = 0xf5ca;

        // Operand written right after PARAGRAPH_INDENT ("indent paragraph 18pt").
        // NOTE(review): presumably expressed in tenths of a point - confirm against the format.
        private const int PARAGRAPH_INDENT_AMOUNT = 180;

        // Written by breakLine(); presumably the end-of-line code - confirm against the format.
        private const int LINE_BREAK = 0xf5d2;

        // Once the book's output offset exceeds this value, the next line break
        // closes the current page and opens a new one instead.
        private const int PAGE_BREAK_THRESHOLD = 48000;

        // Numeric character references that are never forwarded to the book.
        private const int NO_BREAK_SPACE = 160;
        private const int SOFT_HYPHEN = 173;
        #endregion

        #region Member Variables
        private readonly CBBeBFile book;
        private readonly byte[] buf;

        // Entity name (without '&' and ';') -> character. Names are case sensitive
        // ("Agrave" and "agrave" are different entities).
        private static readonly Dictionary<string, char> entityToCharacter = buildEntityTable();
        #endregion

        #region Constructor
        /// <summary>
        /// Creates a parser that will write into <paramref name="aBook"/> and immediately
        /// loads the whole HTML file into memory.
        /// </summary>
        /// <param name="aBook">Book that receives the converted output.</param>
        /// <param name="aHtmlFileName">Path of the HTML file to read.</param>
        public HtmlParser(CBBeBFile aBook, String aHtmlFileName)
        {
            book = aBook;
            buf = readWholeFile(aHtmlFileName);
        }
        #endregion

        #region buildEntityTable
        /// <summary>
        /// Builds the lookup table of named character entities: the markup-significant
        /// characters plus the Latin-1 range 160-255.
        /// </summary>
        private static Dictionary<string, char> buildEntityTable()
        {
            Dictionary<string, char> table = new Dictionary<string, char>(StringComparer.Ordinal);

            // Entities for the markup characters themselves. They were missing from the
            // original table, which made the parser crash on the first "&amp;".
            table.Add("amp", '&');
            table.Add("lt", '<');
            table.Add("gt", '>');
            table.Add("quot", '"');
            table.Add("apos", '\'');

            // 173 (shy) is intentionally absent, matching the skip of the numeric
            // reference 173. NOTE(review): "nbsp" is emitted here while the numeric
            // reference 160 is skipped - confirm which of the two is intended.
            table.Add("nbsp", (char)160);
            table.Add("iexcl", (char)161);
            table.Add("cent", (char)162);
            table.Add("pound", (char)163);
            table.Add("curren", (char)164);
            table.Add("yen", (char)165);
            table.Add("brvbar", (char)166);
            table.Add("sect", (char)167);
            table.Add("uml", (char)168);
            table.Add("copy", (char)169);
            table.Add("ordf", (char)170);
            table.Add("laquo", (char)171);
            table.Add("not", (char)172);
            table.Add("reg", (char)174);
            table.Add("macr", (char)175);
            table.Add("deg", (char)176);
            table.Add("plusmn", (char)177);
            table.Add("sup2", (char)178);
            table.Add("sup3", (char)179);
            table.Add("acute", (char)180);
            table.Add("micro", (char)181);
            table.Add("para", (char)182);
            table.Add("middot", (char)183);
            table.Add("cedil", (char)184);
            table.Add("sup1", (char)185);
            table.Add("ordm", (char)186);
            table.Add("raquo", (char)187);
            table.Add("frac14", (char)188);
            table.Add("frac12", (char)189);
            table.Add("frac34", (char)190);
            table.Add("iquest", (char)191);
            table.Add("Agrave", (char)192);
            table.Add("Aacute", (char)193);
            table.Add("Acirc", (char)194);
            table.Add("Atilde", (char)195);
            table.Add("Auml", (char)196);
            table.Add("Aring", (char)197);
            table.Add("AElig", (char)198);
            table.Add("Ccedil", (char)199);
            table.Add("Egrave", (char)200);
            table.Add("Eacute", (char)201);
            table.Add("Ecirc", (char)202);
            table.Add("Euml", (char)203);
            table.Add("Igrave", (char)204);
            table.Add("Iacute", (char)205);
            table.Add("Icirc", (char)206);
            table.Add("Iuml", (char)207);
            table.Add("ETH", (char)208);
            table.Add("Ntilde", (char)209);
            table.Add("Ograve", (char)210);
            table.Add("Oacute", (char)211);
            table.Add("Ocirc", (char)212);
            table.Add("Otilde", (char)213);
            table.Add("Ouml", (char)214);
            table.Add("times", (char)215);
            table.Add("Oslash", (char)216);
            table.Add("Ugrave", (char)217);
            table.Add("Uacute", (char)218);
            table.Add("Ucirc", (char)219);
            table.Add("Uuml", (char)220);
            table.Add("Yacute", (char)221);
            table.Add("THORN", (char)222);
            table.Add("szlig", (char)223);
            table.Add("agrave", (char)224);
            table.Add("aacute", (char)225);
            table.Add("acirc", (char)226);
            table.Add("atilde", (char)227);
            table.Add("auml", (char)228);
            table.Add("aring", (char)229);
            table.Add("aelig", (char)230);
            table.Add("ccedil", (char)231);
            table.Add("egrave", (char)232);
            table.Add("eacute", (char)233);
            table.Add("ecirc", (char)234);
            table.Add("euml", (char)235);
            table.Add("igrave", (char)236);
            table.Add("iacute", (char)237);
            table.Add("icirc", (char)238);
            table.Add("iuml", (char)239);
            table.Add("eth", (char)240);
            table.Add("ntilde", (char)241);
            table.Add("ograve", (char)242);
            table.Add("oacute", (char)243);
            table.Add("ocirc", (char)244);
            table.Add("otilde", (char)245);
            table.Add("ouml", (char)246);
            table.Add("divide", (char)247);
            table.Add("oslash", (char)248);
            table.Add("ugrave", (char)249);
            table.Add("uacute", (char)250);
            table.Add("ucirc", (char)251);
            table.Add("uuml", (char)252);
            table.Add("yacute", (char)253);
            table.Add("thorn", (char)254);
            table.Add("yuml", (char)255);

            return table;
        }
        #endregion

        #region readWholeFile
        /// <summary>
        /// Reads the whole file as text and returns it as one byte per character.
        /// </summary>
        /// <param name="aFileName">Path of the file to read.</param>
        /// <returns>
        /// The file content with characters U+0000-U+00FF preserved; every other
        /// character is replaced by '?'.
        /// </returns>
        private static byte[] readWholeFile(String aFileName)
        {
            string wholeText;

            // "using" guarantees the reader is closed even when ReadToEnd throws.
            using (StreamReader reader = new StreamReader(aFileName))
            {
                wholeText = reader.ReadToEnd();
            }

            // The original encoded with ASCII, which turned every accented character
            // into '?'. ISO-8859-1 maps U+0000-U+00FF one-to-one onto bytes, which is
            // exactly the range the parse loop and the entity table work with. The
            // explicit "?" fallback keeps the old replacement for everything else.
            Encoding latin1 = Encoding.GetEncoding(
                "ISO-8859-1",
                new EncoderReplacementFallback("?"),
                new DecoderReplacementFallback("?"));

            return latin1.GetBytes(wholeText);
        }
        #endregion

        #region parsePages
        /// <summary>
        /// Converts the loaded HTML into book pages. Text outside the body element is
        /// ignored; inside it, runs of whitespace and control characters collapse into
        /// a single space that is written in front of the next word.
        /// </summary>
        public void parsePages()
        {
            int bufIndex = 0;
            bool inTag = false;
            bool inEntity = false;
            bool inBody = false;
            bool inWord = false;
            StringBuilder tagBuffer = new StringBuilder();
            StringBuilder entityBuffer = new StringBuilder();

            startPage();

            while (bufIndex < buf.Length)
            {
                char c = (char)buf[bufIndex++];

                if (inTag)
                {
                    if (c == '>')
                    {
                        inTag = false;
                        inBody = handleTag(tagBuffer.ToString(), inBody);
                    }
                    else
                    {
                        // Invariant lower-casing: a culture-sensitive ToLower() maps 'I'
                        // to a dotless i under Turkish cultures and would break "<I>".
                        tagBuffer.Append(Char.ToLowerInvariant(c));
                    }
                }
                else if (inEntity)
                {
                    if (c == ';')
                    {
                        inEntity = false;
                        inWord = appendEntity(entityBuffer.ToString(), inWord);
                    }
                    else if (c == '<' || c == '&' || Char.IsControl(c) || Char.IsWhiteSpace(c))
                    {
                        // A bare '&' (as in "AT&T rocks"), not an entity. Write what was
                        // collected as literal text instead of swallowing the document up
                        // to the next ';', then process the current character normally.
                        inEntity = false;
                        appendWordChar('&', inWord);
                        for (int i = 0; i < entityBuffer.Length; i++)
                        {
                            book.outAppend(entityBuffer[i]);
                        }
                        inWord = true;
                        bufIndex--;
                    }
                    else
                    {
                        entityBuffer.Append(c);
                    }
                }
                else if (c == '<')
                {
                    inTag = true;
                    tagBuffer.Length = 0;
                }
                else if (inBody)
                {
                    if (c == '&')
                    {
                        inEntity = true;
                        entityBuffer.Length = 0;
                    }
                    else if (Char.IsControl(c) || Char.IsWhiteSpace(c))
                    {
                        inWord = false;
                    }
                    else
                    {
                        appendWordChar(c, inWord);
                        inWord = true;
                    }
                }
            }

            endPage();
        }
        #endregion

        #region handleTag
        /// <summary>
        /// Reacts to one complete tag.
        /// </summary>
        /// <param name="tag">Lower-cased tag text without the angle brackets.</param>
        /// <param name="inBody">Whether the parser is currently inside the body element.</param>
        /// <returns>The new "inside body" state.</returns>
        private bool handleTag(string tag, bool inBody)
        {
            if (!inBody)
            {
                // Outside the body only the opening body tag (with or without
                // attributes) matters.
                return tag.StartsWith("body", StringComparison.Ordinal);
            }

            if (tag.Length == 0)
            {
                // "<>" - nothing to do. The original indexed tag[0] here and crashed.
                return true;
            }

            if (tag[0] == '/')
            {
                if (tag == "/p")
                {
                    endParagraph();
                }
                else if (tag == "/i")
                {
                    book.outAppend(END_ITALIC);
                }
                else if (tag.StartsWith("/body", StringComparison.Ordinal))
                {
                    return false;
                }
                else if (tag.StartsWith("/sup", StringComparison.Ordinal))
                {
                    book.outAppend(END_SUP); // End superscript
                }
                else if (tag.StartsWith("/sub", StringComparison.Ordinal))
                {
                    book.outAppend(END_SUB); // End subscript
                }
                else if (tag.StartsWith("/pre", StringComparison.Ordinal))
                {
                    endParagraph();
                }

                return true;
            }

            if (tag == "p")
            {
                startParagraph();
            }
            else if (tag == "i")
            {
                book.outAppend(START_ITALIC);
            }
            else if (tag.StartsWith("sup", StringComparison.Ordinal))
            {
                book.outAppend(START_SUP); // Start superscript
            }
            else if (tag.StartsWith("sub", StringComparison.Ordinal))
            {
                book.outAppend(START_SUB); // Start subscript
            }
            else if (tag.StartsWith("pre", StringComparison.Ordinal))
            {
                startParagraph();
            }
            else if (tag.StartsWith("br", StringComparison.Ordinal))
            {
                breakLine();
            }

            return true;
        }
        #endregion

        #region appendEntity
        /// <summary>
        /// Decodes one character reference and writes the resulting character.
        /// Unknown names, malformed numbers and the code points 160 and 173 given
        /// numerically are skipped without output.
        /// </summary>
        /// <param name="entity">Reference text between '&amp;' and ';'.</param>
        /// <param name="inWord">Whether the previous output character belonged to a word.</param>
        /// <returns>The new "inside word" state.</returns>
        private bool appendEntity(string entity, bool inWord)
        {
            if (entity.Length == 0)
            {
                // "&;" - the original indexed entity[0] here and crashed.
                return inWord;
            }

            if (entity[0] == '#')
            {
                int value;
                if (!tryParseNumericReference(entity, out value) ||
                    value == NO_BREAK_SPACE ||
                    value == SOFT_HYPHEN)
                {
                    return inWord;
                }

                // Decoded characters take part in word spacing like ordinary text;
                // previously the separating space ended up after them, not before.
                if (!inWord)
                {
                    book.outAppend(' ');
                }
                book.outAppend(value);
                return true;
            }

            char entityValue;
            if (!entityToCharacter.TryGetValue(entity, out entityValue))
            {
                // Unknown name. The original unboxed a null Hashtable entry here and
                // threw a NullReferenceException.
                return inWord;
            }

            appendWordChar(entityValue, inWord);
            return true;
        }
        #endregion

        #region tryParseNumericReference
        /// <summary>
        /// Parses a numeric character reference: "#" followed by decimal digits, or
        /// "#x" / "#X" followed by hexadecimal digits.
        /// </summary>
        /// <param name="entity">Reference text starting with '#'.</param>
        /// <param name="value">Receives the parsed code point on success.</param>
        /// <returns>true when the reference could be parsed.</returns>
        private static bool tryParseNumericReference(string entity, out int value)
        {
            if (entity.Length > 2 && (entity[1] == 'x' || entity[1] == 'X'))
            {
                return int.TryParse(
                    entity.Substring(2),
                    System.Globalization.NumberStyles.AllowHexSpecifier,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out value);
            }

            // Same parsing rules as the int.Parse call this replaces, but without
            // using an exception to signal malformed input.
            return int.TryParse(entity.Substring(1), out value);
        }
        #endregion

        #region appendWordChar
        /// <summary>
        /// Writes one visible character, preceded by a single space when it starts a
        /// new word.
        /// </summary>
        private void appendWordChar(char c, bool inWord)
        {
            if (!inWord)
            {
                book.outAppend(' ');
            }
            book.outAppend(c);
        }
        #endregion

        #region startParagraph
        /// <summary>Writes the paragraph indent code followed by its operand.</summary>
        private void startParagraph()
        {
            book.outAppend(PARAGRAPH_INDENT);	// Indent paragraph 18pt
            book.outAppend(PARAGRAPH_INDENT_AMOUNT);
        }
        #endregion

        #region endParagraph
        /// <summary>Ends a paragraph with two line breaks.</summary>
        private void endParagraph()
        {
            breakLine();
            breakLine();
        }
        #endregion

        #region breakLine
        /// <summary>
        /// Writes a line break, or starts a new page instead when the book's output
        /// offset has grown past <see cref="PAGE_BREAK_THRESHOLD"/>.
        /// </summary>
        private void breakLine()
        {
            if (book.outBufOffset > PAGE_BREAK_THRESHOLD)
            {
                breakPage();
            }
            else
            {
                book.outAppend(LINE_BREAK);
            }
        }
        #endregion

        #region breakPage
        /// <summary>Closes the current page and opens the next one.</summary>
        private void breakPage()
        {
            endPage();
            startPage();
        }
        #endregion

        #region startPage
        /// <summary>Writes the page-start code followed by two zero values.</summary>
        private void startPage()
        {
            book.outAppend(START_PAGE);
            // NOTE(review): the meaning of the two zero values is not visible here.
            book.outAppend(0x0000);
            book.outAppend(0x0000);
        }
        #endregion

        #region endPage
        /// <summary>
        /// Writes the page-end code and hands the accumulated output to the book as a
        /// text page.
        /// </summary>
        private void endPage()
        {
            book.outAppend(END_PAGE);

            book.addOutBufAsTextPage();
        }
        #endregion
    }
}
