/*
 * File Name: text_model.h
 */

/*
 * This file is part of uds-plugin-plaintext.
 *
 * uds-plugin-plaintext is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * uds-plugin-plaintext is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Copyright (C) 2008 iRex Technologies B.V.
 * All rights reserved.
 */

#ifndef TEXT_MODEL_H
#define TEXT_MODEL_H

#include <string>
#include <vector>
#include <cassert>
#include <glib.h>
#include <iconv.h>
#include "plugin_inc.h"
#include "text_base_types.h"
#include "signal_slot.h"

namespace text
{

/// @brief Name of the fallback character encoding (ISO 8859-1 / Latin-1).
/// NOTE(review): presumably the encoding applied by TextModel::open(path)
/// when the caller does not specify one -- confirm in the implementation file.
/// Being a preprocessor macro, this name is not scoped by the enclosing
/// namespace.
#define DEFAULT_ENCODING "iso8859-1"

/// @brief In-memory model of a plain-text document.
///
/// The document is held as a vector of paragraphs (see the `doc` member),
/// each paragraph carrying its text and the file position at which it starts.
/// The class offers paragraph access, searching, and helpers that map
/// document positions (anchors) to words, text ranges and file offsets.
///
/// NOTE(review): the class stores a raw FILE* and raw std::string* paragraph
/// pointers but does not disable copying. If the destructor / clear() release
/// these resources (not visible in this header), copying a TextModel would
/// release them twice -- consider making the class non-copyable.
class TextModel
{
public:
    /// @brief Constructors and destructors
    TextModel();
    ~TextModel();

public:
    /// @brief Open specified document with default encoding.
    /// @param path Location of the document in the file system.
    /// @return Status code of the open operation.
    PluginStatus open(const std::string& path);

    /// @brief Open document with specified encoding
    /// @param path Location of the document in the file system.
    /// @param encoding Name of the character encoding of the document.
    /// @return Status code of the open operation.
    PluginStatus open(const std::string& path, const std::string& encoding);

    /// @brief Check if document is already open
    /// @return The current value of the internal "open" flag.
    bool is_open() const
    {
        return b_open;
    }

    /// @brief Close document
    void close();

    /// @brief Get current encoding
    /// @return Reference to the stored encoding name; it refers to a member
    ///  of this object.
    const std::string& get_encoding() const
    {
        return encoding;
    }

    /// @brief Get the path of the document
    /// @return Reference to the stored document path; it refers to a member
    ///  of this object.
    const std::string& get_path() const
    {
        return path;
    }

    /// @brief Get number of paragraphs
    /// @return Size of the paragraph vector, narrowed to unsigned int.
    unsigned int get_paragraph_count() const
    {
        return static_cast<unsigned int>(doc.size());
    }

    /// @brief Get one paragraph
    /// @param index Zero-based paragraph index; must be less than
    ///  get_paragraph_count(). This is only checked by an assert, so an
    ///  out-of-range index is undefined behaviour when NDEBUG is defined.
    /// @return The text pointer stored for that paragraph.
    const std::string* get_paragraph(unsigned int index) const
    {
        assert(index < doc.size());
        return doc[index].text;
    }

    /// @brief Search specified pattern given by search criteria.
    /// @param result_ranges Output range collection.
    /// @param sc Search criteria.
    /// @return Return true if search is complete (i.e. we reach the start/end
    ///  of the document, or find an occurrence if SearchType is SEARCH_NEXT),
    ///  otherwise false is returned, user needs to call this function again to get
    ///  the search procedure complete.
    bool search(std::vector<Range>& result_ranges, SearchContext* sc);

    /// @brief Check if the document contains the anchor or not.
    bool has_anchor(const Position &pos);

    /// @brief Get absolute file position from anchor.
    /// @param file_pos Output: the file position corresponding to pos.
    /// @param pos Anchor (document position) to convert.
    bool get_file_pos_from_anchor(size_t& file_pos, const Position &pos);

    /// @brief Get a word from specified document position.
    /// @param pos Document position to look at.
    /// @param word_start_pos Output: start position of the word.
    /// @param word_end_pos Output: end position of the word.
    bool get_word_from_anchor(const Position& pos,
                              Position& word_start_pos,
                              Position& word_end_pos);

    /// @brief Get the words from range, the range will be extended/shrunk to word boundaries.
    /// @param range_start Start of the input range.
    /// @param range_end End of the input range.
    /// @param words_start Output: start of the adjusted range.
    /// @param words_end Output: end of the adjusted range.
    bool get_words_from_range(const Position& range_start,
                              const Position& range_end,
                              Position& words_start,
                              Position& words_end);

    /// @brief Get the text between start_pos and end_pos.
    /// @param result Output: the extracted text.
    /// @param start_pos Start of the range.
    /// @param end_pos End of the range.
    bool get_text_from_range(std::string& result,
                             const Position& start_pos,
                             const Position& end_pos);

    /// @brief Dump the content to disk file.
    void dump();

    /// @brief Record the id of the search task that should be aborted.
    /// This setter only stores the id; how it is acted upon is not visible
    /// in this header.
    void set_aborting_search_task_id(unsigned int id)
    {
        aborting_search_task_id = id;
    }

    /// @brief Get the id of the search task that should be aborted.
    /// Declared const (it only reads a member) so that it can be called on a
    /// const TextModel, like the other getters of this class.
    unsigned int get_aborting_search_task_id() const
    {
        return aborting_search_task_id;
    }

public:
    /// @brief Signals.
    /// NOTE(review): judging by its name and argument types, this signal is
    /// presumably emitted when a search finishes, carrying the result ranges
    /// and the search context -- confirm in the implementation file.
    utils::Signal<const std::vector<Range>&, const SearchContext*> search_done_signal;

private:
    /// @brief Detect the encoding by reading data from text file.
    void detect_encoding();

    /// @brief Clear content read from disk file
    void clear();

    /// @brief Read content from disk file to internal vector
    PluginStatus read_text();

    /// @brief Read text from UTF-8 encoded text
    PluginStatus read_utf8_text();

    /// @brief Read text from non-UTF-8 encoded text
    PluginStatus read_non_utf8_text();

    /// @brief Save block with paragraphs
    /// @param blk Pointer to the block of text.
    /// @param blk_size Size of the block (presumably in bytes -- confirm).
    void save_block_with_paragraphs(const char *blk, size_t blk_size);

    /// @brief A simple wrapper for iconv function
    /// The parameters mirror those of iconv(): conversion descriptor,
    /// input buffer / bytes left, output buffer / bytes left.
    void convert(iconv_t cd, char **in_buf, size_t *in_bytes_left, char **out_buf, size_t *out_bytes_left);

    /// @brief Check whether the char is a word separator
    /// (The spelling "seperator" in the function name is kept because the
    /// definition lives outside this header.)
    bool is_seperator(const char* p);

private:
    /// @brief File pointer
    FILE *file_p;

    /// @brief Current encoding string
    std::string encoding;

    /// @brief Document path in file system
    std::string path;

    /// @brief Flag indicating that current file is successfully opened
    bool b_open;

    /// @brief The id of the search task which needs to be aborted.
    unsigned int aborting_search_task_id;

    /// @brief A flag used when we read content from disk file. If the block
    ///  contains incomplete paragraph, this variable is set to true,
    ///  otherwise it is false.
    bool incomplete_line;

    /// @brief One paragraph of the document.
    struct Paragraph
    {
    public:
        /// File position at which the paragraph starts.
        size_t       start_file_pos;
        /// Paragraph text. NOTE(review): raw pointer; ownership and release
        /// are not visible in this header -- confirm in the implementation.
        std::string* text;

    public:
        /// @brief Build a paragraph from its start position and text pointer.
        /// The pointer is stored as-is; no copy of the string is made.
        Paragraph(size_t pos, std::string* _text)
        : start_file_pos(pos), text(_text)
        {
        }
    };

    /// @brief text document
    typedef std::vector<Paragraph> TextDocument;
    typedef TextDocument::iterator TextDocumentIter;

    /// @brief Paragraph array, the paragraph is already in UTF-8 format
    TextDocument doc;
};

};  // text

#endif // TEXT_MODEL_H

