﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, John Howell <jhowell@acm.org>'
__docformat__ = 'restructuredtext en'

from calibre_plugins.overdrive_link.numbers import (word_number, word_rank, numeric_rank, roman_number, word_year)
# Lookup tables of equivalent words, used for title matching.

# word -> set of single words treated as equivalent to it
equivalent_words = dict()

# word -> set of multi-word equivalents, each stored as a tuple of its words
equivalent_multi_words = dict()


# Hand-maintained table of equivalent words. Each tuple is one group of
# spellings/terms that are treated as interchangeable.
#
# Only the first element of a tuple may contain multiple words.
# Entries containing a space are never used as lookup keys; they are only
# recorded as equivalents of the single-word entries in the same tuple.
# Repeated tuples are harmless because the lookup tables store sets.
equivalents = [

    # Misspelled titles at libraries
    ('neveryon', 'nevron', 'nevaereyon'), # Nevèrÿon at freading, ebscohost
    ('neveryona', 'neveryna', 'nevaereyona', 'neveryaona'), # Nevèrÿona at freading, ebscohost
    

    # Common misspellings (http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines)
    ('altho', 'although'),
    ('approproximate', 'approximate'),
    ('assasined', 'assassinated'),
    ('assisnate', 'assassinate'),
    ('assosication', 'assassination'),
    ('attaindre', 'attained'),
    ('azn', 'asian'),
    ('beaurocracy', 'bureaucracy'),
    ('beaurocratic', 'bureaucratic'),
    ('broacasted', 'broadcast'),
    ('cant', 'cannot'),
    ('catapiller', 'caterpillar'),
    ('catapillers', 'caterpillars'),
    ('cervial', 'servile'),
    ('charistics', 'characteristics'),
    ('colonizators', 'colonizers'),
    ('consequentually', 'consequently'),
    ('copywrite', 'copyright'),
    ('criticists', 'critics'),
    ('critising', 'criticizing'),
    ('deriviated', 'derived'),
    ('deteoriated', 'deteriorated'),
    ('differentiatiations', 'differentiations'),
    ('emmisarries', 'emissaries'),
    ('emmisarry', 'emissary'),
    ('eraticly', 'erratically'),
    ('fiel', 'phial'),
    ('fiels', 'phials'),
    ('flourishment', 'flourishing'),
    ('funguses', 'fungi'),
    ('futhroc', 'futhark'),
    ('gogin', 'gauguin'),
    ('gouvener', 'governor'),
    ('hvea', 'heave'),
    ('idaeidae', 'idea'),
    ('intepretator', 'interpretor'),
    ('libitarianisn', 'libertarianism'),
    ('maintainence', 'maintenance'),
    ('manuever', 'manoeuvre'),
    ('manuevers', 'manoeuvres'),
    ('mediterainnean', 'mediterranean'),
    ('monolite', 'monolithic'),
    ('nessasarily', 'necessarily'),
    ('nessecary', 'necessary'),
    ('ocassionaly', 'occasionally'),
    ('palistian', 'palestinian'),
    ('peculure', 'peculiar'),
    ('playwrite', 'playwright'),
    ('playwrites', 'playwrights'),
    ('premonasterians', 'premonstratensians'),
    ('prominately', 'prominently'),
    ('pususading', 'persuading'),
    ('resssurecting', 'resurrecting'),
    ('restaraunteur', 'restaurateur'),
    ('restaraunteurs', 'restaurateurs'),
    ('resteraunt', 'restraunt', 'restaurant'),
    ('resteraunts', 'restaurants'),
    ('sepina', 'subpoena'),
    ('shoudln', 'shouldnt'),
    ('sophicated', 'sophisticated'),
    ('strikely', 'strikingly'),
    ('suburburban', 'suburban'),
    ('sucesfuly', 'successfully'),
    ('supposingly', 'supposedly'),
    ('thast', 'thats'),
    ('thru', 'through'),
    ('transcendentational', 'transcendental'),
    ('uneccesary', 'unnecessary'),
    ('unsucesfuly', 'unsuccessfully'),
    ('warantee', 'warranty'),
    ('wendsay', 'wensday', 'wednesday'),
    
    # Other misspellings ("ie"/"ei" transpositions; correct spelling listed first)
    
    ('relieve', 'releive'),
    ('relief', 'releif'),
    ('chieftain', 'cheiftain'),
    ('grievous', 'greivous'),
    ('field', 'feild'),
    ('belief', 'beleif'),
    ('yield', 'yeild'),
    ('thief', 'theif'),
    ('reprieve', 'repreive'),
    ('besiege', 'beseige'),
    ('piece', 'peice'),
    ('pierce', 'peirce'),
    ('reprieve', 'repreive'),  # repeats the 'reprieve' entry a few lines above
    ('brief', 'breif'),
    ('sieve', 'seive'),
    ('siege', 'seige'),
    ('believe', 'beleive'),
    ('conceit', 'conciet'),
    ('ceiling', 'cieling'),
    ('deceive', 'decieve'),
    ('perceive', 'percieve'),
    ('receipt', 'reciept'),
    ('deceit', 'deciet'),
    ('conceive', 'concieve'),
    ('receive', 'recieve'),
    ('rein', 'rien'),
    ('veil', 'viel'),
    ('heir', 'hier'),
    ('weight', 'wieght'),
    ('their', 'thier'),
    ('sleigh', 'sliegh'),
    ('vein', 'vien'),
    ('neigh', 'niegh'),
    ('skein', 'skien'),
    ('neighbor', 'nieghbor'),
    ('reign', 'riegn'),
    ('freight', 'frieght'),
    ('heifer', 'hiefer'),
    ('weir', 'wier'),
    ('weigh', 'wiegh'),
    ('foreign', 'foriegn'),
    ('forfeit', 'forfiet'),
    ('friend', 'freind'),
    ('mischief', 'mischeif'),
    ('seize', 'sieze'),
    ('sheik', 'shiek'),
    ('weird', 'wierd'),
    ('neither', 'niether'),
    ('financier', 'financeir'),
    ('leisure', 'liesure'),
    ('seize', 'sieze'),  # repeats the 'seize' entry a few lines above
    ('species', 'speceis'),
    ('ancient', 'anceint'),
    ('fierce', 'feirce'),
    ('lieutenant', 'leiutenant'),
    
    # Abbreviations
    ('doctor', 'dr'),
    ('edition', 'ed'),
    ('illustrated', 'illus'),
    ('introduction', 'intro'),
    ('light', 'lite'),
    ('limited', 'ltd'),
    ('original', 'orig'),
    ('road', 'rd'),
    ('saint', 'st'),
    ('science fiction', 'sf'),  # multi-word entry: listed first, never a lookup key
    ('street', 'st'),
    ('through', 'thru'),  # same pair as ('thru', 'through') in the misspellings section
    ('throughout', 'thruout'),
    ('versus', 'vs'),
    ('volume', 'vol', 'part'),
    
    # Prepositions
    ('at', 'in', 'of'),
    
    # British/American English
    ('aerie', 'eyrie'),
    ('aluminum', 'aluminium'),
    ('annex', 'annexe'),
    ('ass', 'arse'),
    ('analog', 'analogue'),
    ('airplane', 'aeroplane'),
    ('artifact', 'artefact'),
    ('behavior', 'behaviour'),
    ('behaviorism', 'behaviourism'),
    ('behoove', 'behove'),
    ('boogeyman', 'bogeyman'),
    ('brazier', 'brasier'),
    ('caliber', 'calibre'),
    ('canceled', 'cancelled'),
    ('carburetor', 'carburettor'),
    ('catalog', 'catalogue'),
    ('center', 'centre'),
    ('chamomile', 'camomile'),
    ('check', 'cheque'),
    ('checker', 'chequer'),
    ('chili', 'chilli'),
    ('cipher', 'cypher'),
    ('color', 'colour'),
    ('colored', 'coloured'),
    ('cozy', 'cosy'),
    ('connection', 'connexion'),
    ('curb', 'kerb'),
    ('defense', 'defence'),
    ('dialog', 'dialogue'),
    ('dike', 'dyke'),
    ('donut', 'doughnut'),
    ('draft', 'draught'),
    ('endeavor', 'endeavour'),
    ('fantasm', 'phantasm'),
    ('favor', 'favour'),
    ('favorite', 'favourite'),
    ('fiber', 'fibre'),
    ('filet', 'fillet'),
    ('flavor', 'flavour'),
    ('furor', 'furore'),
    ('gage', 'gauge'),
    ('glamor', 'glamour'),
    ('glamorous', 'glamourous'),
    ('gray', 'grey'),
    ('grill', 'grille'),
    ('harbor', 'harbour'),
    ('harken', 'hearken'),
    ('honor', 'honour'),
    ('honorable', 'honourable'),
    ('humor', 'humour'),
    ('humorless', 'humourless'),
    ('idyl', 'idyll'),
    ('inquiry', 'enquiry'),
    ('insure', 'ensure'),
    ('jail', 'gaol'),
    ('labor', 'labour'),
    ('leveled', 'levelled'),
    ('libeled', 'libelled'),
    ('licorice', 'liquorice'),
    ('liter', 'litre'),
    ('luster', 'lustre'),
    ('maneuver', 'manoeuvre'),
    ('meter', 'metre'),
    ('mollusk', 'mollusc'),
    ('mold', 'mould'),
    ('molt', 'moult'),
    ('monolog', 'monologue'),
    ('mustache', 'moustache'),
    ('mom', 'mum'),
    ('mommy', 'mummy'),
    ('naivete', 'naivety'),
    ('neighbor', 'neighbour'),
    ('neighborhood','neighbourhood'),
    ('neuron', 'neurone'),
    ('offense', 'offence'),
    ('omelet', 'omelette'),
    ('organize', 'organise'),
    ('oriented', 'orientated'),
    ('pajamas', 'pyjamas'),
    ('parlor', 'parlour'),
    ('persnickety', 'pernickety'),
    ('phony', 'phoney'),
    ('plow', 'plough'),
    ('pretense', 'pretence'),
    ('primeval', 'primaeval'),
    ('program', 'programme'),
    ('quarreled', 'quarrelled'),
    ('realize', 'realise'),
    ('recognize', 'recognise'),
    ('redout', 'redoubt'),
    ('rime', 'rhyme'),
    ('rumor', 'rumour'),
    ('savory', 'savoury'),
    ('sirup', 'syrup'),
    ('skeptic', 'sceptic'),
    ('skeptical', 'sceptical'),
    ('skepticism', 'scepticism'),
    ('sled', 'sledge'),
    ('smolder', 'smoulder'),
    ('specialty', 'speciality'),
    ('specter', 'spectre'),
    ('stedfast', 'steadfast'),
    ('story', 'storey'),
    ('sulfate', 'sulphate'),
    ('sulfur', 'sulphur'),
    ('theater', 'theatre'),
    ('tidbit', 'titbit'),
    ('tire', 'tyre'),
    ('ton', 'tonne'),
    ('traveled', 'travelled'),
    ('traveler', 'traveller'),
    ('valor', 'valour'),
    ('villan', 'villain'),
    ('vinyard', 'vineyard'),
    ('vise', 'vice'),
    ('vizor', 'visor'),
    ('while', 'whilst'),
    ('woful', 'woeful'),
    ('wrack', 'rack'),
    ('yogurt', 'yoghurt'),
    
    # Numbers (1-300 and the years 1400-2100 are added automatically by
    # organize_equivalents)
    ('zero', '0'),
    ('hundred', '100'),
    ('thousand', '1000'),
    ('million', '1000000'),
    
    # Rankings (1-300 are added automatically by organize_equivalents)
    ('first', '1st', 'i', 'senior', 'sr'),
    ('second', '2nd', 'ii', 'junior', 'jr')]
    
    

def organize_equivalents():
    """Build the word-equivalence lookup tables from ``equivalents``.

    Extends the module-level ``equivalents`` list with generated tuples for
    the numbers and rankings 1-300 and for the years 1400-2100, then indexes
    every tuple into two module-level dicts:

    * ``equivalent_words`` maps each single-word entry to the set of the
      other single-word entries of its tuple(s).
    * ``equivalent_multi_words`` maps each single-word entry to a set of
      tuples, one per multi-word entry of its tuple(s), split on whitespace.

    Entries containing a space are never used as keys. Calling the function
    again rebuilds both tables and does not append the generated tuples to
    ``equivalents`` a second time.
    """
    global equivalents
    global equivalent_words

    equivalent_words = {}
    # Emptied in place (not rebound) so the table is rebuilt from scratch,
    # matching the reset of equivalent_words above.
    equivalent_multi_words.clear()

    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so digit
    # strings are produced on both without naming the Python-2-only builtin.
    text_type = type(u'')

    # Snapshot of the tuples already listed, so that a repeated call does not
    # append the generated tuples again.
    already_listed = set(equivalents)

    generated = []

    # Generate numeric equivalents on-the-fly
    for i in range(1, 301):
        generated.append((word_number(i), roman_number(i), text_type(i)))
        generated.append((word_rank(i), roman_number(i), numeric_rank(i)))

    # Generate year equivalents on-the-fly
    for i in range(1400, 2101):
        generated.append((word_year(i), text_type(i)))

    equivalents.extend(g for g in generated if g not in already_listed)

    for equivalent in equivalents:
        for key in equivalent:
            # only single words can be used as a key to matching equivalents
            if ' ' in key:
                continue

            for w in equivalent:
                if w == key:
                    continue

                if len(w.split()) == 1:
                    equivalent_words.setdefault(key, set()).add(w)
                else:
                    # multi-word equivalents are stored as a tuple of words
                    equivalent_multi_words.setdefault(key, set()).add(tuple(w.split()))
                        
    
# Build the lookup tables as soon as this module is imported.
organize_equivalents()
