#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Functions to handle tag list.

The supported structure is a flat list consisting of single-layer tags and comments, such as:
['<root>', '<child1/>', '<child2>text</child2>', '<!-- comment -->', '</root>'].
"""

import sys, os, re


def find(srclist, tag, attrib=None, value=None, start=0, end=None, indices=None):
    """Return the index of the first item matching the given conditions.

    Delegates to findall() with the hit limit set to one; all arguments are
    passed through unchanged.  Returns None when findall() reports no hit.
    """
    hits = findall(srclist, tag, attrib, value, 1, start, end, indices)
    return hits[0] if len(hits) == 1 else None

def findall(srclist, tag, attrib=None, value=None, n=0, start=0, end=None, indices=None):
    """Return the indices of the items matching the given conditions.

    Arguments:
        srclist: list of tag/comment strings.
        tag: tag name to look for.  A value starting with '!--' or '<!--'
            searches for a comment beginning with that text instead.
        attrib: if given, the tag must also contain this text somewhere
            after the tag name (it is matched as a substring).
        value: if given together with attrib, the attribute must be
            followed by '=' and exactly this double-quoted value.
        n: maximum number of hits to collect; 0 means no limit.
        start, end: bounds of the index range to scan (end defaults to
            len(srclist)); only used when indices is not given.
        indices: explicit iterable of candidate indices, scanned in the
            given order.

    tag, attrib and value are inserted into a regular expression without
    escaping, so regex metacharacters in them keep their special meaning.

    Returns the list of matching indices in scan order.
    """
    if indices is None:
        if end is None:
            end = len(srclist)
        indices = range(start, end)

    # In the tag patterns the leading '<!--.*?-->' alternative swallows a
    # comment without filling group 1, so tags quoted inside a comment are
    # not reported as hits.
    if tag.startswith('!--'):
        pattern = r'(<{:})'.format(tag)
    elif tag.startswith('<!--'):
        pattern = r'({:})'.format(tag)
    elif attrib is None:
        # The lookahead ends the tag name at whitespace, '/' or '>' so that
        # attribute-less tags such as <root> or <child/> match too, while
        # 'child' still does not match <child1>.
        pattern = r'<!--.*?-->|(<{:}(?=[\s/>]).*?>)'.format(tag)
    elif value is None:
        pattern = r'<!--.*?-->|(<{:}\s+.*?{:}.*?>)'.format(tag, attrib)
    else:
        pattern = r'<!--.*?-->|(<{:}\s+.*?{:}\s*=\s*"{:}".*?>)'.format(tag, attrib, value)
    re_ = re.compile(pattern, re.S)

    newindices = []
    for i in indices:
        mo = re_.search(srclist[i])
        if mo is not None and mo.group(1) is not None:
            newindices.append(i)
            # With n == 0 the counter goes negative and never hits zero,
            # which is what makes 0 mean "no limit".
            n -= 1
            if n == 0:
                break
    return newindices

def extract_tags(srclist, indices):
    """Return a new list holding only the items whose index is in indices.

    Items keep their order from srclist regardless of the order of indices;
    duplicate or out-of-range indices have no extra effect.  srclist itself
    is not modified.
    """
    # A set gives constant-time membership tests instead of rescanning
    # indices once per item.
    wanted = set(indices)
    return [item for i, item in enumerate(srclist) if i in wanted]

def remove_tags(srclist, indices):
    """Return a new list without the items whose index is in indices.

    The remaining items keep their order from srclist; out-of-range indices
    are ignored.  srclist itself is not modified.
    """
    # A set gives constant-time membership tests instead of rescanning
    # indices once per item.
    unwanted = set(indices)
    return [item for i, item in enumerate(srclist) if i not in unwanted]

def remove_attrib(srclist, index, attrib):
    """Return the item at index with the given attribute stripped.

    Every ' attrib="..."' occurrence, including its leading whitespace, is
    removed.  The item is returned unchanged when the attribute is absent;
    srclist itself is not modified.
    """
    attrib_re = re.compile(r'\s+{:}\s*=\s*"(.*?)"'.format(attrib))
    return attrib_re.sub('', srclist[index])

def get_attrib(srclist, index, attrib):
    """Return the value of the given attribute of the item at index.

    The value is the text between the double quotes of the first
    ' attrib="..."' occurrence.  Returns None when the attribute is absent.
    """
    attrib_re = re.compile(r'\s+{:}\s*=\s*"(.*?)"'.format(attrib))
    found = attrib_re.search(srclist[index])
    if found is None:
        return None
    return found.group(1)

def set_attrib(srclist, index, attrib, value):
    """Return the item at index with the given attribute added or replaced.

    If the attribute is already present, its first occurrence is replaced by
    ' attrib="value"'.  Otherwise the attribute is inserted right before the
    first '>' or '/>' of the item.  When the item contains neither, it is
    returned unchanged.  srclist itself is not modified.
    """
    item = srclist[index]
    repl = ' {:}="{:}"'.format(attrib, value)
    # Group 1: an existing occurrence of the attribute.
    # Group 2: the closing '>' or '/>' to insert in front of.
    pattern = r'(\s+{:}\s*=\s*".*?")|\s*(/?>)'.format(attrib)
    mo = re.search(pattern, item)
    if mo is None:
        # Nothing to replace and nowhere to insert; previously this path
        # failed on an unbound local variable.
        return item
    if mo.group(1) is not None:
        return item[:mo.start()] + repl + item[mo.end():]
    return item[:mo.start()] + repl + mo.group(2) + item[mo.end():]

def get_content(srclist, index):
    """Return the text between the opening and closing tag of the item.

    Returns None when the item has no '<...>content</...>' form, e.g. for a
    self-closing tag or a comment.
    """
    found = re.search(r'(<.*?>)(.*?)(</.*>)', srclist[index])
    return None if found is None else found.group(2)

def set_content(srclist, index, value):
    """Return the item at index with its content added or replaced.

    A self-closing tag ('<tag/>' or '<tag attr="x"/>') is expanded to
    '<tag ...>value</tag>'.  For a '<tag>old</tag>' item the text between the
    opening and closing tag is replaced by value.  Any other item (e.g. a
    comment) is returned unchanged.  srclist itself is not modified.
    """
    item = srclist[index]
    # Alternative 1 (groups 1-2): self-closing tag; group 2 is the tag name.
    #   The attribute part is optional so that a bare '<tag/>' is handled,
    #   and the name stops at whitespace, '/' or '>' so it cannot run past
    #   the end of the opening tag.
    # Alternative 2 (groups 3-5): opening tag, old content, closing tag.
    re_tag = re.compile(r'(<([^\s/>]+)(?:\s+.*?)?)/>|(<.*?>)(.*?)(</.*>)')

    mo = re_tag.search(item)
    if mo is None:
        return item
    if mo.group(1) is not None:
        repl = '{:}>{:}</{:}>'.format(mo.group(1), value, mo.group(2))
    else:
        repl = '{:}{:}{:}'.format(mo.group(3), value, mo.group(5))
    return item[:mo.start()] + repl + item[mo.end():]
