#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import re

class TAG(object):
    """One piece of split markup: a single tag or the text between two tags.

    Instances can be created bare (``TAG()``) and filled in afterwards, or
    built in one step with ``TAG(content, pair, e_type)``. The class-level
    attributes below hold the defaults and remain readable on the class.
    """

    content = ''    # actual content
    pair = 0        # tag pair number; 0 is the initial, unassigned value
    e_type = 0      # 1=OPEN 2=CLOSE 3=CONTAINED 4=TEXT OR CR/LF 9=REMOVE-EMPTY-SPAN

    def __init__(self, content='', pair=0, e_type=0):
        """Store the three fields; all arguments are optional."""
        self.content = content
        self.pair = pair
        self.e_type = e_type

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return '%s(content=%r, pair=%r, e_type=%r)' % (
            type(self).__name__, self.content, self.pair, self.e_type)

# Entity kinds assigned while scanning the markup.
_OPEN = 1       # opening tag that expects a closing tag later on
_CLOSE = 2      # closing tag
_CONTAINED = 3  # tag that never gets a closing tag of its own
_TEXT = 4       # everything between two tags, including line breaks
_REMOVE = 9     # bare <span>, or the closing tag paired with one

# Comments are matched as one unit so markup inside them is left alone;
# DOTALL lets a tag whose attributes wrap onto another line match as well.
_TAG_SPLITTER = re.compile(r'(<!--.*?-->|<.+?>)', re.DOTALL)

# Prefixes of tags that stand alone: the hr/br/img elements plus comments,
# declarations (<!DOCTYPE ...>) and processing instructions (<?xml ...?>).
_STANDALONE_PREFIXES = ('<hr', '<br', '<img', '<!', '<?')


def _classify_entity(entity):
    """Return the entity kind for one non-empty piece of split markup."""
    if entity == '<span>':
        return _REMOVE
    if not entity.startswith('<'):
        return _TEXT
    if entity.startswith(_STANDALONE_PREFIXES) or entity.endswith('/>'):
        return _CONTAINED
    if entity.startswith('</'):
        return _CLOSE
    return _OPEN


def strip_empty_spans(html_text):
    """Remove attribute-less ``<span>`` tags while keeping their content.

    Every tag spelled exactly ``<span>`` is dropped, together with the
    closing tag it pairs with (an unclosed ``<span>`` is dropped on its
    own). Spans that carry attributes, and all other markup and text, are
    returned unchanged.

    A closing tag is paired with the nearest preceding opening tag that is
    still unpaired. Tag names are not compared, so the result is only
    meaningful for properly nested markup.

    Args:
        html_text: Markup to clean, as a string.

    Returns:
        The markup with the bare spans removed.
    """
    # re.split with a capturing group returns the tags as well as the text
    # between them; the empty strings it produces carry no content.
    pieces = [piece for piece in _TAG_SPLITTER.split(html_text) if piece]
    kinds = [_classify_entity(piece) for piece in pieces]

    # Indices of opening tags that have not met their closing tag yet. The
    # top of the stack is always the nearest unpaired opening tag.
    unpaired = []
    for index, kind in enumerate(kinds):
        if kind == _OPEN or kind == _REMOVE:
            unpaired.append(index)
        elif kind == _CLOSE and unpaired:
            # A closing tag shares the fate of the tag it closes.
            if kinds[unpaired.pop()] == _REMOVE:
                kinds[index] = _REMOVE

    return ''.join(
        piece for piece, kind in zip(pieces, kinds) if kind != _REMOVE)

def main(argv=sys.argv):
    """Clean one file with strip_empty_spans() and rewrite it in place.

    Args:
        argv: Command-line style argument list; ``argv[1]`` is the path of
            the file to process. Defaults to ``sys.argv``.

    Returns:
        0 on success, or 1 (after printing a usage message) when the
        argument list does not hold exactly one file name.
    """
    if len(argv) != 2:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("Usage:")
        print("  strip_empty_spans.py infile")
        return 1

    infile = argv[1]

    # 'rU' requests universal-newline reading on Python 2. Python 3 does
    # that by default and removed the 'U' flag in 3.11, so use 'r' there.
    # NOTE(review): on Python 3 the text is decoded with the platform's
    # default encoding -- confirm that matches the files being processed.
    read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(infile, read_mode) as fd:
        html = fd.read()

    html = strip_empty_spans(html)

    # The input file is overwritten only after the cleaning step succeeded.
    with open(infile, 'w') as fd:
        fd.write(html)

    return 0
    
# Script entry point: run main() and pass its result to the interpreter as
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
