@darkom: That's pretty much what linearize_tables does currently
Code:
def linearize(self, root):
    """Turn table markup beneath ``root`` into plain ``div`` elements.

    Every element matched by the table-related XPath below is retagged,
    in place, to ``XHTML('div')`` and has the attributes listed in
    ``unwanted`` removed when present. Nothing is returned; ``root`` is
    modified directly.

    NOTE(review): ``XPath`` and ``XHTML`` are defined outside this snippet;
    presumably the ``h:`` prefix is bound to the XHTML namespace -- confirm.
    """
    find_table_nodes = XPath(
        '//h:table|//h:td|//h:tr|//h:th|//h:caption|'
        '//h:tbody|//h:tfoot|//h:thead|//h:colgroup|//h:col')
    # Attributes dropped from each converted element.
    unwanted = (
        'style', 'font', 'valign',
        'colspan', 'width', 'height',
        'rowspan', 'summary', 'align',
        'cellspacing', 'cellpadding',
        'frames', 'rules', 'border',
    )
    for node in find_table_nodes(root):
        node.tag = XHTML('div')
        attributes = node.attrib
        for name in unwanted:
            if name not in attributes:
                continue
            del attributes[name]