Edit: Actually, what I had worked beautifully. Here's the final preprocessing code.
Code:
def preprocess_html(self, soup):
    """Wrap footnote spans and images in ``<p>`` tags inside the preceding span.

    Two kinds of elements are processed, in this order:

    * every ``<span class="fn">`` -- wrapped in a new ``<p>`` that is
      inserted at child index 1 of the span preceding it;
    * every ``<img>`` -- wrapped in a new ``<p>`` that is inserted at
      child index 2 of the span preceding it.

    An element is only moved when its immediate previous sibling is a
    ``<span>`` tag. Elements with no previous sibling, or whose previous
    sibling is a text node or any other tag, are left where they are.

    Args:
        soup: Parsed document tree exposing ``findAll`` (presumably a
            BeautifulSoup 3 object -- confirm against the caller). It is
            modified in place.

    Returns:
        The same ``soup`` object, after modification.
    """
    # (findAll arguments, child index at which the new <p> is inserted).
    # NOTE(review): indices 1 and 2 are kept from the original code;
    # confirm they match the markup being processed.
    wrap_rules = (
        (("span", {"class": "fn"}), 1),
        (("img",), 2),
    )

    for find_args, position in wrap_rules:
        # Run each search only when its rule is reached, so the image
        # search sees the tree as already modified by the footnote pass.
        for tag in soup.findAll(*find_args):
            sibling = tag.previousSibling
            # The sibling may be None (tag is a first child) or a text
            # node without a usable ``name``; getattr avoids the
            # AttributeError a direct ``sibling.name`` would raise.
            if getattr(sibling, "name", None) != "span":
                continue
            paragraph = Tag(soup, "p")
            # Inserting the tag into the new <p> moves it out of its
            # old position in the tree.
            paragraph.insert(0, tag)
            sibling.insert(position, paragraph)

    return soup
Thank you so much for your help!