from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import MinimalSoup
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.BeautifulSoup import ICantBelieveItsBeautifulSoup
import os, re, sys, glob

# remove the <div class="calibrenavbar"> tags in all the content html files
# also unnecessary <hr> and <p> tags
def FuncRemoveNavBar(path):
	"""Strip calibre navigation bars and other clutter from generated HTML files.

	Every sub-folder of ``path`` is walked recursively and each regular file
	whose name ends in ``.html`` is rewritten in place.  Files that sit
	directly inside ``path`` itself (for example the top-level index.html)
	are not touched, because only the sub-folders of ``path`` are scanned.

	For each processed file the following elements are removed:
	  * <input>, <select>, <textarea> and <form> tags
	  * <hr> tags
	  * <div class="calibrenavbar"> blocks
	  * <p> tags that have no contents at all

	The cleaned document is written back using ``soup.prettify()``.

	path -- folder whose sub-folders contain the HTML files to clean.
	"""
	# Tag names that are removed wholesale, processed in this order.
	unwanted_tags = ('input', 'select', 'textarea', 'form', 'hr')

	def is_empty_paragraph(tag):
		# a <p> with no child nodes (and therefore no text) counts as empty
		return (tag.name == 'p'
				and (tag.string is None or not tag.string.strip())
				and len(tag) == 0)

	def clean_file(html_path):
		# 'rU' is kept from the original code: universal-newline text mode
		with open(html_path, 'rU') as in_file:
			soup = BeautifulSoup(in_file.read())
		# remove problematic tags and horizontal rules
		for tag_name in unwanted_tags:
			for tag in soup.findAll(tag_name):
				tag.extract()
		# remove calibre navbar
		for navbar in soup.findAll('div', attrs={'class': "calibrenavbar"}):
			navbar.extract()
		# remove empty <p> tags
		for paragraph in soup.findAll(is_empty_paragraph):
			paragraph.extract()
		with open(html_path, 'w') as out_file:
			out_file.write(soup.prettify())

	def scandirs(folder):
		for entry in glob.glob(os.path.join(folder, '*')):
			if os.path.isdir(entry):
				scandirs(entry)
			# only regular .html files are parsed; a directory that merely
			# has an .html suffix is recursed into above, never opened
			elif entry.endswith('.html'):
				clean_file(entry)

	# only deal with folders of the given folder
	for entry in glob.glob(os.path.join(path, '*')):
		if os.path.isdir(entry):
			scandirs(entry)

# convert the given .opf and .ncx files to the format kindlegen expects
def FuncConvertKG(INOPFFILE, INNCXFILE, OUTOPFFILE, OUTNCXFILE):
	"""Rewrite an .opf/.ncx pair into the periodical layout used for kindlegen.

	INOPFFILE  -- path of the source .opf file (read only).
	INNCXFILE  -- path of the source .ncx file.  NOTE: when the navMap has
	              only a single level of navpoints, this file is overwritten
	              with an intermediate version (extra "Latest" section level
	              added) and then parsed again.
	OUTOPFFILE -- path the converted .opf is written to.
	OUTNCXFILE -- path the converted .ncx is written to.

	Side effect: the class-level ``BeautifulStoneSoup.NESTABLE_TAGS`` mapping
	is modified so the parser does not re-arrange navpoint/navlabel/text/
	content tags; that change persists after the call.

	Missing elements (dc:title, dc:date, metadata, guide, navmap, the
	dtb:depth meta) are not handled specially; the underlying attribute or
	call errors propagate to the caller.
	"""
	def load_soup(file_path, **parser_options):
		# parse one XML file; the handle is closed even if parsing fails
		with open(file_path, 'rU') as in_file:
			return BeautifulStoneSoup(in_file.read(), **parser_options)

	def save_soup(file_path, tree):
		# write the prettified tree, replacing any existing file
		with open(file_path, 'w') as out_file:
			out_file.write(tree.prettify())

	# avoid navpoint tag to be auto-rearrange
	for tag_name in ("navpoint", "navlabel", "text", "content"):
		BeautifulStoneSoup.NESTABLE_TAGS[tag_name] = []

	# ---- opf file ----
	soup = load_soup(INOPFFILE)
	# If you are using Kindlegen 1.2, Kindle will drop the last word from the periodical title and display that off-center, because it spaces it assuming it is displaying the last word. This command will cause an asterisk to be appended to your periodical so you don't lose the last word in the title and it displays centered. (due to nickredding)
	#soup.find('dc:title').contents[0].replaceWith(soup.find('dc:title').contents[0]+' *')
	# move <dc:title> to the front of <metadata> ...
	soup.metadata.insert(0, soup.find('dc:title'))
	# ... then <dc:date> in front of it
	soup.metadata.insert(0, soup.find('dc:date'))
	# rename dc:language to dc:Language
	for dclang in soup.findAll("dc:language"):
		dclang.name = 'dc:Language'
	# add x-metadata
	soup.find('metadata').insert(0, "<x-metadata><output encoding=\"utf-8\" Content-type=\"application/x-mobipocket-subscription-magazine\"/></x-metadata>")
	# make appropriate guide entries
	soup.find('guide').replaceWith("<guide><reference type=\"start\" title=\"start\" href=\"index.html\"/><reference type=\"toc\" title=\"toc\" href=\"index.html\"/><reference type=\"text\" title=\"text\" href=\"index.html\"/></guide>")
	# write the output
	save_soup(OUTOPFFILE, soup)

	# ---- ncx file ----
	soup = load_soup(INNCXFILE, selfClosingTags=['content'])

	# If the navMap has just one layer (since the recipe has just 1 feed),
	# insert an additional outer layer and fix the depth as specified.
	navMap = soup.find('navmap')
	outer_points = navMap.findAll('navpoint', recursive=False)
	is_one_layer = not any(
		point.findAll('navpoint', recursive=False) for point in outer_points)
	if is_one_layer:
		metadepth = soup.find('meta', {'name':'dtb:depth'})
		metadepth['content'] = 3

		latest = BeautifulStoneSoup("<navmap><navpoint class=\"section\"><navlabel><text>Latest</text></navlabel><content src=\"feed_0/index.html\" /></navpoint></navmap>")
		# move the existing children into the new section, at positions 0, 1, 2, ...
		for position, item in enumerate(navMap.findChildren(recursive=False)):
			latest.navpoint.insert(position, item)
		navMap.replaceWith(latest)

		# round-trip through the input file so the modified tree is re-parsed
		# (this overwrites INNCXFILE, as the original implementation did)
		save_soup(INNCXFILE, soup)
		soup = load_soup(INNCXFILE, selfClosingTags=['content'])

	# set 1st layer's chapters to sections, 2nd layer's chapters to articles
	navMap = soup.find('navmap')
	for section in navMap.findAll('navpoint', recursive=False):
		section['class'] = 'section'
		articles = section.findAll('navpoint')
		if not articles:
			# a section without any nested navpoint is dropped
			section.extract()
		else:
			for article in articles:
				article['class'] = 'article'

	# embrace the original with periodical class
	periodical = BeautifulStoneSoup("<navmap><navpoint class=\"periodical\"><navlabel><text>Periodical</text></navlabel><content src=\"index.html\"></navpoint></navmap>")
	for position, item in enumerate(navMap.findChildren(recursive=False)):
		periodical.navpoint.insert(position, item)
	# masthead image goes first inside the periodical navpoint
	periodical.navpoint.insert(0, BeautifulStoneSoup("<mbp:meta-img name=\"mastheadImage\" src=\"mastheadImage.jpg\"/>", selfClosingTags=['mbp']))
	soup.find('navmap').replaceWith(periodical)

	save_soup(OUTNCXFILE, soup)
