In case it's of interest to anyone, here is some Ruby code I wrote to patch an EPUB+MathML file output by calibre and turn it into a valid EPUB 3 file. All it does is replace the XML declaration on the first line of each HTML file with an HTML5 doctype (<!DOCTYPE html>) and add properties="mathml" to each HTML item in the package document.
Code:
unless File.exist?(infile) then fatal_error("in patch_epub3: input file #{infile} does not exist") end
Dir.mktmpdir { |tmpdir|
unless system("unzip -qq #{infile} -d #{tmpdir}") then fatal_error("in patch_epub3: unable to unzip file #{infile}") end
package_document = "#{tmpdir}/content.opf"
# EPUB 3.0 spec, section 4.3.4, says we need to declare mathml property in manifest file:
# Slurp the whole package document (a nil separator makes gets read to end of file).
xml = File.open(package_document, 'r') { |f| f.gets(nil) }
# Rewrite every self-closing <item .../> element; the value of the block replaces the element.
xml.gsub!(/(<item\s+([^\/]|"[^"]*")*\/>)/) do
  element = $1 # e.g., <item href="ch01_split_000.xhtml" id="html15" media-type="application/xhtml+xml"/>
  if element =~ /media-type="application\/xhtml\+xml"/ # don't do images, just html
    if element =~ /properties="[^"]*"/
      # An existing properties attribute gets " mathml" appended unless it already mentions it.
      element.gsub(/properties="([^"]*)"/) do
        props = $1
        props = props + " mathml" unless props =~ /mathml/
        'properties="' + props + '"'
      end
    else
      # No properties attribute yet: add one right after the tag name.
      element.gsub(/<item/, '<item properties="mathml"')
    end
  else
    element
  end
end
# Write the patched manifest back over the original package document.
File.open(package_document, 'w') { |f| f.print xml }
# Swap the XML declaration for an HTML5 doctype in every file of the unpacked book whose
# name ends in "html". The tree is walked recursively so that content documents stored in
# subdirectories are patched too, not only those at the top level of the archive.
Dir.glob(File.join(tmpdir, '**', '*'), File::FNM_DOTMATCH).each { |file|
  # Skip anything that is not a regular file (e.g. a directory whose name ends in "html").
  next unless file =~ /html\Z/ && File.file?(file)
  html = File.read(file)
  # first line output by calibre 0.7.44 looks like this: <?xml version='1.0' encoding='utf-8'?>
  # sub! returns nil when the file does not start with an XML declaration; such files are left untouched.
  next unless html.sub!(/\A<\?xml[^\n]*/) { "<!DOCTYPE html>" }
  File.open(file, 'w') { |f| f.print html }
}
# Keep the unpatched book as before_patch_epub3.epub (relative to the current working directory).
File.rename(infile,"before_patch_epub3.epub")
# zip is run from inside the temporary directory so that entries keep their relative paths
# (META-INF/container.xml must stay in META-INF); the output therefore needs an absolute path.
epub_path = File.expand_path(infile.to_s)
other_entries = Dir.entries(tmpdir) - ['.', '..', 'mimetype']
zipped = true
# The OCF container format wants the mimetype entry first, stored uncompressed (-0)
# and without extra file attributes (-X).
if File.exist?("#{tmpdir}/mimetype") then zipped = system('zip', '-X0q', epub_path, 'mimetype', :chdir => tmpdir) end
# Everything else is appended recursively with normal compression. Argument lists are used
# instead of a shell string so that unusual characters in file names are passed through verbatim.
if zipped && !other_entries.empty? then zipped = system('zip', '-Xrq9', epub_path, *other_entries, :chdir => tmpdir) end
unless zipped then fatal_error("in patch_epub3: unable to rezip file #{infile}") end