Skip to content

Instantly share code, notes, and snippets.

@gabalese
Created March 6, 2014 14:35
Show Gist options
  • Save gabalese/9391122 to your computer and use it in GitHub Desktop.
Save gabalese/9391122 to your computer and use it in GitHub Desktop.
Python script that renumbers the navPoints of an NCX so that playOrder has no gaps.
#! /usr/bin/env python
from lxml import etree as ET
import zipfile as ZIP
import sys
import os
# Prefix -> namespace URI map handed to every lxml xpath()/find() call below.
namespaces = dict(
    opf="http://www.idpf.org/2007/opf",
    dc="http://purl.org/dc/elements/1.1/",
    ncx="http://www.daisy.org/z3986/2005/ncx/",
)

# Shared metadata about the book; filled in by parseInfo().
info = dict()
def sortncx(file):
    """Return the book's NCX, cleaned up and renumbered, as serialized XML.

    The NCX is loaded from the EPUB at *file* through ``parseNCX``. The text
    of the first ``navLabel`` and of the ``docTitle`` is overwritten with
    ``info["book_title"]``, every ``navPoint`` without label text is removed
    (together with anything nested inside it), and the remaining ``navPoint``
    elements receive consecutive ``playOrder`` values starting at 1, in
    document order.

    :param file: the EPUB archive, passed unchanged to ``parseNCX``.
    :returns: the serialized NCX document as produced by ``ET.tostring``.
    """
    ncx = parseNCX(file)

    # parseNCX() goes through parseInfo(), which fills in info["book_title"].
    title = info["book_title"]
    for expr in ("//ncx:navLabel/ncx:text", "//ncx:docTitle/ncx:text"):
        matches = ncx.xpath(expr, namespaces=namespaces)
        # Only the first match is retitled; a document lacking the element
        # is left alone instead of failing with an IndexError.
        if matches:
            matches[0].text = title

    # xpath() returns a plain list, so removing nodes from the tree while
    # walking that list is safe.
    for nav_point in ncx.xpath(".//ncx:navPoint", namespaces=namespaces):
        # Look the label up by name rather than by child position, so that a
        # leading comment or a missing navLabel cannot raise an IndexError.
        label = nav_point.find("ncx:navLabel/ncx:text", namespaces=namespaces)
        if label is None or label.text is None:
            nav_point.getparent().remove(nav_point)

    # Query again: navPoints removed above must not consume a number.
    remaining = ncx.xpath(".//ncx:navPoint", namespaces=namespaces)
    for play_order, nav_point in enumerate(remaining, 1):
        nav_point.set("playOrder", str(play_order))

    return ET.tostring(ET.ElementTree(ncx))
def parseInfo(file):
    """Collect title, author and OPF/NCX locations from an EPUB archive.

    Reads ``META-INF/container.xml`` to find the OPF package document, then
    reads the OPF to find the book title, the first creator and the manifest
    item referenced by the spine's ``toc`` attribute (the NCX).

    The module-level ``info`` dict is updated in place with the keys
    ``path_to_opf``, ``book_title``, ``book_author`` and ``path_to_ncx``.

    :param file: the EPUB archive, as accepted by ``zipfile.ZipFile``.
    :returns: the updated module-level ``info`` dict.
    :raises SystemExit: with status 2 when ``container.xml`` is missing or
        cannot be interpreted.
    """
    global info
    # One archive handle for both reads, closed on every exit path
    # (including sys.exit, which unwinds through the with-block).
    with ZIP.ZipFile(file) as archive:
        try:
            container = archive.read("META-INF/container.xml")
        except KeyError:
            print("The %s file is not a valid OCF." % str(file))
            # Stop here: without container.xml there is nothing to parse.
            sys.exit(2)
        try:
            m = ET.fromstring(container)
            # container/rootfiles/rootfile[@full-path]
            info["path_to_opf"] = m[0][0].get("full-path")
            root_folder = os.path.dirname(info["path_to_opf"])
        except Exception as e:
            print(e)
            sys.exit(2)
        opf = ET.fromstring(archive.read(info["path_to_opf"]))

    toc_id = opf.xpath("//opf:spine", namespaces=namespaces)[0].get("toc")
    info["book_title"] = "".join(
        opf.xpath("//dc:title/text()", namespaces=namespaces))
    # XPath positions are 1-based; a [0] predicate never matches anything.
    info["book_author"] = "".join(
        opf.xpath("(//dc:creator)[1]/text()", namespaces=namespaces))
    ncx_name = opf.xpath("//*[@id='%s']" % toc_id)[0].get("href")
    # Zip member names have no leading slash, so only prepend the OPF's
    # folder when the OPF is not at the archive root.
    if root_folder:
        info["path_to_ncx"] = root_folder + "/" + ncx_name
    else:
        info["path_to_ncx"] = ncx_name
    return info
def parseNCX(file):
    """Parse the NCX document stored inside an EPUB archive.

    The NCX's location is taken from ``parseInfo(file)["path_to_ncx"]``.

    :param file: the EPUB archive, as accepted by ``zipfile.ZipFile``.
    :returns: the root element returned by ``ET.fromstring`` for the NCX.
    """
    ncx_path = parseInfo(file)["path_to_ncx"]
    # Close the archive as soon as the NCX bytes have been read and parsed.
    with ZIP.ZipFile(file) as archive:
        return ET.fromstring(archive.read(ncx_path))
if __name__ == '__main__':
    # Usage: script.py <file.epub> -- writes the renumbered NCX to stdout.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <file.epub>\n" % sys.argv[0])
        sys.exit(2)
    info = parseInfo(sys.argv[1])
    sorted_ncx = sortncx(sys.argv[1])
    print(sorted_ncx)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment