#!/usr/bin/python
#
# Verify the ranges of a docbook document, ensuring
# endofrange come after startofrange, and the IDs used by startofrange
# are unique.

import io
import subprocess

from lxml import etree


def _check_indexterms(indexterms):
    """Check startofrange/endofrange consistency for a sequence of indexterms.

    Walks the elements in the order given and prints an "error: ..." line
    when a startofrange id is reused, when a startofrange has no id, or when
    an endofrange refers (via startref) to an id that has not been seen yet.
    After processing each element, a trace line with its class and the id it
    referenced is printed.

    indexterms -- iterable of elements exposing a dict-like ``attrib``.
    """
    # Maps each startofrange id to the 1-based position where it was seen.
    seen_ids = {}
    for order, term in enumerate(indexterms, 1):
        term_class = term.attrib.get('class')
        term_id = None
        if term_class == 'startofrange':
            term_id = term.attrib.get('id')
            if term_id is None:
                # Report instead of dying with KeyError so the remaining
                # indexterms are still checked.
                print("error: startofrange indexterm without id attribute")
            else:
                if term_id in seen_ids:
                    print("error: non-unique indexterm id: %s" % term_id)
                seen_ids[term_id] = order
        elif term_class == 'endofrange':
            # A missing startref yields None, which is never in seen_ids and
            # is therefore reported by the check below.
            term_id = term.attrib.get('startref')
            if term_id not in seen_ids:
                print("error: indexterm id=\"%s\" not listed before endofrange"
                      % term_id)
        # Single-argument print() gives the same output on Python 2 and 3.
        print("%s %s" % (term_class, term_id))


def main():
    """Expand entities in freeculture.xml with xmllint and verify its ranges."""
    filename = 'freeculture.xml'

    # make sure entities are looked up / available
    # Based on idea from
    # http://stackoverflow.com/questions/14731633/how-to-resolve-external-entities-with-xml-etree-like-lxml-etree
    proc = subprocess.Popen(['xmllint', '--noent', filename],
                            stdout=subprocess.PIPE)
    output = proc.communicate()[0]

    # communicate() returns bytes, so wrap them in BytesIO; lxml's etree is
    # required (rather than the stdlib ElementTree) because .xpath() is used.
    doc = etree.parse(io.BytesIO(output))
    _check_indexterms(doc.getroot().xpath('//indexterm'))


if __name__ == "__main__":
    main()