#!/usr/bin/python3
"""Convert a FileMaker XML export of a word list into a DocBook glossary.

The script parses ``meksme-utf8.xml``, collects one dict per row of the
RESULTSET element (keyed by the field names in ``cols``), and writes a
DocBook ``glosslist`` for one language to ``glossary.xml``.
"""

import re

import lxml.builder
from lxml import etree
from lxml.etree import tostring

filemakerxml = 'meksme-utf8.xml'

# Namespace prefix (Clark notation) used by the elements looked up below.
FMP_NS = "{http://www.filemaker.com/fmpxmlresult}"

# Field names, in the order the COL elements appear within each row.
cols = (
    'topic',
    'sme',
    'desc-sme',
    'desc-nb',
    'nb',
    'sv',
    'fi',
    'en',
    'is',
)

# Compiled once; used to collapse every run of whitespace to one space.
_WHITESPACE = re.compile(r'\s+')

tree = etree.parse(filemakerxml)
root = tree.getroot()

resultset = root.find(FMP_NS + "RESULTSET")
if resultset is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("no RESULTSET element found in %s" % filemakerxml)

# One dict per row; a field is present only when its text is non-empty.
words = []
for row in resultset:
    d = {}
    # zip() pairs each column with its field name; columns beyond the
    # names listed in ``cols`` are ignored.
    for name, col in zip(cols, row.findall(FMP_NS + "COL")):
        # The text lives in the first child of COL (presumably the DATA
        # element of the export format); a COL without children is
        # treated as an empty field.
        data = next(iter(col), None)
        t = data.text if data is not None else None
        if t:
            d[name] = _WHITESPACE.sub(' ', t)
    words.append(d)


def langsort(lang, e):
    """Return the sort key for entry *e* when sorting by language *lang*.

    The key is the word in *lang* if the entry has one, otherwise the
    'sme' word, otherwise the empty string (so that entries lacking both
    do not abort the sort with a KeyError).
    """
    if lang in e:
        return e[lang]
    return e.get('sme', '')


def make_glossary_docbook(lang, outfile='glossary.xml'):
    """Write a DocBook glosslist for *lang* to *outfile*.

    Only entries that have both a word and a description ('desc-<lang>')
    in *lang* are included.  Each glossentry gets:

    * a glossterm of the form ``word [topic]`` ('n/a' when no topic),
    * indexterms for the word and for its translations,
    * a glossdef with the description,
    * a second glossdef listing the translations, if there are any.

    lang    -- language code, one of the keys used in ``cols``.
    outfile -- path of the XML file to write (UTF-8, with declaration).
    """
    E = lxml.builder.ElementMaker(
        nsmap={
            # 'xi': "http://www.w3.org/2001/XInclude",
        }
    )
    # Languages listed as translations.
    # NOTE(review): 'is' is loaded via ``cols`` but not listed here --
    # confirm whether that omission is intentional.
    langcodes = ('en', 'nb', 'sme', 'sv', 'fi',)
    desc_key = 'desc-%s' % lang

    def indexit(entry, wlist, lang=None):
        """Append one indexterm to *entry* per comma-separated word.

        When *lang* is given, "[lang]" is appended to every word that
        does not already contain a '['.
        """
        for w in wlist.split(","):
            # Strip so that "a, b" indexes "b" rather than " b", and so
            # that whitespace-only pieces are skipped like empty ones.
            w = w.strip()
            if not w:
                continue
            if lang and '[' not in w:
                w += "[%s]" % lang
            entry.append(E.indexterm(E.primary(w)))

    glossary = E.glosslist()
    for e in sorted(words, key=lambda x: langsort(lang, x)):
        if lang not in e or desc_key not in e:
            continue
        # Read-only lookup: the shared ``words`` entries are not modified.
        topic = e.get('topic', 'n/a')
        entry = E.glossentry(
            E.glossterm('%s [%s]' % (e[lang], topic)),
        )
        indexit(entry, e[lang])
        translations = []
        for code in langcodes:
            if code != lang and code in e:
                translations.append("%s (%s) " % (e[code], code))
                # Add foreign words to index, split on comma
                indexit(entry, e[code], code)
        entry.append(E.glossdef(E.para(e[desc_key])))
        if translations:
            entry.append(E.glossdef(E.para("".join(translations))))
        glossary.append(entry)

    content = tostring(glossary,
                       pretty_print=True,
                       xml_declaration=True,
                       encoding='UTF-8')
    with open(outfile, 'wb') as f:
        f.write(content)


def make_glossary(lang):
    """Generate the glossary for *lang* (currently DocBook output only)."""
    make_glossary_docbook(lang)


# Heading printed before generating the glossary, per language code.
HEADINGS = {
    'nb': "Norsk/bokmål",
    'sme': "Nordsamisk",
}

# Language to generate; change to 'sme' for the Northern Sami glossary.
TARGET_LANG = 'nb'

print(HEADINGS[TARGET_LANG])
print()
make_glossary(lang=TARGET_LANG)