#!/usr/bin/python3
"""Build a DocBook glossary from a FileMaker XML dictionary export.

Reads the FMPXMLRESULT export in ``meksme-utf8.xml`` and writes a DocBook
``<glossary>`` XML file for the language code given on the command line.
"""

import locale
import re

# When True, append the topic tag to each glossary term.
list_topic = False

# FileMaker XML export to read.
filemakerxml = 'meksme-utf8.xml'

# XML namespace used by FileMaker FMPXMLRESULT exports.
FMP_NS = '{http://www.filemaker.com/fmpxmlresult}'

# Column order of the FileMaker export; each ROW's COL elements map onto
# these keys in order.
cols = (
    'topic',
    'sme',
    'desc-sme',
    'desc-nb',
    'nb',
    'sv',
    'fi',
    'en',
    'is',
)

# Per-language translation of the topic codes used in the export.
topicmap = {
    'nb': {
        'fáddá': 'tema',
        'ávnnas': 'emne',
        'eanan': 'land',
        'biras': 'miljø',
        'huksen': 'bygg',
        'bohcci': 'rør',
        'data': 'data',
        'hydr': 'hydraulikk',
        'fys': 'fysikk',
        'sveis': 'sveising',
        'mihttu': 'måling',
        'elektro': 'elektro',
        'neavvu': 'verktøy',
        'mohtor': 'motor',
        'mašiidna': 'maskin',
        'fuolahas': 'bearbeiding',
    }
}

# Parsed glossary entries; populated by main() via load_words().
words = []


def load_words(filename):
    """Parse *filename* (FMPXMLRESULT XML) into a list of entry dicts.

    Each ROW becomes a dict mapping the names in ``cols`` to the
    whitespace-normalized text of the corresponding COL's first child
    (the DATA element).  Columns with no child or empty text are left
    out of the dict.
    """
    # Lazy import keeps the module importable without lxml, matching the
    # file's existing lazy-import style.
    from lxml import etree

    tree = etree.parse(filename)
    resultset = tree.getroot().find(FMP_NS + 'RESULTSET')
    entries = []
    for row in resultset:
        d = {}
        for index, col in enumerate(row.findall(FMP_NS + 'COL')):
            children = list(col)
            # Guard against a COL without a DATA child; indexing an
            # empty child list would raise IndexError.
            t = children[0].text if children else None
            if t:
                # Collapse runs of whitespace to single spaces.
                d[cols[index]] = re.sub(r'\s+', ' ', t)
        entries.append(d)
    return entries


def langsort(lang, e):
    """Return a locale-aware sort key for entry *e* in language *lang*.

    Falls back to the North Sami ('sme') term when the entry has no
    term in *lang*.
    """
    return locale.strxfrm(e[lang] if lang in e else e['sme'])


def make_glossary_docbook(lang, desccodes, langcodes, output='glossary.xml',
                          entries=None):
    """Write a DocBook glossary for *lang* to *output*.

    :param lang: language code of the glossary terms.
    :param desccodes: language codes whose descriptions are appended.
    :param langcodes: language codes listed as translations and indexed.
    :param output: target file name; ``None`` falls back to the default
        'glossary.xml' (the CLI passes ``None`` when --output is absent).
    :param entries: entry dicts to use; defaults to the module-level
        ``words`` list for backward compatibility.
    """
    import lxml.builder
    import lxml.etree

    if entries is None:
        entries = words
    if output is None:
        # Previously open(None) crashed when --output was omitted.
        output = 'glossary.xml'

    E = lxml.builder.ElementMaker(
        nsmap={
            # 'xi': "http://www.w3.org/2001/XInclude",
        }
    )

    def indexit(entry, wlist, lang=None):
        # Add words to the index, split on comma; tag foreign words with
        # their language code unless the term already carries a bracket.
        for w in wlist.split(","):
            if "" != w:
                if lang and '[' not in w:
                    w += "[%s]" % lang
                entry.append(E.indexterm(E.primary(w)))

    glossary = E.glossary()
    ldesc = 'desc-%s' % lang
    for e in sorted(entries, key=lambda x: langsort(lang, x)):
        if 'topic' in e and lang in topicmap:
            # Translate the topic code; keep it untranslated when it is
            # missing from the map (previously a KeyError).
            e['topic'] = topicmap[lang].get(e['topic'], e['topic'])
        if lang in e:
            if ldesc not in e:
                print("warning: %s missing %s description" % (e[lang], lang))
                continue
            entry = E.glossentry()
            if list_topic and 'topic' in e:
                entry.append(E.glossterm('%s [%s]' % (e[lang], e['topic'])))
            else:
                entry.append(E.glossterm(e[lang]))
            indexit(entry, e[lang])
            lstr = ""
            for l in langcodes:
                if l != lang and l in e:
                    lstr += "%s (%s) " % (e[l], l)
                    # Add foreign words to index, split on comma.
                    indexit(entry, e[l], l)
            if "" != lstr:
                entry.append(E.glossdef(E.para(lstr)))
            for desccode in desccodes:
                codestr = 'desc-%s' % desccode
                if codestr in e:
                    entry.append(
                        E.glossdef(E.para("%s: %s" % (desccode, e[codestr]))))
            glossary.append(entry)

    content = lxml.etree.tostring(glossary,
                                  pretty_print=True,
                                  xml_declaration=True,
                                  encoding='UTF-8')
    with open(output, 'wb') as f:
        f.write(content)


def main():
    """Parse the command line, load the export and generate the glossary."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("langcode",
                        help="language code to generate glossary for")
    parser.add_argument("--output", help="where to store the glossary")
    args = parser.parse_args()

    # Collate with the user's locale so langsort() sorts correctly.
    locale.setlocale(locale.LC_ALL, '')

    global words
    words = load_words(filemakerxml)

    if 'nb' == args.langcode:
        print("Norsk/bokmål")
        print()
        make_glossary_docbook(lang='nb', desccodes=('nb',),
                              langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',),
                              output=args.output)
    elif 'sme' == args.langcode:
        print("Nordsamisk")
        print()
        make_glossary_docbook(lang='sme', desccodes=('sme', 'nb'),
                              langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',),
                              output=args.output)
    elif 'en' == args.langcode:
        print("Engelsk")
        print()
        make_glossary_docbook(lang='en', desccodes=('en', 'nb'),
                              langcodes=('en', 'nb', 'sme', 'sv', 'da',
                                         'fi', 'is',),
                              output=args.output)
    else:
        print("error: Unknown language code %s" % args.langcode)


if __name__ == '__main__':
    main()