X-Git-Url: https://pere.pagekite.me/gitweb/text-mekanikerord.git/blobdiff_plain/5af55c944483ca3521476ab3d361ce90a9dfc357..43be5e6f2aaa1c7b726c9867b67a56ff8cef8314:/make-glossary diff --git a/make-glossary b/make-glossary index c54b02c..51c94bf 100755 --- a/make-glossary +++ b/make-glossary @@ -1,8 +1,12 @@ #!/usr/bin/python3 +import locale + from lxml import etree from lxml.etree import tostring +list_topic = False + filemakerxml = 'meksme-utf8.xml' tree = etree.parse(filemakerxml) @@ -53,11 +57,11 @@ for row in resultset.getchildren(): words.append(d) def langsort(lang, e): if lang in e: - return e[lang] + return locale.strxfrm(e[lang]) else: - return e['sme'] + return locale.strxfrm(e['sme']) -def make_glossary_docbook(lang, langcodes): +def make_glossary_docbook(lang, desccodes, langcodes, output='glossary.xml'): import lxml.builder E = lxml.builder.ElementMaker( nsmap={ @@ -71,14 +75,29 @@ def make_glossary_docbook(lang, langcodes): if lang and '[' not in w: w += "[%s]" % lang entry.append(E.indexterm(E.primary(w))) - glossary = E.glosslist() + ids = {} + glossary = E.glossary() for e in sorted(words, key=lambda x: langsort(lang, x)): ldesc = 'desc-%s' % lang if 'topic' in e and lang in topicmap: e['topic'] = topicmap[lang][e['topic']] if lang in e: - entry = E.glossentry() - if 'topic' in e: + w = e[lang].split(',') + id = w[0] \ + .replace('[', '_') \ + .replace(']', '_') \ + .replace('(', '_') \ + .replace(')', '_') \ + .replace('/', '_') \ + .replace(' ', '_') + while id in ids: + id = id + 'x' + ids[id] = True + if ldesc not in e: + print("warning: %s missing %s description" % (e[lang], lang)) + continue + entry = E.glossentry(id=id) + if list_topic and 'topic' in e: entry.append(E.glossterm('%s [%s]' % (e[lang], e['topic']))) else: entry.append(E.glossterm(e[lang])) @@ -91,38 +110,66 @@ def make_glossary_docbook(lang, langcodes): indexit(entry, e[l], l) if "" != lstr: entry.append(E.glossdef(E.para(lstr))) - if ldesc in e: - entry.append(E.glossdef(E.para(e[ldesc]))) + for desccode in desccodes: + codestr = 'desc-%s' % desccode + if codestr in e: + entry.append(E.glossdef(E.para("%s: %s" % (desccode, + e[codestr])))) glossary.append(entry) - if False: # failed to set docbook glossary like xmlto and lint want it... - glossary =\ - E.glossary(E.title("x"), - E.glossdiv(E.title("y"), - glossary)) + # Add See also entries pointing to main entry + if 1 < len(w): + for t in w[1:]: + t = t.strip().lstrip() + entry = E.glossentry() + entry.append(E.glossterm(t)) + # FIXME + entry.append(E.glosssee(otherterm=id)) + glossary.append(entry) + def glosstermlocale(x): + # Look up glossterm (FIXME figure out more robust way) + t = x.getchildren()[0].text + if t: + return locale.strxfrm(t) + else: + return "" + # Sort list to mix seealso entries into their correct location. + glossary[:] = sorted(glossary, key=glosstermlocale) + content = lxml.etree.tostring(glossary, pretty_print=True, xml_declaration=True, encoding='UTF-8') # print(content) - with open('glossary.xml', 'wb') as f: + with open(output, 'wb') as f: f.write(content) -focus = 'nb' -#focus = 'sme' -#focus = 'sv' -#focus = 'en' +import argparse +parser = argparse.ArgumentParser() +parser.add_argument("langcode", help="language code to generate glossary for") +parser.add_argument("--output", help="where to store the glossary") +args = parser.parse_args() + +locale.setlocale(locale.LC_ALL, '') -if 'nb' == focus: +if 'nb' == args.langcode: print("Norsk/bokmål") print() - make_glossary_docbook(lang='nb', langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',)) -elif 'sme' == focus: + make_glossary_docbook(lang='nb', desccodes=('nb',), + langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',), + output=args.output) +elif 'sme' == args.langcode: print("Nordsamisk") print() - make_glossary_docbook(lang='sme', langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',)) -elif 'en' == focus: + make_glossary_docbook(lang='sme', desccodes=('sme', 'nb'), + langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',), + output=args.output) +elif 'en' == args.langcode: print("Engelsk") print() - make_glossary_docbook(lang='en', langcodes=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',)) + make_glossary_docbook(lang='en', desccodes=('en', 'nb'), + langcodes=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',), + output=args.output) +else: + print("error: Unknown language code %s" % args.langcode)