#!/usr/bin/python3
"""Generate reStructuredText glossaries from a FileMaker XML export.

The script reads a FileMaker ``FMPXMLRESULT`` file, turns every ``ROW``
into a dictionary keyed by the names in ``cols`` and prints one
``.. glossary::`` section per language listed in ``SECTIONS``.

Provenance: reconstructed from the commit "Start on script to read
filemaker XML source." (Petter Reinholdtsen, 2020-08-13), which added
this file as ``make-glossary``.
"""

from operator import itemgetter

# XML namespace prefix (Clark notation) of the elements looked up below.
FM_NS = "{http://www.filemaker.com/fmpxmlresult}"

# File parsed when the script is run without further configuration.
DEFAULT_SOURCE = 'mekanikk-1999/meksme-utf8.xml'

# Dictionary keys assigned, in order, to the COL elements of each ROW.
cols = (
    'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
    'unknown',
)

# (heading, language key) for every glossary printed by main().
# There is no 'desc-en' column, so an English glossary would come out
# empty; that is why no "Engelsk" section is listed here.
SECTIONS = (
    ("Nordsamisk", "sme"),
    ("Norsk", "nb"),
)

# Entries loaded by main(); make_glossary() falls back to this list.
words = []


def normalize_space(text):
    """Return *text* with whitespace runs collapsed to single spaces.

    Leading and trailing whitespace is removed as well, so a cell that
    only contains whitespace yields ``""``.  ``None`` also yields ``""``.
    """
    if not text:
        return ""
    return " ".join(text.split())


def parse_words(root):
    """Convert the rows below *root* into a list of dictionaries.

    *root* is the root element of the parsed export.  Only ``find``,
    ``findall`` and child iteration are used on it.

    Each row becomes one dictionary.  The n-th ``COL`` is stored under
    ``cols[n]`` using the normalised text of its first child element.
    Columns without a child or without text are left out of the
    dictionary, and columns beyond ``len(cols)`` are ignored.

    Raises:
        ValueError: if *root* has no ``RESULTSET`` child.
    """
    resultset = root.find(FM_NS + "RESULTSET")
    if resultset is None:
        raise ValueError("no RESULTSET element found in FileMaker XML")

    entries = []
    for row in resultset:
        entry = {}
        for name, col in zip(cols, row.findall(FM_NS + "COL")):
            # The text lives in the first child of COL; tolerate an
            # empty COL instead of failing with IndexError.
            data = next(iter(col), None)
            text = normalize_space(data.text) if data is not None else ""
            if text:
                entry[name] = text
        entries.append(entry)
    return entries


def load_words(path=DEFAULT_SOURCE):
    """Parse the FileMaker XML file at *path* and return its entries."""
    # Imported here so the formatting helpers above and below remain
    # importable on systems where lxml is not installed.
    from lxml import etree

    return parse_words(etree.parse(path).getroot())


def render_glossary(lang, entries):
    """Return the ``.. glossary::`` block for *lang* as a string.

    Only entries that have both a term (key *lang*) and a description
    (key ``desc-<lang>``) are included; all others are skipped silently.
    Entries are sorted by their term, and a missing topic is shown as
    ``n/a``.  *entries* is not modified.
    """
    desc_key = 'desc-%s' % lang
    # Filter before sorting so that incomplete entries can never make
    # the sort key lookup fail.
    usable = [e for e in entries if lang in e and desc_key in e]

    lines = [".. glossary::", ""]
    for e in sorted(usable, key=itemgetter(lang)):
        # Term at directive-content indentation, definition one level
        # deeper, so the pair forms a definition list.
        lines.append("   %s [%s]" % (e[lang], e.get('topic', 'n/a')))
        lines.append("      %s" % e[desc_key])
        lines.append("")
    return "\n".join(lines) + "\n"


def make_glossary(lang, entries=None):
    """Print the glossary for *lang* to standard output.

    *entries* defaults to the module-level ``words`` list.
    """
    if entries is None:
        entries = words
    print(render_glossary(lang, entries), end="")


def main():
    """Load the default source file and print every configured glossary."""
    words.extend(load_words())
    for title, lang in SECTIONS:
        print(title)
        print("=" * len(title))
        print()
        make_glossary(lang=lang)


if __name__ == "__main__":
    main()