]> pere.pagekite.me Git - text-mekanikerord.git/commitdiff
Start on script to read filemaker XML source.
author Petter Reinholdtsen <pere@hungry.com>
Thu, 13 Aug 2020 11:37:11 +0000 (13:37 +0200)
committer Petter Reinholdtsen <pere@hungry.com>
Thu, 13 Aug 2020 11:37:11 +0000 (13:37 +0200)
make-glossary [new file with mode: 0755]

diff --git a/make-glossary b/make-glossary
new file mode 100755 (executable)
index 0000000..6095a00
--- /dev/null
+++ b/make-glossary
@@ -0,0 +1,66 @@
+#!/usr/bin/python3
+
+from lxml import etree
+from lxml.etree import tostring
+
+# Parse the FileMaker XML export; the path is relative to the current dir.
+tree = etree.parse('mekanikk-1999/meksme-utf8.xml')
+root = tree.getroot()
+
+# Keys assigned to the COL cells of each row, in document order.
+# NOTE(review): the meaning of the last column is unknown -- confirm.
+cols = (
+    'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
+    'unknown',
+)
+
+import re  # hoisted: was re-imported inside the per-cell loop
+FMP_NS = "{http://www.filemaker.com/fmpxmlresult}"
+resultset = root.find(FMP_NS + "RESULTSET")
+
+# One dict per child of RESULTSET, keyed by `cols`; empty cells are omitted.
+words = []
+for row in resultset:  # iterating an element yields its children
+    d = {}
+    for index, col in enumerate(row.findall(FMP_NS + "COL")):
+        # A COL with no child element, or with empty text, has no value.
+        t = col[0].text if len(col) else None
+        if t:
+            # Collapse whitespace runs (including newlines) to one space.
+            d[cols[index]] = re.sub(r'\s+', ' ', t)
+    words.append(d)
+
+def make_glossary(lang):
+    """Print a reST ``glossary`` directive listing the terms for *lang*.
+
+    Only entries from the module-level ``words`` list that have both a
+    term and a 'desc-<lang>' description are printed; others are skipped.
+    Entries are read-only here: a missing topic is shown as 'n/a'.
+    """
+    desc_key = 'desc-%s' % lang
+    print(".. glossary::")
+    print()
+
+    # Filter before sorting so entries lacking the term (or even 'sme')
+    # cannot raise KeyError in the sort key.
+    entries = [e for e in words if lang in e and desc_key in e]
+    for e in sorted(entries, key=lambda w: w[lang]):
+        # .get() avoids writing a placeholder topic into the shared dict.
+        topic = e.get('topic', 'n/a')
+        print("  %s [%s]\n     %s" % (e[lang], topic, e[desc_key]))
+        print()
+
+# Emit one titled reST section per language, in the order listed.
+# Each entry is (heading, underline, language key passed to make_glossary).
+sections = (
+    ("Nordsamisk", "==========", 'sme'),
+    ("Norsk", "=====", 'nb'),
+    # Disabled: `cols` has no 'desc-en' key, so make_glossary('en')
+    # would list no entries.
+    # ("Engelsk", "=======", 'en'),
+)
+for title, underline, code in sections:
+    print(title)
+    print(underline)
+    print()
+    make_glossary(lang=code)