]> pere.pagekite.me Git - text-mekanikerord.git/blob - make-glossary
Start on a script to read the FileMaker XML source.
[text-mekanikerord.git] / make-glossary
1 #!/usr/bin/python3
2
import re

from lxml import etree
from lxml.etree import tostring
5
# Parse the FileMaker XML export and collect one dict per row of the
# result set into the module-level list ``words``.
tree = etree.parse('mekanikk-1999/meksme-utf8.xml')
root = tree.getroot()

# Dictionary keys, in the order the COL elements of a row are read.
cols = (
    'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
    'unknown',
)

# Namespace prefix shared by every element looked up below.
FMP_NS = "{http://www.filemaker.com/fmpxmlresult}"
# Compiled once; used to collapse each run of whitespace to one space.
WHITESPACE_RE = re.compile(r'\s+')

resultset = root.find(FMP_NS + "RESULTSET")
if resultset is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit("no RESULTSET element found in the XML source")

# Each entry maps a name from ``cols`` to the whitespace-normalised text of
# the matching column; columns without text are left out of the dict.
words = []
for row in resultset:
    entry = {}
    # zip() pairs columns with their names positionally; any COL beyond
    # the names listed in ``cols`` is ignored.
    for name, col in zip(cols, row.findall(FMP_NS + "COL")):
        # Only the first child of COL is read; a COL without children is
        # treated like a column without text.
        text = col[0].text if len(col) else None
        if text:
            entry[name] = WHITESPACE_RE.sub(' ', text)
    words.append(entry)
32
def make_glossary(lang, entries=None):
    """Print a reStructuredText ``glossary`` directive for one language.

    Every entry that has both a term under *lang* and a description under
    ``desc-<lang>`` is printed as ``term [topic]`` followed by the
    description on the next line and a blank line.  Entries without a
    ``topic`` are shown with ``n/a``.  Entries lacking the term or the
    description are skipped silently.  Output is ordered by term.

    The entries themselves are never modified.

    Args:
        lang: Key of the term column, e.g. ``'sme'`` or ``'nb'``.
        entries: Iterable of dicts to print.  Defaults to the module-level
            ``words`` list.
    """
    if entries is None:
        entries = words
    desc_key = 'desc-%s' % lang

    print(".. glossary::")
    print()

    # Filter before sorting: only printable entries need an order, and
    # every one of them is guaranteed to have the sort key, so an entry
    # with neither ``lang`` nor 'sme' can no longer raise KeyError.
    printable = [e for e in entries if lang in e and desc_key in e]
    for e in sorted(printable, key=lambda e: e[lang]):
        # Look the topic up with a default instead of writing 'n/a' back
        # into the shared entry.
        topic = e.get('topic', 'n/a')
        # NOTE(review): term and description are emitted with the same
        # one-space indent -- confirm this is the intended RST layout.
        print(" %s [%s]\n %s" % (e[lang], topic, e[desc_key]))
        print()
52
# Emit one titled section per language: heading, underline, blank line,
# then the glossary for that language.
sections = (
    ("Nordsamisk", "==========", 'sme'),
    ("Norsk", "=====", 'nb'),
    # The English section is currently disabled.
    # ("Engelsk", "=====", 'en'),
)
for heading, underline, code in sections:
    print(heading)
    print(underline)
    print()
    make_glossary(lang=code)
66 #make_glossary(lang='en')