# Source: pere.pagekite.me Git — text-mekanikerord.git, file: make-glossary
# Commit: "Change to only use several columns in body."
1 #!/usr/bin/python3
2
import re

from lxml import etree
from lxml.etree import tostring
5
# Input: FileMaker Pro XML export (FMPXMLRESULT grammar, UTF-8) holding
# the dictionary rows; parsed once at module load.
filemakerxml = 'meksme-utf8.xml'

tree = etree.parse(filemakerxml)
root = tree.getroot()

#print(root)
#print(tostring(tree))
13
# Column order of the COL elements inside each FileMaker RESULTSET ROW:
# COL number i holds the field named cols[i].  'nb' is Norwegian bokmål,
# 'sme' North Sami, 'en' English (per the focus titles at the bottom of
# this script); 'sv'/'fi'/'is' are presumably Swedish/Finnish/Icelandic —
# TODO confirm against the FileMaker database definition.  The desc-*
# columns carry free-text descriptions in the given language.
cols = (
    'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
)

# Translation of the topic labels (stored in North Sami in the source
# data) into each glossary output language.  Only 'nb' is mapped so far;
# other focus languages keep the North Sami topic label as-is.
topicmap = {
    'nb' : {
        'fáddá': 'tema',
        'ávnnas': 'emne',
        'eanan': 'land',
        'biras': 'miljø',
        'huksen': 'bygg',
        'bohcci': 'rør',
        'data': 'data',
        'hydr': 'hydraulikk',
        'fys': 'fysikk',
        'sveis': 'sveising',
        'mihttu': 'måling',
        'elektro': 'elektro',
        'neavvu': 'verktøy',
        'mohtor': 'motor',
        'mašiidna': 'maskin',
        'fuolahas': 'bearbeiding',
    }
}
38
# Locate the single RESULTSET element in the FileMaker export.
resultset = root.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")

# Flatten each ROW into a dict keyed by the field names in `cols`;
# empty cells are simply left out of the dict, so later code can use
# `lang in e` to test whether a row has a term in a given language.
words = []
for row in resultset:  # iterate children directly; getchildren() is removed in lxml 5
    d = {}
    for index, col in enumerate(
            row.findall("{http://www.filemaker.com/fmpxmlresult}COL")):
        # Each COL wraps a single DATA child holding the cell text.
        t = col[0].text
        if t:
            # Collapse newlines and runs of whitespace to single spaces.
            t = re.sub(r'\s+', ' ', t)
            d[cols[index]] = t
    #print(d)
    words.append(d)
def langsort(lang, e):
    """Sort key for word entry *e*: its term in *lang* when present,
    otherwise its North Sami ('sme') term."""
    return e[lang] if lang in e else e['sme']
def make_glossary_docbook(lang, langcodes):
    """Write glossary.xml: a DocBook <glosslist> of every entry in the
    module-level `words` list, sorted by the term in *lang*.

    lang      -- language code whose term becomes the glossterm headword
    langcodes -- language codes listed as translations in the glossdef,
                 in the given order (lang itself is skipped)

    Each entry gets indexterms for the headword and for every listed
    translation, and the desc-<lang> description (if any) as an extra
    glossdef paragraph.
    """
    import lxml.builder
    E = lxml.builder.ElementMaker(
        nsmap={
            # 'xi': "http://www.w3.org/2001/XInclude",
        }
    )

    def indexit(entry, wlist, lang=None):
        # One indexterm per comma-separated word; foreign words get a
        # "[lang]" tag unless the word already carries a bracket note.
        for w in wlist.split(","):
            if "" != w:
                if lang and '[' not in w:
                    w += "[%s]" % lang
                entry.append(E.indexterm(E.primary(w)))

    glossary = E.glosslist()
    for e in sorted(words, key=lambda x: langsort(lang, x)):
        ldesc = 'desc-%s' % lang
        # Translate the topic label into *lang* without mutating the
        # shared `words` entry (the old in-place update made a second
        # call raise KeyError on the already-translated label).  Topics
        # missing from the map are kept untranslated instead of raising.
        topic = e.get('topic')
        if topic is not None and lang in topicmap:
            topic = topicmap[lang].get(topic, topic)
        if lang in e:
            entry = E.glossentry()
            if topic is not None:
                entry.append(E.glossterm('%s [%s]' % (e[lang], topic)))
            else:
                entry.append(E.glossterm(e[lang]))
            indexit(entry, e[lang])
            lstr = ""
            for l in langcodes:
                if l != lang and l in e:
                    lstr += "%s (%s) " % (e[l], l)
                    # Add foreign words to index, split on comma
                    indexit(entry, e[l], l)
            if "" != lstr:
                entry.append(E.glossdef(E.para(lstr)))
            if ldesc in e:
                entry.append(E.glossdef(E.para(e[ldesc])))
            glossary.append(entry)

    if False:  # failed to set docbook glossary like xmlto and lint want it...
        glossary = \
            E.glossary(E.title("x"),
                       E.glossdiv(E.title("y"),
                                  glossary))

    content = lxml.etree.tostring(glossary,
                                  pretty_print=True,
                                  xml_declaration=True,
                                  encoding='UTF-8')
    # print(content)
    with open('glossary.xml', 'wb') as f:
        f.write(content)
111
# Language of the glossary headwords; edit to regenerate glossary.xml
# for another language.
focus = 'nb'
#focus = 'sme'
#focus = 'sv'
#focus = 'en'

# Per-focus configuration: printed title and translation languages.
glossary_setup = {
    'nb':  ("Norsk/bokmål", ('en', 'sme', 'sv', 'da', 'fi', 'is',)),
    'sme': ("Nordsamisk",   ('nb', 'en', 'sv', 'da', 'fi', 'is',)),
    'en':  ("Engelsk",      ('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',)),
}

if focus in glossary_setup:
    title, langcodes = glossary_setup[focus]
    print(title)
    print()
    make_glossary_docbook(lang=focus, langcodes=langcodes)
else:
    # The old if/elif chain silently did nothing for unsupported values
    # (e.g. the commented-out 'sv' option); fail loudly instead.
    raise SystemExit("unsupported focus language: %s" % focus)