]> pere.pagekite.me Git - text-mekanikerord.git/blob - make-glossary
a3f91499181bc5601a63b5bad91dd58a26b735e8
[text-mekanikerord.git] / make-glossary
1 #!/usr/bin/python3
2
import re

from lxml import etree
from lxml.etree import tostring
5
# When True, show each entry's topic in brackets after the glossary term.
list_topic = False

# FileMaker Pro XML export (FMPXMLRESULT grammar) holding the word list.
filemakerxml = 'meksme-utf8.xml'

tree = etree.parse(filemakerxml)
root = tree.getroot()

#print(root)
#print(tostring(tree))
# Order of the COL elements inside each RESULTSET ROW of the FileMaker
# export; presumably matches the database field order — TODO confirm.
cols = (
    'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
)

# Per-language translation of the topic names, which are stored in
# North Sami in the database.  Only a Norwegian (nb) mapping exists so far.
topicmap = {
    'nb': {
        'fáddá': 'tema',
        'ávnnas': 'emne',
        'eanan': 'land',
        'biras': 'miljø',
        'huksen': 'bygg',
        'bohcci': 'rør',
        'data': 'data',
        'hydr': 'hydraulikk',
        'fys': 'fysikk',
        'sveis': 'sveising',
        'mihttu': 'måling',
        'elektro': 'elektro',
        'neavvu': 'verktøy',
        'mohtor': 'motor',
        'mašiidna': 'maskin',
        'fuolahas': 'bearbeiding',
    }
}
40
# All ROW elements live below the FMPXMLRESULT RESULTSET element.
resultset = root.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")

# Flatten every ROW into a dict mapping column name -> cleaned cell text.
# Empty cells are left out entirely so that `lang in entry` tests below
# distinguish missing translations from present ones.
words = []
COL_TAG = "{http://www.filemaker.com/fmpxmlresult}COL"
for row in resultset:
    entry = {}
    # zip() pairs each COL with its column name; was a manual index
    # counter with the deprecated Element.getchildren() API before.
    for name, col in zip(cols, row.findall(COL_TAG)):
        text = col[0].text  # first child is the DATA element
        if text:
            # FileMaker cells may contain embedded newlines/tabs;
            # collapse every whitespace run to a single space.
            entry[name] = re.sub(r'\s+', ' ', text)
    words.append(entry)
def langsort(lang, e):
    """Sort key for entry *e*: its term in *lang*, falling back to the
    North Sami ('sme') term when no *lang* translation exists."""
    return e[lang] if lang in e else e['sme']
61
def make_glossary_docbook(lang, langcodes, output='glossary.xml'):
    """Write a DocBook glossary for *lang* to the file *output*.

    Entries from the module-level `words` list are sorted by their
    *lang* term (falling back to 'sme' via langsort).  Terms in the
    other *langcodes* are listed in a glossdef paragraph and added to
    the index; the *lang* description ('desc-<lang>') becomes a second
    glossdef.  Entries lacking a description are skipped with a warning.
    """
    import lxml.builder
    # Callers pass output=args.output, which is None when --output is
    # not given; fall back to the default instead of crashing in open().
    if not output:
        output = 'glossary.xml'
    E = lxml.builder.ElementMaker(
        nsmap={
            # 'xi': "http://www.w3.org/2001/XInclude",
        }
    )

    def indexit(entry, wlist, lang=None):
        # One indexterm per comma-separated word; foreign words are
        # tagged "word[lang]" unless they already carry a bracket tag.
        for w in wlist.split(","):
            if "" != w:
                if lang and '[' not in w:
                    w += "[%s]" % lang
                entry.append(E.indexterm(E.primary(w)))

    glossary = E.glossary()
    for e in sorted(words, key=lambda x: langsort(lang, x)):
        ldesc = 'desc-%s' % lang
        if 'topic' in e and lang in topicmap:
            # Translate the topic; unknown topics are kept unchanged
            # instead of raising KeyError as the old code did.
            e['topic'] = topicmap[lang].get(e['topic'], e['topic'])
        if lang in e:
            if ldesc not in e:
                print("warning: %s missing %s description" % (e[lang], lang))
                continue
            entry = E.glossentry()
            if list_topic and 'topic' in e:
                entry.append(E.glossterm('%s [%s]' % (e[lang], e['topic'])))
            else:
                entry.append(E.glossterm(e[lang]))
            indexit(entry, e[lang])
            lstr = ""
            for l in langcodes:
                if l != lang and l in e:
                    lstr += "%s (%s) " % (e[l], l)
                    # Add foreign words to index, split on comma
                    indexit(entry, e[l], l)
            if "" != lstr:
                entry.append(E.glossdef(E.para(lstr)))
            # ldesc is guaranteed present here (checked above).
            entry.append(E.glossdef(E.para(e[ldesc])))
            glossary.append(entry)

    content = lxml.etree.tostring(glossary,
                                  pretty_print=True,
                                  xml_declaration=True,
                                  encoding='UTF-8')
    with open(output, 'wb') as f:
        f.write(content)
116
import argparse

# Per-language UI title and the list of other languages to show for
# each entry.  Replaces three copy-pasted if/elif branches.
LANGUAGES = {
    'nb': ("Norsk/bokmål", ('en', 'sme', 'sv', 'da', 'fi', 'is',)),
    'sme': ("Nordsamisk", ('nb', 'en', 'sv', 'da', 'fi', 'is',)),
    'en': ("Engelsk", ('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',)),
}

parser = argparse.ArgumentParser()
parser.add_argument("langcode", help="language code to generate glossary for")
# A default prevents passing output=None into make_glossary_docbook(),
# which previously crashed in open() when --output was omitted.
parser.add_argument("--output", default="glossary.xml",
                    help="where to store the glossary (default: %(default)s)")
args = parser.parse_args()

if args.langcode in LANGUAGES:
    title, langcodes = LANGUAGES[args.langcode]
    print(title)
    print()
    make_glossary_docbook(lang=args.langcode, langcodes=langcodes,
                          output=args.output)
else:
    print("error: Unknown language code %s" % args.langcode)