# make-glossary - build a DocBook glossary from the FileMaker XML export
# (from the text-mekanikerord.git repository)
import argparse
import json
import locale
import re

import lxml.builder
import lxml.etree
from lxml import etree
from lxml.etree import tostring
filemakerxml = 'meksme-utf8.xml'

tree = etree.parse(filemakerxml)
root = tree.getroot()
#print(tostring(tree))
# Column names in the FileMaker export, in COL order (the 'cols' name is
# an assumption; the list is used to key the row dicts below).
cols = [
    'topic', 'se', 'desc-se', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
]
# Topic labels per language; the nesting follows the
# topicmap[lang][e['topic']] lookup inside make_glossary_docbook().
topicmap = {
    'nb': {
        'fuolahas': 'bearbeiding',
    },
}
resultset = root.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")

words = []
for row in resultset.getchildren():
    # Build one dict per row, keyed by the column names in cols
    # (the dict-building details are assumed).
    d = {}
    idx = 0
    for col in row.findall("{http://www.filemaker.com/fmpxmlresult}COL"):
        t = col.getchildren()[0].text
        if t is not None:
            # Collapse whitespace runs to single spaces.
            t = re.sub(r'\s+', ' ', t)
            d[cols[idx]] = t
        idx += 1
    words.append(d)
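# Each words entry now maps column names to cleaned strings, e.g.
# (hypothetical values) {'topic': 'fuolahas', 'se': '...', 'nb': '...'}.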
with open('meksme-utf8.json', 'w') as f:
    # Dump the parsed word list for other consumers (dump arguments assumed).
    json.dump(words, f)
def langsort(lang, e):
    # Sort on the requested language when the entry has it, falling back
    # to the North Sámi ('se') term (the guard condition is assumed).
    if lang in e:
        return locale.strxfrm(e[lang])
    return locale.strxfrm(e['se'])
def make_glossary_docbook(lang, desccodes, langcodes, output='glossary.xml'):
    E = lxml.builder.ElementMaker(
        nsmap={
            # 'xi': "http://www.w3.org/2001/XInclude",
        })

    # The helpers below are assumptions based on how the names are used
    # further down.
    redirects = {}
    list_topic = False

    def word2id(word):
        # Derive a DocBook id attribute from a term (assumed implementation).
        return re.sub(r'\W+', '', word).lower()
    def indexit(entry, wlist, lang=None):
        # Add one indexterm per comma-separated word.
        for w in wlist.split(","):
            w = w.strip()
            if lang and '[' not in w:
                # Mark foreign-language words with their language code
                # (the exact marking is assumed).
                w = "%s (%s)" % (w, lang)
            entry.append(E.indexterm(E.primary(w)))
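    # Each call thus emits DocBook index markup of the form
    #   <indexterm><primary>term (sv)</primary></indexterm>
    # with the language suffix only on foreign-language words.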
    glossary = E.glossary()
    # Primary terms for the chosen language, used to detect dangling
    # redirects below (this set and its construction are assumptions).
    terms = set()
    for x in words:
        if lang in x and '>' not in x[lang]:
            terms.update(t.strip() for t in x[lang].split(','))
    for e in sorted(words, key=lambda x: langsort(lang, x)):
        ldesc = 'desc-%s' % lang
        if 'topic' in e and lang in topicmap:
            e['topic'] = topicmap[lang][e['topic']]
        if lang not in e:
            continue
        w = e[lang].split(',')
        # Use the first word as the DocBook id (assumed).
        id = word2id(w[0])
        # First handle redirections with no extra info
        if -1 != e[lang].find('>') and ldesc not in e:
            p = e[lang].split(' > ')
            if p[0] in redirects:  # Skip if already added
                continue
            if -1 == p[1].find(','):
                # Single redirect target; check that it exists before
                # adding the entry (this guard is an assumption).
                if p[1] not in terms:
                    print("warning: Skipping dangling reference %s -> %s" %
                          (p[0], p[1]))
                    continue
                id = word2id(p[1])
                redirects[p[0]] = id
                seeentry = E.glossentry()
                seeentry.append(E.glossterm(p[0]))
                seeentry.append(E.glosssee(otherterm=id))
                glossary.append(seeentry)
            else:
                print("warning: skipping split reference %s -> %s" %
                      (p[0], p[1]))
                if False:  # Not allowed in docbook
                    seeentry = E.glossentry()
                    seeentry.append(E.glossterm(p[0]))
                    for s in p[1].split(','):
                        s = s.strip().lstrip()
                        seeentry.append(E.glosssee(otherterm=word2id(s)))
                    glossary.append(seeentry)
            continue  # redirect entries produce no regular glossentry
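        # Each redirect becomes a DocBook entry along the lines of
        #   <glossentry><glossterm>old term</glossterm>
        #     <glosssee otherterm="targetid"/></glossentry>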
        # Add See also entries pointing to main entry (iterating over the
        # extra comma-separated synonyms; the loop header is assumed)
        for t in w[1:]:
            t = t.strip().lstrip()
            if t not in redirects:
                #print("info: Adding see also entry for %s" % t)
                seeentry = E.glossentry()
                seeentry.append(E.glossterm(t))
                seeentry.append(E.glosssee(otherterm=id))
                glossary.append(seeentry)
148 print("warning: term %s missing primary language %s description" % (e
[lang
], lang
))
149 entry
= E
.glossentry(id=id)
150 if list_topic
and 'topic' in e
:
151 entry
.append(E
.glossterm('%s [%s]' % (e
[lang
], e
['topic'])))
153 entry
.append(E
.glossterm(e
[lang
]))
154 indexit(entry
, e
[lang
])
        # Collect translations into one paragraph (the initialisation and
        # loop header are assumed).
        lstr = ""
        for l in langcodes:
            if l != lang and l in e:
                lstr += "%s (%s) " % (e[l], l)
                # Add foreign words to index, split on comma
                indexit(entry, e[l], l)
        if lstr:
            entry.append(E.glossdef(E.para(lstr)))
        else:
            # only single word without translations, skip it
            continue
        for desccode in desccodes:
            codestr = 'desc-%s' % desccode
            if codestr in e:  # (guard assumed)
                entry.append(E.glossdef(E.para("(%s): %s" % (desccode,
                                                             e[codestr]))))
        glossary.append(entry)
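        # A finished entry serializes to something like
        #   <glossentry id="..."><glossterm>term</glossterm>
        #     <glossdef><para>... (en) ... (sv) </para></glossdef>
        #   </glossentry>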
    def glosstermlocale(x):
        # Look up glossterm (FIXME figure out more robust way)
        t = x.getchildren()[0].text
        if t is not None:  # (None guard assumed)
            return locale.strxfrm(t)
        return ''

    # Sort list to mix seealso entries into their correct location.
    glossary[:] = sorted(glossary, key=glosstermlocale)
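    # locale.strxfrm() turns each glossterm into a collation key for the
    # current locale, so the sort above follows the locale selected by
    # setlocale() below rather than plain byte order.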
184 print("info: dictionary contain %d entries" % l
)
    # Serialize the glossary (pretty_print and encoding arguments assumed).
    content = lxml.etree.tostring(glossary,
                                  xml_declaration=True,
                                  pretty_print=True,
                                  encoding='UTF-8')
    with open(output, 'wb') as f:
        f.write(content)
parser = argparse.ArgumentParser()
parser.add_argument("langcode", help="language code to generate glossary for")
parser.add_argument("--output", default='glossary.xml',  # (default assumed)
                    help="where to store the glossary")
args = parser.parse_args()

locale.setlocale(locale.LC_ALL, '')
if 'nb' == args.langcode:
    print("Norsk/bokmål")
    make_glossary_docbook(lang='nb', desccodes=('nb',),
                          langcodes=('en', 'se', 'sv', 'da', 'fi', 'is',),
                          output=args.output)
elif 'se' == args.langcode:
    make_glossary_docbook(lang='se', desccodes=('se', 'nb'),
                          langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',),
                          output=args.output)
elif 'en' == args.langcode:
    # Note ('en',): a bare ('en') would be a string, not a tuple.
    make_glossary_docbook(lang='en', desccodes=('en',),
                          langcodes=('en', 'nb', 'se', 'sv', 'da', 'fi', 'is',),
                          output=args.output)
else:
    print("error: Unknown language code %s" % args.langcode)
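# Example usage:
#   python3 make-glossary nb
#   python3 make-glossary se --output glossary-se.xml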