]>
pere.pagekite.me Git - text-mekanikerord.git/blob - make-glossary
4 from lxml
.etree
import tostring
6 filemakerxml
= 'meksme-utf8.xml'
8 tree
= etree
.parse(filemakerxml
)
12 #print(tostring(tree))
15 'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
18 resultset
= root
.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")
21 for row
in resultset
.getchildren():
24 for col
in row
.findall("{http://www.filemaker.com/fmpxmlresult}COL"):
25 t
= col
.getchildren()[0].text
28 t
= re
.sub(r
'\s+', ' ', t
)
33 def langsort(lang
, e
):
39 def make_glossary_docbook(lang
):
41 E
= lxml
.builder
.ElementMaker(
43 # 'xi': "http://www.w3.org/2001/XInclude",
47 langcodes
= ('en', 'nb', 'sme', 'sv', 'fi',)
49 def indexit(entry
, wlist
, lang
=None):
50 for w
in wlist
.split(","):
54 entry
.append(E
.indexterm(E
.primary(w
)))
55 glossary
= E
.glosslist()
56 for e
in sorted(words
, key
=lambda x
: langsort(lang
, x
)):
59 if lang
in e
and 'desc-%s' % lang
in e
:
61 E
.glossterm('%s [%s]' % (e
[lang
], e
['topic'])),
63 indexit(entry
, e
[lang
])
66 if l
!= lang
and l
in e
:
67 lstr
+= "%s (%s) " % (e
[l
], l
)
68 # Add foreign words to index, split on comma
69 indexit(entry
, e
[l
], l
)
70 entry
.append(E
.glossdef(E
.para(e
['desc-%s' % lang
])))
72 entry
.append(E
.glossdef(E
.para(lstr
)))
73 glossary
.append(entry
)
75 content
= lxml
.etree
.tostring(glossary
,
80 with open('glossary.xml', 'wb') as f
:
83 def make_glossary(lang
):
84 make_glossary_docbook(lang
)
89 make_glossary(lang
='nb')
93 make_glossary(lang
='sme')
98 #make_glossary(lang='en')