]>
pere.pagekite.me Git - text-mekanikerord.git/blob - make-glossary
4 from lxml
.etree
import tostring
6 filemakerxml
= 'meksme-utf8.xml'
8 tree
= etree
.parse(filemakerxml
)
12 #print(tostring(tree))
15 'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
35 'fuolahas': 'bearbeiding',
39 resultset
= root
.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")
42 for row
in resultset
.getchildren():
45 for col
in row
.findall("{http://www.filemaker.com/fmpxmlresult}COL"):
46 t
= col
.getchildren()[0].text
49 t
= re
.sub(r
'\s+', ' ', t
)
54 def langsort(lang
, e
):
60 def make_glossary_docbook(lang
, langcodes
):
62 E
= lxml
.builder
.ElementMaker(
64 # 'xi': "http://www.w3.org/2001/XInclude",
68 def indexit(entry
, wlist
, lang
=None):
69 for w
in wlist
.split(","):
71 if lang
and '[' not in w
:
73 entry
.append(E
.indexterm(E
.primary(w
)))
74 glossary
= E
.glosslist()
75 for e
in sorted(words
, key
=lambda x
: langsort(lang
, x
)):
76 ldesc
= 'desc-%s' % lang
77 if 'topic' in e
and lang
in topicmap
:
78 e
['topic'] = topicmap
[lang
][e
['topic']]
80 entry
= E
.glossentry()
82 entry
.append(E
.glossterm('%s [%s]' % (e
[lang
], e
['topic'])))
84 entry
.append(E
.glossterm(e
[lang
]))
85 indexit(entry
, e
[lang
])
88 if l
!= lang
and l
in e
:
89 lstr
+= "%s (%s) " % (e
[l
], l
)
90 # Add foreign words to index, split on comma
91 indexit(entry
, e
[l
], l
)
93 entry
.append(E
.glossdef(E
.para(lstr
)))
95 entry
.append(E
.glossdef(E
.para(e
[ldesc
])))
96 glossary
.append(entry
)
98 if False: # failed to set docbook glossary like xmlto and lint want it...
100 E
.glossary(E
.title("x"),
101 E
.glossdiv(E
.title("y"),
104 content
= lxml
.etree
.tostring(glossary
,
106 xml_declaration
=True,
109 with open('glossary.xml', 'wb') as f
:
118 print("Norsk/bokmål")
120 make_glossary_docbook(lang
='nb', langcodes
=('en', 'sme', 'sv', 'da', 'fi', 'is',))
124 make_glossary_docbook(lang
='sme', langcodes
=('nb', 'en', 'sv', 'da', 'fi', 'is',))
128 make_glossary_docbook(lang
='en', langcodes
=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',))