]>
pere.pagekite.me Git - text-mekanikerord.git/blob - make-glossary
6 from lxml
.etree
import tostring
10 filemakerxml
= 'meksme-utf8.xml'
12 tree
= etree
.parse(filemakerxml
)
16 #print(tostring(tree))
19 'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
39 'fuolahas': 'bearbeiding',
43 resultset
= root
.find("{http://www.filemaker.com/fmpxmlresult}RESULTSET")
46 for row
in resultset
.getchildren():
49 for col
in row
.findall("{http://www.filemaker.com/fmpxmlresult}COL"):
50 t
= col
.getchildren()[0].text
53 t
= re
.sub(r
'\s+', ' ', t
)
58 def langsort(lang
, e
):
60 return locale
.strxfrm(e
[lang
])
62 return locale
.strxfrm(e
['sme'])
64 def make_glossary_docbook(lang
, desccodes
, langcodes
, output
='glossary.xml'):
66 E
= lxml
.builder
.ElementMaker(
68 # 'xi': "http://www.w3.org/2001/XInclude",
72 def indexit(entry
, wlist
, lang
=None):
73 for w
in wlist
.split(","):
75 if lang
and '[' not in w
:
77 entry
.append(E
.indexterm(E
.primary(w
)))
79 glossary
= E
.glossary()
80 for e
in sorted(words
, key
=lambda x
: langsort(lang
, x
)):
81 ldesc
= 'desc-%s' % lang
82 if 'topic' in e
and lang
in topicmap
:
83 e
['topic'] = topicmap
[lang
][e
['topic']]
85 w
= e
[lang
].split(',')
97 print("warning: %s missing %s description" % (e
[lang
], lang
))
99 entry
= E
.glossentry(id=id)
100 if list_topic
and 'topic' in e
:
101 entry
.append(E
.glossterm('%s [%s]' % (e
[lang
], e
['topic'])))
103 entry
.append(E
.glossterm(e
[lang
]))
104 indexit(entry
, e
[lang
])
107 if l
!= lang
and l
in e
:
108 lstr
+= "%s (%s) " % (e
[l
], l
)
109 # Add foreign words to index, split on comma
110 indexit(entry
, e
[l
], l
)
112 entry
.append(E
.glossdef(E
.para(lstr
)))
113 for desccode
in desccodes
:
114 codestr
= 'desc-%s' % desccode
116 entry
.append(E
.glossdef(E
.para("%s: %s" % (desccode
,
118 glossary
.append(entry
)
120 # Add See also entries pointing to main entry
123 t
= t
.strip().lstrip()
124 entry
= E
.glossentry()
125 entry
.append(E
.glossterm(t
))
127 entry
.append(E
.glosssee(otherterm
=id))
128 glossary
.append(entry
)
130 def glosstermlocale(x
):
131 # Look up glossterm (FIXME figure out more robust way)
132 t
= x
.getchildren()[0].text
134 return locale
.strxfrm(t
)
137 # Sort list to mix seealso entries into their correct location.
138 glossary
[:] = sorted(glossary
, key
=glosstermlocale
)
140 content
= lxml
.etree
.tostring(glossary
,
142 xml_declaration
=True,
145 with open(output
, 'wb') as f
:
149 parser
= argparse
.ArgumentParser()
150 parser
.add_argument("langcode", help="language code to generate glossary for")
151 parser
.add_argument("--output", help="where to store the glossary")
152 args
= parser
.parse_args()
154 locale
.setlocale(locale
.LC_ALL
, '')
156 if 'nb' == args
.langcode
:
157 print("Norsk/bokmål")
159 make_glossary_docbook(lang
='nb', desccodes
=('nb',),
160 langcodes
=('en', 'sme', 'sv', 'da', 'fi', 'is',),
162 elif 'sme' == args
.langcode
:
165 make_glossary_docbook(lang
='sme', desccodes
=('sme', 'nb'),
166 langcodes
=('nb', 'en', 'sv', 'da', 'fi', 'is',),
168 elif 'en' == args
.langcode
:
171 make_glossary_docbook(lang
='en', desccodes
=('en', 'nb'),
172 langcodes
=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',),
175 print("error: Unknown language code %s" % args
.langcode
)