pere.pagekite.me Git - text-mekanikerord.git/blobdiff - make-glossary
Expand and correct nb/nn entry.
[text-mekanikerord.git] / make-glossary
index 612fa52b6359006f256c5a0d3f1fbc1757086883..51c94bf291b092896226ead78fe549168ba3c88f 100755 (executable)
@@ -1,5 +1,7 @@
 #!/usr/bin/python3
 
+import locale
+
 from lxml import etree
 from lxml.etree import tostring
 
@@ -55,11 +57,11 @@ for row in resultset.getchildren():
     words.append(d)
 def langsort(lang, e):
     if lang in e:
-        return e[lang]
+        return locale.strxfrm(e[lang])
     else:
-        return e['sme']
+        return locale.strxfrm(e['sme'])
 
-def make_glossary_docbook(lang, langcodes):
+def make_glossary_docbook(lang, desccodes, langcodes, output='glossary.xml'):
     import lxml.builder
     E = lxml.builder.ElementMaker(
         nsmap={
@@ -73,13 +75,28 @@ def make_glossary_docbook(lang, langcodes):
                 if lang and '[' not in w:
                     w += "[%s]" % lang
                 entry.append(E.indexterm(E.primary(w)))
+    ids = {}
     glossary = E.glossary()
     for e in sorted(words, key=lambda x: langsort(lang, x)):
         ldesc = 'desc-%s' % lang
         if 'topic' in e and lang in topicmap:
             e['topic'] = topicmap[lang][e['topic']]
         if lang in e:
-            entry = E.glossentry()
+            w = e[lang].split(',')
+            id = w[0] \
+                .replace('[', '_') \
+                .replace(']', '_') \
+                .replace('(', '_') \
+                .replace(')', '_') \
+                .replace('/', '_') \
+                .replace(' ', '_')
+            while id in ids:
+                id = id + 'x'
+            ids[id] = True
+            if ldesc not in e:
+                print("warning: %s missing %s description" % (e[lang], lang))
+                continue
+            entry = E.glossentry(id=id)
             if list_topic and 'topic' in e:
                 entry.append(E.glossterm('%s [%s]' % (e[lang], e['topic'])))
             else:
@@ -93,38 +110,66 @@ def make_glossary_docbook(lang, langcodes):
                     indexit(entry, e[l], l)
             if "" != lstr:
                 entry.append(E.glossdef(E.para(lstr)))
-            if ldesc in e:
-                entry.append(E.glossdef(E.para(e[ldesc])))
+            for desccode in desccodes:
+                codestr = 'desc-%s' % desccode
+                if codestr in e:
+                    entry.append(E.glossdef(E.para("%s: %s" % (desccode,
+                                                               e[codestr]))))
             glossary.append(entry)
 
-    if False: # failed to set docbook glossary like xmlto and lint want it...
-      glossary =\
-        E.glossary(E.title("x"),
-                   E.glossdiv(E.title("y"),
-                              glossary))
+            # Add See also entries pointing to main entry
+            if 1 < len(w):
+                for t in w[1:]:
+                    t = t.strip().lstrip()
+                    entry = E.glossentry()
+                    entry.append(E.glossterm(t))
+                    # FIXME
+                    entry.append(E.glosssee(otherterm=id))
+                glossary.append(entry)
 
+    def glosstermlocale(x):
+        # Look up glossterm (FIXME figure out more robust way)
+        t = x.getchildren()[0].text
+        if t:
+            return locale.strxfrm(t)
+        else:
+            return ""
+    # Sort list to mix seealso entries into their correct location.
+    glossary[:] = sorted(glossary, key=glosstermlocale)
+    
     content = lxml.etree.tostring(glossary,
                                   pretty_print=True,
                                   xml_declaration=True,
                                   encoding='UTF-8')
 #    print(content)
-    with open('glossary.xml', 'wb') as f:
+    with open(output, 'wb') as f:
         f.write(content)
 
-focus = 'nb'
-#focus = 'sme'
-#focus = 'sv'
-#focus = 'en'
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("langcode", help="language code to generate glossary for")
+parser.add_argument("--output", help="where to store the glossary")
+args = parser.parse_args()
+
+locale.setlocale(locale.LC_ALL, '')
 
-if 'nb' == focus:
+if 'nb' == args.langcode:
     print("Norsk/bokmål")
     print()
-    make_glossary_docbook(lang='nb', langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',))
-elif 'sme' == focus:
+    make_glossary_docbook(lang='nb', desccodes=('nb',),
+                          langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',),
+                          output=args.output)
+elif 'sme' == args.langcode:
     print("Nordsamisk")
     print()
-    make_glossary_docbook(lang='sme', langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',))
-elif 'en' == focus:
+    make_glossary_docbook(lang='sme', desccodes=('sme', 'nb'),
+                          langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',),
+                          output=args.output)
+elif 'en' == args.langcode:
     print("Engelsk")
     print()
-    make_glossary_docbook(lang='en',  langcodes=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',))
+    make_glossary_docbook(lang='en', desccodes=('en', 'nb'),
+                          langcodes=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',),
+                          output=args.output)
+else:
+    print("error: Unknown language code %s" % args.langcode)