from lxml import etree
from lxml.etree import tostring
+import json
+
list_topic = False
filemakerxml = 'meksme-utf8.xml'
#print(tostring(tree))
cols = (
- 'topic', 'sme', 'desc-sme', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
+ 'topic', 'se', 'desc-se', 'desc-nb', 'nb', 'sv', 'fi', 'en', 'is',
)
topicmap = {
index += 1
#print(d)
words.append(d)
+
+ with open('meksme-utf8.json', 'w') as f:
+ json.dump(words, f)
+
def langsort(lang, e):
if lang in e:
return locale.strxfrm(e[lang])
else:
- return locale.strxfrm(e['sme'])
+ return locale.strxfrm(e['se'])
def make_glossary_docbook(lang, desccodes, langcodes, output='glossary.xml'):
import lxml.builder
}
)
+ def word2id(word):
+ return word \
+ .replace('[', '_') \
+ .replace(']', '_') \
+ .replace('(', '_') \
+ .replace(')', '_') \
+ .replace('/', '_') \
+ .replace('\'', '_') \
+ .replace(' ', '_')
+
def indexit(entry, wlist, lang=None):
for w in wlist.split(","):
if "" != w:
w += "[%s]" % lang
entry.append(E.indexterm(E.primary(w)))
ids = {}
+ redirects = {}
glossary = E.glossary()
for e in sorted(words, key=lambda x: langsort(lang, x)):
ldesc = 'desc-%s' % lang
e['topic'] = topicmap[lang][e['topic']]
if lang in e:
w = e[lang].split(',')
- id = w[0] \
- .replace('[', '_') \
- .replace(']', '_') \
- .replace('(', '_') \
- .replace(')', '_') \
- .replace('/', '_') \
- .replace(' ', '_')
+ id = word2id(w[0])
while id in ids:
id = id + 'x'
ids[id] = True
- if ldesc not in e:
- print("warning: %s missing %s description" % (e[lang], lang))
+
+ # First handle redirections with no extra info
+ if -1 != e[lang].find('>') and ldesc not in e:
+ p = e[lang].split(' > ')
+ if p[0] in redirects: # Skip if already added
+ continue
+ if -1 == p[1].find(','):
+ if '-' == p[1][-1]:
+ print("warning: Skipping dangling reference %s -> %s" %
+ (p[0], p[1]))
+ else:
+ seeentry = E.glossentry()
+ seeentry.append(E.glossterm(p[0]))
+ id = word2id(p[1])
+ seeentry.append(E.glosssee(otherterm=id))
+ glossary.append(seeentry)
+ redirects[p[0]] = id
+ else:
+ print("warning: skipping split refererence %s -> %s" %
+ (p[0], p[1]))
+ if False: # Not allowed in docbook
+ seeentry = E.glossentry()
+ seeentry.append(E.glossterm(p[0]))
+ for s in p[1].split(','):
+ s = s.strip().lstrip()
+ seeentry.append(E.glosssee(otherterm=word2id(s)))
+ glossary.append(seeentry)
continue
+
+ # Add See also entries pointing to main entry
+ if 1 < len(w):
+ for t in w[1:]:
+ t = t.strip().lstrip()
+ if t not in redirects:
+ #print("info: Adding see also entry for %s" % t)
+ seeentry = E.glossentry()
+ seeentry.append(E.glossterm(t))
+ seeentry.append(E.glosssee(otherterm=id))
+ glossary.append(seeentry)
+ redirects[t] = id
+ elif ldesc not in e:
+ print("warning: term %s missing primary language %s description" % (e[lang], lang))
entry = E.glossentry(id=id)
if list_topic and 'topic' in e:
entry.append(E.glossterm('%s [%s]' % (e[lang], e['topic'])))
indexit(entry, e[l], l)
if "" != lstr:
entry.append(E.glossdef(E.para(lstr)))
+ else:
+ # only single word without translations, skip it
+ continue
for desccode in desccodes:
codestr = 'desc-%s' % desccode
if codestr in e:
- entry.append(E.glossdef(E.para("%s: %s" % (desccode,
+ entry.append(E.glossdef(E.para("(%s): %s" % (desccode,
e[codestr]))))
glossary.append(entry)
- # Add See also entries pointing to main entry
- if 1 < len(w):
- for t in w[1:]:
- t = t.strip().lstrip()
- entry = E.glossentry()
- entry.append(E.glossterm(t))
- # FIXME
- entry.append(E.glosssee(otherterm=id))
- glossary.append(entry)
-
def glosstermlocale(x):
# Look up glossterm (FIXME figure out more robust way)
t = x.getchildren()[0].text
return ""
# Sort list to mix seealso entries into their correct location.
glossary[:] = sorted(glossary, key=glosstermlocale)
+
+ l = len(glossary)
+ print("info: dictionary contain %d entries" % l)
content = lxml.etree.tostring(glossary,
pretty_print=True,
print("Norsk/bokmål")
print()
make_glossary_docbook(lang='nb', desccodes=('nb',),
- langcodes=('en', 'sme', 'sv', 'da', 'fi', 'is',),
+ langcodes=('en', 'se', 'sv', 'da', 'fi', 'is',),
output=args.output)
-elif 'sme' == args.langcode:
+elif 'se' == args.langcode:
print("Nordsamisk")
print()
- make_glossary_docbook(lang='sme', desccodes=('sme', 'nb'),
+ make_glossary_docbook(lang='se', desccodes=('se', 'nb'),
langcodes=('nb', 'en', 'sv', 'da', 'fi', 'is',),
output=args.output)
elif 'en' == args.langcode:
print("Engelsk")
print()
- make_glossary_docbook(lang='en', desccodes=('en', 'nb'),
- langcodes=('en', 'nb', 'sme', 'sv', 'da', 'fi', 'is',),
+ # desccodes is iterated one language code at a time, so it must be a
+ # tuple; ('en') without the trailing comma is just the string 'en'.
+ make_glossary_docbook(lang='en', desccodes=('en',),
+ langcodes=('en', 'nb', 'se', 'sv', 'da', 'fi', 'is',),
output=args.output)
else:
print("error: Unknown language code %s" % args.langcode)