]> pere.pagekite.me Git - text-madewithcc.git/blob - fixup-docbook.rb
a201d9a40d6e8a0bab982a08fe52df8e54655b3c
[text-madewithcc.git] / fixup-docbook.rb
1 #!/usr/bin/ruby
2 # coding: utf-8
3
4 require 'nokogiri'
5
# Threshold used by `log`: levels below it print the full message, all
# other levels print a single indicator character.
LogLevel = 1

# Usage: fixup-docbook.rb SOURCE DESTINATION
raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
srcfile, dstfile = ARGV

# The block form of File.open closes the source file as soon as it is parsed.
xml = File.open(srcfile) { |src| Nokogiri::XML(src) }
13
# Report progress on standard output.
#
# A level below LogLevel starts a new line holding a row of '*' (one more
# than the level) followed by the message. Any other level prints only the
# one-character indicator for that level; the message itself is dropped.
def log(level, what)
  if level < LogLevel
    banner = '*' * (level + 1)
    print "\n%s %s\n" % [banner, what]
  else
    markers = %w(! • # -)
    print markers[level]
  end
end
22
# Rename every <part> element whose id attribute equals +partid+ so that
# its element name becomes +tag+.
def partreplace(xml, partid, tag)
  selector = 'part[id=' + partid + ']'
  xml.css(selector).each { |element| element.name = tag }
end
28
29
log 0, 'replace article* with book*'

# Fragments appended to the renamed <bookinfo>, in this order.
copyright_fragment = <<'XML'
<copyright>
<year>2017</year>
<holder>Creative Commons</holder>
</copyright>
XML
publisher_fragment = <<'XML'
<publisher>
<publishername>Instituto de Investigaciones Económicas</publishername>
<address><city>Universidad Nacional Autónoma de México</city></address>
</publisher>
XML

xml.css('articleinfo').each do |info|
  info.name = 'bookinfo'
  # One insertion per fragment: each goes after whatever element is last
  # at that moment, so the publisher ends up after the copyright.
  [copyright_fragment, publisher_fragment].each do |fragment|
    info.last_element_child.after(fragment)
  end
end

# The root <article> becomes an English-language <book>.
xml.css('article').each do |article|
  article.name = 'book'
  article['lang'] = 'en'
end
50
log 0, 'change parts to colophon, dedication and chapter'

# <part> id => element name that part is renamed to.
{
  'colophon'        => 'colophon',
  'dedication'      => 'dedication',
  'foreword'        => 'preface',
  'introduction'    => 'preface',
  'bibliography'    => 'appendix',
  'acknowledgments' => 'appendix',
}.each { |partid, tag| partreplace(xml, partid, tag) }
58
log 0, 'place part introduction into <partintro>'

# Wrap the paragraphs that sit directly under the "The Case Studies" part
# in a <partintro> placed right after the part's title.
heading = xml.xpath("//part/title[text()='The Case Studies']")[0]
if heading
  part = heading.parent
  heading.after('<partintro/>')
  intro = part.at_xpath('./partintro')
  # Relative path: only this part's own paragraphs are moved. An absolute
  # "//part/para" would also collect paragraphs of every other <part>.
  part.xpath('./para').each { |para| para.parent = intro }
end
68
69
log 0, 'remove empty notes/web links sections'

# Drop the container of each title listed below, but only when nothing
# except whitespace is left in it once the title itself is removed.
# Raises RuntimeError if such a container still holds content.
['Notes', 'Web links', 'Web link'].each do |title|
  xml.xpath("//title[text()='%s']" % title).each do |heading|
    section = heading.parent
    heading.remove
    # \A and \z anchor the whole string. With ^ and $ a single blank line
    # inside real content would match and the section would be deleted.
    unless section.content =~ /\A\s*\z/
      raise RuntimeError, 'Non-empty «%s» found' % title
    end
    section.remove
  end
end
86
log 0, 'remove title from dedication'

# Blank the first dedication title, if there is one.
dedication_title = xml.css('dedication title')[0]
dedication_title.content = "" if dedication_title
91
log 0, 're-styling dedication, preface and introduction authors'

# Turn each dedication's quote and its author list into a single
# <blockquote><attribution>…</attribution><para>…</para></blockquote>.
# Raises RuntimeError when a dedication lacks the quote or the list.
xml.css('dedication').each do |ded|
  # Take content out of the quote marks, and into the blockquote
  quote = ded.css('quote').first
  auth = ded.css('itemizedlist').first
  unless quote && auth
    raise RuntimeError, 'Dedication without <quote> or <itemizedlist> found'
  end

  quote_c = quote.content
  # Styling the attribution (including the introducing hyphen) should
  # be left to the following layers
  auth_c = auth.content.gsub(/\s+/, ' ')

  quote.parent.remove # remove the '<para>' containing the quote
  auth.remove

  # Build real nodes instead of formatting a markup string: content= escapes
  # the text, so '&' or '<' in the quote cannot corrupt the fragment.
  attribution = Nokogiri::XML::Node.new('attribution', xml)
  attribution.content = auth_c
  para = Nokogiri::XML::Node.new('para', xml)
  para.content = quote_c
  blockquote = Nokogiri::XML::Node.new('blockquote', xml)
  blockquote.add_child(attribution)
  blockquote.add_child(para)

  ded.last_element_child.after(blockquote)
end
108
# In the foreword and the introduction, gather the text of every
# <emphasis> found inside a <para> into one attribution (entries separated
# by a LaTeX newline processing instruction), append it to the preface,
# then drop the paragraphs that held those <emphasis> elements.
%w(preface#foreword preface#introduction).each do |selector|
  preface = xml.css(selector)[0]
  signatures = preface.css('para emphasis')
  names = signatures.map { |emphasis| emphasis.content }

  attribution = '<blockquote><attribution>'
  attribution += names.join('<?latex \newline ?>')
  attribution += '</attribution><para/></blockquote>'
  preface.last_element_child.after(attribution)

  # Removing the parent takes both the 'emphasis' and its 'para' away
  signatures.each { |emphasis| emphasis.parent.remove }
end
118
# The first paragraph of chapters 1 and 2 must hold the author's name; it
# is replaced by a blockquote attribution. Each entry is the attribution
# text plus the pattern the paragraph has to match. Raises RuntimeError
# when the paragraph does not match.
[
  ['Paul Stacey', /^\s*Paul Stacey\s*$/],
  ['Sarah Hinchliff Pearson', /^\s*Sarah Hinchliff Pearson.*$/],
].each_with_index do |(author, pattern), index|
  byline = xml.css('chapter')[index].css('para')[0]
  unless byline.content =~ pattern
    raise RuntimeError, "Error finding author name in chapter #{index + 1} -- " + byline.content
  end
  byline.before("<blockquote><attribution>#{author}</attribution><para/></blockquote>")
  byline.remove
end
130
log 0, 'move legal notice to bookinfo'

# Append an empty <legalnotice> to the book's <bookinfo> ...
bookinfo = xml.css('book bookinfo')[0]
bookinfo.last_element_child.after('<legalnotice>')
legalnotice = xml.css('book bookinfo legalnotice')[0]

# ... and move the first paragraph carrying the licence sentence into it.
license_para = xml.css('para').find do |para|
  para.content =~ /This book is published under a/
end
if license_para
  log 0, 'found legal'
  license_para.parent = legalnotice
end
141
log 0, 'replace colophon page with one for this edition'

# Paragraphs of the colophon page for this edition.
colophon_text = <<'XML'
<para>Made with Creative Commons by Paul Stacey and Sarah Hinchliff Pearson</para>

<para>© 2017 by the Creative Commons Foundation.</para>

<para>Published under a Creative Commons Attribution-ShareAlike
license (CC BY-SA), version 4.0.</para>

<para>The license means that you can copy, redistribute, remix,
transform, and build upon the content for any purpose, even
commercially, as long as you give appropriate credit, provide a link
to the license, and indicate if changes were made. If you remix,
transform, or build upon the material, you must distribute your
contributions under the same license as the original. License details:
<ulink url="http://creativecommons.org/licenses/by-sa/4.0/"/></para>

<para>Illustrations by Bryan Mathers,
<ulink url="https://bryanmmathers.com/"/>.</para>

<para>Publisher: Gunnar Wolf.</para>

<para>
<!--space for information about translators-->
&nbsp;
</para>

<para>Made With Creative Commons was originally published with the
kind support of Creative Commons and backers of our
crowdfunding-campaign on the Kickstarter.com platform.</para>

<para>This edition of the book is maintained on
<ulink url="https://gitlab.com/gunnarwolf/madewithcc-es/"/>, and the
translations are maintained on
<ulink url="https://hosted.weblate.org/projects/madewithcc/"/>. If
you find any error in the book, please let us know.</para>

<para>ISBN: YET-TO-BE-DECIDED (PDF), YET-TO-BE-DECIDED (ePub),
YET-TO-BE-DECIDED (Paperback) </para>

<para><ulink url="https://madewith.cc/"/></para>

<para>
(Dewey) 346.048, 347.78
</para>

<para>
(US Library of Congress) Z286 O63 S73 2017
</para>

<para>
(Melvil) 025.523
</para>

XML

# Drop the paragraphs directly under the colophon, then insert the new
# text right after the colophon's first child element.
xml.xpath('//colophon/para').remove
colophon = xml.at_xpath('//colophon')
colophon.first_element_child.after(colophon_text)
199
log 0, 'remove title from colophon'

# Blank the first colophon title when one exists; a colophon without a
# title is left alone, the same way the dedication title is handled.
colophon_title = xml.css('colophon title')[0]
colophon_title.content = "" if colophon_title
202
log 0, 'change CC logo images to informalfigure'

# Images selected by their 40.0% width: the element three levels above
# each <imagedata> is renamed (presumably the enclosing <figure>, assuming
# imagedata/imageobject/mediaobject/figure are directly nested).
logo_selector = 'figure mediaobject imageobject imagedata[width="40.0%"]'
xml.css(logo_selector).each do |imagedata|
  imagedata.parent.parent.parent.name = 'informalfigure'
end
208
log 0, 'assigning IDs to formal figures'

# Number the remaining <figure> elements fig-1, fig-2, ... in the order
# the selector returns them.
xml.css('figure').each_with_index do |figure, index|
  figure['id'] = 'fig-%d' % (index + 1)
end
215
log 0, 'Set epigraph line in each case study'

# Every paragraph inside a blockquote that mentions "Profile written by"
# is removed and its text becomes an <attribution> placed in front of the
# remaining elements of the paragraph's parent.
xml.css('blockquote para').select { |para| para.inner_text =~ /Profile written by/ }.each do |epi|
  container = epi.parent

  # A real node with content= escapes the text; the former string fragment
  # also carried a stray closing </para> tag.
  attribution = Nokogiri::XML::Node.new('attribution', xml)
  attribution.content = epi.inner_text
  epi.remove

  first = container.first_element_child
  if first
    first.before(attribution)
  else
    # The removed paragraph was the only element: just append.
    container.add_child(attribution)
  end
end
223
224
# Disabled because dblatex does not understand chapter/chapterinfo/author,
# see <URL: https://bugs.debian.org/891183 >.
if false
  log 0, 'migrate chapter author into <chapterinfo> where relevant'
  xml.css('chapter para').each do |para|
    next unless para.content =~ /^\s*((Paul|Sarah Hinchliff) (Stacey|Pearson))\s*$/

    # Groups 2 and 3 of the match are the first name and the surname.
    given, family = $2, $3
    log 1, 'migrated %s %s' % [given, family]
    chapterinfo = <<'XML' % [given, family]
<chapterinfo>
<author>
<firstname>%s</firstname><surname>%s</surname>
</author>
</chapterinfo>
XML
    para.parent.css('title')[0].before(chapterinfo)
    para.remove
  end
end
243
log 0, 'replace "Fig. \#." with docbook figure xref'

# Rewrite textual references such as "Fig. 3." into an <xref> that points
# at the id fig-3. The paragraph is serialized, edited as text and then
# re-inserted in place of the original node.
xml.css("para").each do |para|
  xml_text = para.to_xml(:skip_instruct => true).to_s
  # \d+ keeps multi-digit numbers whole ("Fig. 12" -> fig-12). gsub!
  # returns nil when nothing matched; such paragraphs are left untouched
  # instead of being re-parsed for no reason.
  next unless xml_text.gsub!(/Fig\.\s+(\d+)\.?/,
                             'Fig. <xref xrefstyle="template:%n" linkend="fig-\\1"/>')
  para.after(xml_text)
  para.remove
end
252
log 0, 'Writing processed file'
# Unable to figure out API way to replace DOCTYPE
# Non-bang gsub always returns a string. gsub! returns nil when the text
# has no "DOCTYPE article", which would write an empty destination file.
data = xml.to_xml.gsub(/DOCTYPE article/, 'DOCTYPE book')
File.open(dstfile, 'w') { |out| out.write(data) }