]> pere.pagekite.me Git - text-madewithcc.git/blob - fixup-docbook.rb
7ed81df2f024c7ffe6494a9d5fcbdc8748b9f1a7
[text-madewithcc.git] / fixup-docbook.rb
1 #!/usr/bin/ruby
2 # coding: utf-8
3
4 require 'nokogiri'
5
# Verbosity threshold read by log: levels below it are printed as full
# messages, levels at or above it as a single indicator character.
LogLevel=1

# Usage: fixup-docbook.rb SOURCE DESTINATION
raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
srcfile = ARGV[0]
dstfile = ARGV[1]

# Parse the source document. The block form of File.open closes the file
# handle as soon as parsing is finished instead of leaking it until exit.
xml = File.open(srcfile) { |f| Nokogiri::XML(f) }
13
# Print a progress message on standard output.
#
# level - Integer message level.
# what  - text of the message.
#
# Levels below LogLevel are written in full, prefixed by level+1 asterisks.
# Levels at or above LogLevel only emit a one-character indicator picked by
# level, and +what+ is not printed.
def log(level, what)
  if level < LogLevel
    print format("\n%s %s\n", '*' * (level + 1), what)
  else
    print %w(! • # -)[level]
  end
end
22
# Rename every <part> element whose id attribute equals +partid+.
#
# xml    - document (anything responding to #css) to modify in place.
# partid - value of the id attribute to look for.
# tag    - new element name given to each matching part.
def partreplace(xml, partid, tag)
  selector = "part[id=#{partid}]"
  xml.css(selector).each { |element| element.name = tag }
end
28
29
log 0, 'replace article* with book*'

# Metadata blocks added to <bookinfo>. Each one is inserted after whatever
# is the last element child at that moment, so the publisher ends up after
# the copyright.
copyright_xml = <<'XML'
<copyright>
<year>2017</year>
<holder>Creative Commons</holder>
</copyright>
XML
publisher_xml = <<'XML'
<publisher>
<publishername>Gunnar Wolf</publishername>
<address><city>Mexico City</city></address>
</publisher>
XML

xml.css('articleinfo').each do |info|
  info.name = 'bookinfo'
  [copyright_xml, publisher_xml].each do |fragment|
    info.last_element_child.after(fragment)
  end
end

# Turn the <article> element into an English-language <book>.
xml.css('article').each do |root|
  root.name = 'book'
  root['lang'] = 'en'
end
50
log 0, 'change parts to colophon, dedication and chapter'
# Pairs of [part id, replacement element name], applied in this order.
[
  %w[colophon colophon],
  %w[dedication dedication],
  %w[foreword preface],
  %w[introduction preface],
  %w[bibliography chapter],
  %w[acknowledgments chapter]
].each do |partid, tag|
  partreplace(xml, partid, tag)
end
58
log 0, 'place part introduction into <partintro>'
# Wrap the paragraphs sitting directly under the "The Case Studies" part in
# a <partintro> element placed right after the part title. Nothing is done
# when no such part title exists.
s = xml.xpath("//part/title[text()='The Case Studies']")[0]
if s
  s.after('<partintro/>')
  case_studies = s.parent
  # Use paths relative to this part: an XPath starting with // is evaluated
  # from the document root even when called on a node, so it would also
  # select paragraphs (or a partintro) that belong to other parts.
  intro = case_studies.xpath('./partintro')[0]
  case_studies.xpath('./para').each do |node|
    node.parent = intro
  end
end
68
69
log 0, 'remove empty notes/web links sections'
# For every title with one of these exact texts, drop the title and then its
# parent element, provided nothing but whitespace is left in the parent.
# Raises RuntimeError when the parent still has other content.
['Notes', 'Web links', 'Web link'].each do |title|
  xml.xpath("//title[text()='%s']" % title).each do |node|
    section = node.parent
    node.remove
    # \A and \z anchor the whole string. With ^ and $ the pattern matches at
    # any empty line, so content with a leading newline or a blank line was
    # treated as empty and silently deleted.
    unless section.content =~ /\A\s*\z/
      raise RuntimeError, 'Non-empty «%s» found' % title
    end
    section.remove
  end
end
86
log 0, 'remove title from dedication'
# Blank the text of the first dedication title; the element itself stays.
dedication_title = xml.css('dedication title').first
dedication_title.content = ''
89
log 0, 'move legal notice to bookinfo'
# Add a <legalnotice> after the last element of <bookinfo>, then move the
# first paragraph carrying the license sentence into it.
bookinfo = xml.css('book bookinfo')[0]
bookinfo.last_element_child.after('<legalnotice>')
legal_notice = xml.css('book bookinfo legalnotice')[0]
legal_para = xml.css('para').find do |para|
  para.content =~ /This book is published under a/
end
if legal_para
  log 0, 'found legal'
  legal_para.parent = legal_notice
end
100
log 0, 'replace colophon page with one for this edition'
# Drop the paragraphs directly under <colophon> and insert the text for this
# edition after the colophon's first element child.
#
# The fragment must be well-formed XML: the ampersand is written as &amp;
# and the non-breaking space as the numeric reference &#160;, because
# &nbsp; is not one of the entities predefined by XML.
xml.xpath('//colophon/para').remove
s = xml.xpath('//colophon')[0]
s.first_element_child.after(<<'XML')
<para>Made with Creative Commons</para>

<para>by Paul Stacey &amp; Sarah Hinchliff Pearson</para>

<para>© 2017 by the Creative Commons Foundation.</para>

<para>Published under a Creative Commons Attribution-ShareAlike
license (CC BY-SA), version 4.0.</para>

<para>ISBN: YET-TO-BE-DECIDED (PDF), YET-TO-BE-DECIDED (ePub),
YET-TO-BE-DECIDED (Paperback) </para>

<para>Illustrations by Bryan Mathers,
<ulink url="https://bryanmmathers.com/"/>.</para>

<para>Publisher: Gunnar Wolf.</para>

<para>
<!--space for information about translators-->
&#160;
</para>

<para>Downloadable e-book available at
<ulink url="https://madewith.cc/"/>.</para>

<para>This book is published under a CC BY-SA license, which means that you
can copy, redistribute, remix, transform, and build upon the content for
any purpose, even commercially, as long as you give appropriate credit,
provide a link to the license, and indicate if changes were made. If you
remix, transform, or build upon the material, you must distribute your
contributions under the same license as the original. License details:
<ulink url="http://creativecommons.org/licenses/by-sa/4.0/"/></para>

<para>Made With Creative Commons is published with the kind support of
Creative Commons and backers of our crowdfunding-campaign on the
Kickstarter.com platform.</para>

<para>This edition of the book is maintained on
<ulink url="https://gitlab.com/gunnarwolf/madewithcc-es/"/>, and the
translations are maintained on
<ulink url="https://hosted.weblate.org/projects/madewithcc/"/>. If
you find any error in the book, please let us know via gitlab.</para>

<para>
Classifications:
</para>

<para>
(Dewey) 346.048, 347.78
</para>

<para>
(UDK) ?
</para>

<para>
(US Library of Congress) Z286 O63 S73 2017
</para>

<para>
(Melvil) 025.523
</para>

<para>
(ACM CRCS) ?
</para>

XML
173
log 0, 'remove title from colophon'
# Blank the text of the first colophon title; the element itself stays.
colophon_title = xml.css('colophon title').first
colophon_title.content = ''
176
log 0, 'change CC logo images to informalfigure'
# Images are picked by their 40.0% width; the element three levels above
# each matching <imagedata> is renamed.
logo_selector = 'figure mediaobject imageobject imagedata[width="40.0%"]'
xml.css(logo_selector).each do |imagedata|
  container = imagedata.parent.parent.parent
  container.name = 'informalfigure'
end
182
log 0, 'assigning IDs to formal figures'
# Number the <figure> elements fig-1, fig-2, ... in the order they are found.
xml.css('figure').each_with_index do |fig, index|
  fig['id'] = format('fig-%d', index + 1)
end
189
# Disabled because dblatex does not understand chapter/chapterinfo/author,
# see <URL: https://bugs.debian.org/891183 >.
if false
  log 0, 'migrate chapter author into <chapterinfo> where relevant'
  author_line = /^\s*((Paul|Sarah Hinchliff) (Stacey|Pearson))\s*$/
  xml.css('chapter para').each do |para|
    next unless para.content =~ author_line

    # Groups 2 and 3 of the match hold the first name and the surname.
    firstname, surname = $2, $3
    log 1, 'migrated %s %s' % [firstname, surname]
    # Put the author before the first title found under the paragraph's
    # parent, then drop the paragraph that carried the name.
    para.parent.css('title')[0].before(<<'XML' % [firstname, surname])
<chapterinfo>
<author>
<firstname>%s</firstname><surname>%s</surname>
</author>
</chapterinfo>
XML
    para.remove
  end
end
208
log 0, 'Writing processed file'
# Unable to figure out API way to replace DOCTYPE, so the serialized text is
# patched instead. The non-bang gsub is needed here: gsub! returns nil when
# nothing was replaced, which would leave an empty destination file for any
# input lacking "DOCTYPE article".
data = xml.to_xml.gsub(/DOCTYPE article/, 'DOCTYPE book')
File.open(dstfile, 'w') { |out| out.write(data) }