]> pere.pagekite.me Git - text-madewithcc.git/blob - fixup.rb
Created pt_BR proof reading web page.
[text-madewithcc.git] / fixup.rb
1 #!/usr/bin/ruby
2 # coding: utf-8
# Threshold consulted by the log helper: messages whose level is below this
# value are printed in full, all others as a one-character progress mark.
LogLevel = 1

# Exactly two command-line arguments are required: the file to read and the
# file to write.
raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
srcfile, dstfile = ARGV

# Load the source as an array of lines with the line terminators removed.
# The non-destructive String#chomp is used on purpose: chomp! returns nil
# when a line has no terminator (typically the last line of a file that
# lacks a final newline), which would leave a nil entry in the array and
# break every later in-place substitution.  File.readlines also closes the
# file once it has been read.
data = File.readlines(srcfile).map(&:chomp)

# Strip the empty anchor markers ("[]{#anchor-NN}") from every line.
data.each { |lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '') }
11
# Progress reporting helper.
#
# level - Integer verbosity level of the message.
# what  - The message; it is only printed when level is below LogLevel.
#
# Messages below LogLevel are written on their own line, prefixed with
# (level + 1) asterisks.  Every other message is reduced to a single
# indicator character chosen by its level, so progress shows up as a row
# of marks.
def log(level, what)
  if level < LogLevel
    print "\n%s %s\n" % ['*' * (level+1), what]
    return
  end

  print %w(! • # -)[level]
end
20
# Some titles are spread over several lines of the source.  Each entry
# below lists the consecutive lines (blank separators included) of one such
# title; the lines are collapsed into a single one so the title becomes one
# translatable sentence.
log 0, 'Merging multiline sentences '
multiline_titles = [
  ['Made', '', 'with', '', 'Creative', '', 'Commons'],
  ['The New','', 'World of', '', 'Digital', '', 'Commons'],
  ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'],
  ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'],
  ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'],
  ['The', '', 'Creative', '', 'Commons', '', 'Licenses'],
  ['jonathanmann.net and', '', 'jonathanmann.bandcamp.com'],
  ['PLOS','', '(Public Library of Science)']
]
multiline_titles.each do |pieces|
  # This should be done more generic, more robust... But before
  # burning brain cells, verify if it's needed!
  log 1, pieces
  anchor = pieces[0]
  sentence = pieces.reject { |word| word == '' }.join(' ')

  # The first piece must identify exactly one line of the document.
  hits = data.each_index.select { |idx| data[idx] == anchor }
  if hits.empty?
    raise RuntimeError, 'Anchor string («%s» for «%s») not found' %
                        [anchor, sentence]
  elsif hits.size > 1
    raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' %
                        [anchor, hits.map { |i| i.to_s }.join(', ')]
  end
  log 2, 'Matches %s: %d - %s' % [pieces.join(' '), hits.size, hits.join(',')]

  # Every following piece has to match the document line by line before
  # anything is modified.
  start = hits.first
  pieces.each_with_index do |expected, offset|
    next if expected == data[start + offset]
    raise RuntimeError,
          'Warning: String does not match ("%s" of "%s", offset %d, book at %d)' %
          [expected, sentence, offset, start + offset]
  end

  # Replace the first line with the joined sentence and drop the rest.
  data[start] = sentence
  data.slice!(start + 1, pieces.size - 1)
end
65
log 0, 'correct emphesis in some titles'
# Normalise the spacing around the asterisk that opens an emphasised,
# bracketed tag.
data.each { |lin| lin.gsub!(/ *\* \\\[/, ' *\[') }

log 0, 'tag title and author'
data.delete_at(1) # Remove unwanted blank line between title and authors
title, authors = data[0], data[1]
title.prepend('% ')        # title
authors.prepend('% ')      # authors
authors.sub!(/ and /, ';') # authors: first " and " becomes the separator
74
# Heading markup, done by hand: crude, but it does the job.
#
# Each entry pairs a heading level with the exact text of the heading line:
#
#   1 - Part
#   2 - Chapter
#   3 - Section
#   4 - Subsection
#
# Keep the entries in the order in which they appear in the book; that
# makes omissions and mistakes much easier to spot.
log 0, 'Mark up headings'
headings = [
  [1, 'Foreword'],
  [1, 'Introduction'],
  [1, 'Part 1'],
  [1, 'The Big Picture'],
  [2, 'The New World of Digital Commons'],
  [3, 'The Commons, the Market, and the State'],
  [3, 'The Four Aspects of a Resource'],
  [4, 'Characteristics'],
  [4, 'People and processes'],
  [4, 'Norms and rules'],
  [4, 'Goals'],
  [3, 'A Short History of the Commons'],
  [3, 'The Digital Revolution'],
  [3, 'The Birth of Creative Commons'],
  [3, 'The Changing Market'],
  [3, 'Benefits of the Digital Commons'],
  [3, 'Our Case Studies'],
  [3, 'Notes'],
  [2, 'How to Be Made with Creative Commons'],
  [3, 'Problem Zero: Getting Discovered'],
  [4, 'Use CC to grow a larger audience'],
  [4, 'Use CC to get attribution and name recognition'],
  [4, 'Use CC-licensed content as a marketing tool'],
  [4, 'Use CC to enable hands-on engagement with your work'],
  [4, 'Use CC to differentiate yourself'],
  [3, 'Making Money'],
  [4, 'Market-based revenue streams'],
  [4, 'Providing a custom service to consumers of your work *\[MARKET-BASED\]*'],
  [4, 'Charging for the physical copy *\[MARKET-BASED\]*'],
  [4, 'Charging for the in-person version *\[MARKET-BASED\]*'],
  [4, 'Selling merchandise *\[MARKET-BASED\]*'],
  [4, 'Charging advertisers or sponsors *\[MARKET-BASED\]*'],
  [4, 'Charging your content creators *\[MARKET-BASED\]*'],
  [4, 'Charging a transaction fee *\[MARKET-BASED\]*'],
  [4, 'Providing a service to your creators *\[MARKET-BASED\]*'],
  [4, 'Licensing a trademark *\[MARKET-BASED\]*'],
  [4, 'Reciprocity-based revenue streams'],
  [4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'],
  [4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'],
  [4, 'Crowdfunding *\[RECIPROCITY-BASED\]*'],
  [3, 'Making Human Connections'],
  [4, 'Be human'],
  [4, 'Be open and accountable'],
  [4, 'Design for the good actors'],
  [4, 'Treat humans like, well, humans'],
  [4, 'State your principles and stick to them'],
  [4, 'Build a community'],
  [4, 'Give more to the commons than you take'],
  [4, 'Involve people in what you do'],
  [3, 'Notes'],
  [2, 'The Creative Commons Licenses'],
  [1, 'Part 2'],
  [1, 'The Case Studies'],
  [2, 'Arduino'],
  [2, 'Ártica'],
  [2, 'Blender Institute'],
  [2, 'Cards Against Humanity'],
  [2, 'The Conversation'],
  [2, 'Cory Doctorow'],
  [2, 'Figshare'],
  [2, 'Figure.NZ'],
  [2, 'Knowledge Unlatched'],
  [2, 'Lumen Learning'],
  [2, 'Jonathan Mann'],
  [2, 'Noun Project'],
  [2, 'Open Data Institute'],
  [2, 'OpenDesk'],
  [2, 'OpenStax'],
  [2, 'Amanda Palmer'],
  [2, 'PLOS (Public Library of Science)'],
  [2, 'Rijksmuseum'],
  [2, 'Shareable'],
  [2, 'Siyavula'],
  [2, 'SparkFun'],
  [2, 'TeachAIDS'],
  [2, 'Tribe of Noise'],
  [1, 'Bibliography'],
  [1, 'Acknowledgments'],
]
headings.each do |level, title|
  log 1, [level, title].join(' -> ')
  # Only the first line that is still exactly the bare title is marked, so
  # a title listed twice (e.g. 'Notes') picks up its next occurrence.
  at = data.index(title)
  raise RuntimeError, 'Heading string (level %d) not found: «%s»' % [level, title] if at.nil?
  data[at] = "#{'#' * level} #{data[at]}"
end

# The literal "Part 1" and "Part 2" lines are structural elements that are
# to be generated when the book is compiled, so they are removed here.
['# Part 1', '# Part 2'].each { |marker| data.delete(marker) }
183
# Insert a heading line in front of the colophon and of the dedication.
# Each anchor line is looked up explicitly: Array#index returns nil when the
# line is absent, and handing nil to Array#insert would fail with an opaque
# TypeError.  A RuntimeError naming the missing anchor is raised instead,
# like the other lookups in this script do.
log 0, 'add heading to colophon page'
colophon_anchor = 'Made With Creative Commons'
colophon_at = data.index(colophon_anchor)
if colophon_at.nil?
  raise RuntimeError, 'Colophon anchor line not found: «%s»' % colophon_anchor
end
data.insert(colophon_at, '# Colophon {-}')

log 0, 'add dedication as separeate chapter'
dedication_anchor = '"I don\'t know a whole lot about nonfiction journalism. . .'
dedication_at = data.index(dedication_anchor)
if dedication_at.nil?
  raise RuntimeError, 'Dedication anchor line not found: «%s»' % dedication_anchor
end
data.insert(dedication_at, '# Dedication {-}')
189
# Paragraphs that were split by mistake: each entry is the exact text of
# the line that comes _before_ the unwanted break.  When the line right
# after it is empty, that empty line is removed; entries that are not found
# are skipped silently.
#
# The trailing numbers are the line numbers _after_ corrections, kept to
# make the spots easier to find; please keep the list sorted.
log 0, 'Join erroneously split paragraphs'
lines_before_bad_break = [
  'content and, in turn, spend money and', # 1595
  'still other', # 1662
  'content functions as a marketing tool for the paid product or', # 1724
  'lowest-common-denominator solutions and', #2035
  'to the values symbolized by', # 2145
  'the kinds of participative communities that drive open', # 2157
  'time', # 2220
  'At a minimum, a CC-', # 2375
  '"Share Your Work" at', # 2508
  'easier to trust a', # 2580
  'the free download, the', # 3086
  'openness to fans remixing the game---give', # 3087
  'Attribution-', # 3307
  'to both journal publishers and researchers. Figshare now provides', # 3672
  'get the "network effect"---', # 4002
  'access to scholarly books. For Frances, the current scholarly-', # 4033
  'for-', # 4288
  'sales', # 4410
  'contributing to the open', # 4438
  'doesn\'t seem like it should be sung about', # 4616
  'songwriter, and he has found a way to keep it interesting for', # 4624
  'building trust is the top', # 4793
  'version', # 6023
  'license', # 6169
  'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452
  'BY-SA and opting in others with collecting societies like', # 7218
  'Journeys to a Generative Economy. San Francisco:', # 7553
  'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796
  'Braddlee, Drew Spencer, Duncan', # 7839
  'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844
  'Helen', # 7874
]
lines_before_bad_break.each do |tail|
  log 1, tail
  at = data.index(tail)
  next if at.nil?
  data.delete_at(at + 1) if data[at + 1] == ''
end
234
# Rewrite note references and note bodies as "[^scope-N]" markers.  The
# scope is the text of the most recent level-2 heading with spaces replaced
# by dashes, so equal note numbers under different headings stay distinct.
log 0, 'Identify and mark footnotes/endnodes'
scope = "unknown"
noteblock = false
data.each do |lin|
  scope = Regexp.last_match(1).gsub(" ", "-") if lin =~ /^## (.+)$/
  # Opening part of every marker produced for this line.
  tag = "[^#{scope}-"

  # First, mark note reference: a number glued to the end of a sentence,
  # followed either by whitespace or by the end of the line.
  lin.sub!(/([a-z][\.\)]+["”]?)(\d+)(\s)/, "\\1#{tag}\\2]\\3")
  lin.sub!(/([a-z][\.\)]+["”]?)(\d+)$/, "\\1#{tag}\\2]")
  # Special case some refs hard to match otherwise
  lin.sub!(/(section.\)) 36 /, "\\1#{tag}36] ")
  lin.sub!(/(Data Futures Forum in 2014,)1 /, "\\1#{tag}1] ")
  lin.sub!(/(5,080)5/, "\\1#{tag}5]")
  lin.sub!(/(sustain her creative work.) 1/, "\\1#{tag}1]")

  # Next, mark note content, only between /Web Links?|Notes/ and next heading
  if noteblock
    lin.sub!(/^(\d+)\. /, "#{tag}\\1]: ")
    noteblock = false if lin =~ /^##?.+/
  end

  # This check runs after the one above so that the line opening a note
  # block is not itself treated as note content.
  if lin =~ /^(### Notes|Web links?)/
    noteblock = true
    # Turn web link line into section header, to make it easier to
    # find by fixup-docbook.rb.
    lin.gsub!(/^(Web links?)/, "### \\1")
  end
end
264
# Cross-check the footnote markers: every marker should occur both as a
# reference ("[^x]") and as a definition ("[^x]:").  A marker seen in only
# one of the two roles is reported through the log.
log 0, 'verify every footnote/endnote is unique and used'
notes = {}
data.each do |lin|
  # String#scan visits every marker on the line.  A plain =~ match would
  # stop at the first one, so further markers on the same line would never
  # be recorded and would be reported as inconsistent by mistake.
  lin.scan(/(\[\^[^\]]+\])(:)?/) do |marker, colon|
    usage = (notes[marker] ||= {})
    usage[colon == ':' ? 'def' : 'ref'] = true
  end
end
notes.each do |key, val|
  next if val.key?('def') == val.key?('ref')
  log 0, "error: check use of footnote %s" % key
end
285
# Inside the case-study part of the book, the lines that follow a level-2
# heading are turned into a block quote ("> " prefix), up to and including
# the line that starts with "Profile written by".
log 0, 'Turn indented block after use cases into block quotes'
inscope = false
quote = false
data.each do |lin|
  lin.sub!(/^/, "> ") if quote
  # The profile credit is the last quoted line of a case study.
  quote = false if lin =~ /^> Profile written by/
  # To this heading.  The heading table of this script marks Bibliography
  # as a level-1 heading ("# Bibliography"), so a pattern that demands
  # "## " could never match and the scope would never be closed; both
  # levels are accepted here.
  inscope = false if lin =~ /^##? Bibliography/
  # From this heading
  inscope = true if lin =~ /^## Arduino/
  # Quoting starts on the line after a case-study heading.
  quote = true if inscope && lin =~ /^## /
end
309
log 0, 'emphesize keywords'
# Put the field labels that open a block-quoted line in bold.
data.each do |lin|
  lin.gsub!(/^(>\s*)(Revenue model|Interview date|Interviewees?):/, '\\1**\\2**:')
end
312
log 0, 'make figure sizes relative to text body width while keeping aspect ratio'
# Absolute widths and the relative widths that replace them.  The patterns
# are plain strings rather than regexps, so the dots in the numbers are
# matched literally instead of standing for "any character".
{
  'width="6.5in"'    => 'width="80%"',
  'width="4.198in"'  => 'width="40%"',
  'width="4.1665in"' => 'width="40%"',
}.each do |absolute, relative|
  data.each { |lin| lin.gsub!(absolute, relative) }
end
# Remove the explicit heights altogether.
data.each { |lin| lin.gsub!(/height="[0-9.]+in"/, '') }
318
log 0, 'add figure titles required by Docbook for referable figures'
# Picture file name => title placed into the empty "![]" of the first line
# that mentions that file.
figure_titles = {
  '10000201000008000000045C30360249076453E6.png' => 'Enterprise engagement with commons, state and market.',
  '10000201000007D0000007D0ACF13F8B71EAF0B9.png' => 'Four aspects of resource management',
  '10000201000009C40000065D9EC4F530BD4DFBE0.png' => 'How the market, commons and state concieve of resources.',
  '10000201000009C4000005153EACBD62F00F6BA9.png' => 'In preindustrialized society.',
  '10000201000009C4000005150F069409C1CC12F0.png' => 'The commons is gradually superseded by the state.',
  '10000201000009C400000515F1CAA15B223F6BAF.png' => 'How the market, the state and the commons look today.',
}
figure_titles.each do |picture, caption|
  at = data.index { |lin| lin.include?(picture) }
  raise RuntimeError, 'No figure named «%s» found' % picture if at.nil?
  data[at].gsub!(/!\[\]\(Pictures/, '![%s](Pictures' % caption)
end
334
log 0, 'adding http:// to all URLs and turn them into links'
# A host name at the start of a line or after whitespace that ends in one
# of the listed top-level domains.
bare_host = /(^|\s+)([-a-z0-9\\.]+\.(ca|cc|com|edu|eu|io|is|it|kr|net|nl|nz|org|se))/
# An http(s) URL with an optional path.
full_url = /\b(https?:\/\/[-a-z0-9\\.]+)(\/[-\\.\/a-zA-Z0-9#_\?&=,]+[-\/a-zA-Z0-9#_\?&=,])?/
# Two separate passes: all hosts get their scheme first, then every URL is
# wrapped as a link with empty link text.
data.each { |lin| lin.gsub!(bare_host, '\\1http://\\2') }
data.each { |lin| lin.gsub!(full_url, '[](\\1\\2)') }

log 0, 'Writing processed file'
File.open(dstfile, 'w') do |out|
  out.puts data.join("\n")
end