pere.pagekite.me Git - text-madewithcc.git/blob - fixup.rb
Removed two spurious Part headings
[text-madewithcc.git] / fixup.rb
1 #!/usr/bin/ruby
2 # coding: utf-8
# Verbosity threshold used by #log: messages whose level is below this value
# are printed in full, messages at or above it are reduced to a one-character
# progress mark.
LogLevel = 1

raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2

srcfile = ARGV[0]
dstfile = ARGV[1]

# Read the whole source into memory, one array element per line, without the
# trailing line terminators. The non-destructive String#chomp is used on
# purpose: String#chomp! returns nil when there is nothing to strip (for
# instance a last line with no final newline), which would leave a nil in the
# array and crash the first string operation applied to it. File.readlines
# also closes the file once it has been read.
data = File.readlines(srcfile).map(&:chomp)

# Strip the empty anchor markers ("[]{#anchor}", "[]{#anchor-12}", ...) from
# every line, in place.
data.each { |line| line.gsub!(/\[\]{#anchor-?\d*}/, '') }
11
# Reports progress on standard output.
#
# level - Integer nesting level of the message (0 is the outermost).
# what  - The message; it is rendered with %s, so any object is accepted.
#
# Messages whose level is below LogLevel are printed in full on their own
# line, prefixed by (level + 1) asterisks. Messages at or above LogLevel are
# reduced to a single progress character picked from a per-level table;
# levels beyond the end of the table print nothing.
#
# Returns nil.
def log(level, what)
  indicators = %w[! • # -]
  if level >= LogLevel
    print indicators[level]
  else
    print format("\n%s %s\n", '*' * (level + 1), what)
  end
end
20
# There are several titles that are spread over more than one line. Make
# them into translatable sentences.
#
# Each entry lists the consecutive source lines of one title, with blank
# separator lines given as ''. The first element is the anchor: it must occur
# exactly once in the whole text. The remaining elements must match the lines
# that immediately follow it. The run is then collapsed into a single line
# made of the non-empty pieces joined by single spaces.
log 0, 'Merging multiline sentences '
[
  ['Made', '', 'with', '', 'Creative', '', 'Commons'],
  ['The New', '', 'World of', '', 'Digital', '', 'Commons'],
  ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'],
  ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'],
  ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'],
  ['The', '', 'Creative', '', 'Commons', '', 'Licenses'],
  ['PLOS', '', '(Public Library of Science)']
].each do |pieces|
  # This should be done more generic, more robust... But before
  # burning brain cells, verify if it's needed!
  log 1, pieces
  anchor = pieces.first
  joined = pieces.reject(&:empty?).join(' ')

  # Every position where the anchor line occurs; exactly one is acceptable.
  hits = data.each_index.select { |idx| data[idx] == anchor }
  if hits.empty?
    raise RuntimeError, 'Anchor string («%s» for «%s») not found' %
                        [anchor, joined]
  elsif hits.size > 1
    raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' %
                        [anchor, hits.join(', ')]
  end
  log 2, 'Matches %s: %d - %s' % [pieces.join(' '), hits.size, hits.join(',')]

  # Check that the whole run matches before touching anything.
  at = hits.first
  pieces.each_with_index do |expected, offset|
    next if expected == data[at + offset]

    raise RuntimeError,
          'Warning: String does not match ("%s" of "%s", offset %d, book at %d)' %
          [expected, joined, offset, at + offset]
  end

  # Replace the verified run of lines by the merged sentence in one step.
  data[at, pieces.size] = [joined]
end
64
# Mark up headings: Very artisanal and suboptimal, but should do the
# trick.
#
# For every heading, put here the full string and its heading level,
# as follows:
#
# 1 - Part
# 2 - Chapter
# 3 - Section
# 4 - Subsection
#
# Try to keep this ordered as it appears within the book, as it will
# help us spot omissions and mistakes!
#
# Each title is looked up as a whole line, and the first line equal to it is
# prefixed with as many '#' characters as its level. Because a marked line no
# longer equals the bare title, a title listed twice (such as 'Notes') marks
# successive occurrences. A title that cannot be found aborts the run.
log 0, 'Mark up headings'
[
  [2, 'Foreword'],
  [2, 'Introduction'],
  [1, 'Part 1'],
  [1, 'The Big Picture'],
  [2, 'The New World of Digital Commons'],
  [3, 'The Commons, the Market, and the State'],
  [3, 'The Four Aspects of a Resource'],
  [4, 'Characteristics'],
  [4, 'People and processes'],
  [4, 'Norms and rules'],
  [4, 'Goals'],
  [3, 'A Short History of the Commons'],
  [3, 'The Digital Revolution'],
  [3, 'The Birth of Creative Commons'],
  [3, 'The Changing Market'],
  [3, 'Benefits of the Digital Commons'],
  [3, 'Our Case Studies'],
  [3, 'Notes'],
  [2, 'How to Be Made with Creative Commons'],
  [3, 'Problem Zero: Getting Discovered'],
  [4, 'Use CC to grow a larger audience'],
  [4, 'Use CC to get attribution and name recognition'],
  [4, 'Use CC-licensed content as a marketing tool'],
  [4, 'Use CC to enable hands-on engagement with your work'],
  [4, 'Use CC to differentiate yourself'],
  [3, 'Making Money'],
  [4, 'Market-based revenue streams'],
  [4, 'Providing a custom service to consumers of your work * \[MARKET-BASED\]*'],
  [4, 'Charging for the physical copy * \[MARKET-BASED\]*'],
  [4, 'Charging for the in-person version * \[MARKET-BASED\]*'],
  [4, 'Selling merchandise * \[MARKET-BASED\]*'],
  [4, 'Charging advertisers or sponsors * \[MARKET-BASED\]*'],
  [4, 'Charging your content creators * \[MARKET-BASED\]*'],
  [4, 'Charging a transaction fee * \[MARKET-BASED\]*'],
  [4, 'Providing a service to your creators* \[MARKET-BASED\]*'],
  [4, 'Licensing a trademark* \[MARKET-BASED\]*'],
  [4, 'Reciprocity-based revenue streams'],
  [4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'],
  [4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'],
  [4, 'Crowdfunding *\[RECIPROCITY-BASED\]*'],
  [3, 'Making Human Connections'],
  [4, 'Be human'],
  [4, 'Be open and accountable'],
  [4, 'Design for the good actors'],
  [4, 'Treat humans like, well, humans'],
  [4, 'State your principles and stick to them'],
  [4, 'Build a community'],
  [4, 'Give more to the commons than you take'],
  [4, 'Involve people in what you do'],
  [4, 'Notes'],
  [2, 'The Creative Commons Licenses'],
  [1, 'Part 2'],
  [1, 'The Case Studies'],
  [2, 'Arduino'],
  [2, 'Ártica'],
  [2, 'Blender Institute'],
  [2, 'Cards Against Humanity'],
  [2, 'The Conversation'],
  [2, 'Cory Doctorow'],
  [2, 'Figshare'],
  [2, 'Figure.NZ'],
  [2, 'Knowledge Unlatched'],
  [2, 'Lumen Learning'],
  [2, 'Jonathan Mann'],
  [2, 'Noun Project'],
  [2, 'Open Data Institute'],
  [2, 'OpenDesk'],
  [2, 'OpenStax'],
  [2, 'Amanda Palmer'],
  [2, 'PLOS (Public Library of Science)'],
  [2, 'Rijksmuseum'],
  [2, 'Shareable'],
  [2, 'Siyavula'],
  [2, 'Sparkfun'],
  [2, 'TeachAIDS'],
  [2, 'Tribe of Noise'],
  [2, 'Wikimedia Foundation'],
  [2, 'Bibliography'],
  [2, 'Acknowledgments']
].each do |level, title|
  log 1, [level, title].join(' -> ')
  at = data.index(title)
  if at.nil?
    raise RuntimeError, 'Heading string (level %d) not found: «%s»' % [level, title]
  end
  data[at] = '%s %s' % ['#' * level, data[at]]
end
167
# We have the explicit strings "Part 1" and "Part 2" as structural
# elements — They are to be generated upon book compilation. Nuke
# them.
#
# Array#delete removes every line equal to the given string and is a no-op
# when there is none.
['# Part 1', '# Part 2'].each { |heading| data.delete(heading) }
173
# Join erroneously split paragraphs: Write the contents of the line
# _preceding_ the unneeded break, the break will be removed.
#
# I'm noting the line number for each _after_ corrections so it's
# easier to find them; please keep them sorted! :-P
#
# Only the first line equal to each string is considered, and the line after
# it is removed only when it is empty. Strings that are not found, or that
# are not followed by an empty line, are skipped silently.
log 0, 'Join erroneously split paragraphs'
[
  'content and, in turn, spend money and', # 1595
  'still other', # 1662
  'content functions as a marketing tool for the paid product or', # 1724
  'lowest-common-denominator solutions and', # 2035
  'to the values symbolized by', # 2145
  'the kinds of participative communities that drive open', # 2157
  'time', # 2220
  'At a minimum, a CC-', # 2375
  'easier to trust a', # 2580
  'free download, the', # 3086
  'openness to fans remixing the game—give', # 3087
  'Attribution-', # 3307
  'both journal publishers and researchers. Figshare now provides', # 3672
  'get the “network effect”—', # 4002
  'access to scholarly books. For Frances, the current scholarly-', # 4033
  'for-', # 4288
  'sales', # 4410
  'contributing to the open', # 4438
  'doesn’t seem like it should be sung about', # 4616
  'songwriter, and he has found a way to keep it interesting for', # 4624
  'building trust is the top', # 4793
  'version', # 6023
  'license', # 6169
  'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452
  'BY-SA and opting in others with collecting societies like', # 7218
  'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796
  'Braddlee, Drew Spencer, Duncan', # 7839
  'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844
  'Helen' # 7874
].each do |line|
  log 1, line
  at = data.index(line)
  # Drop the blank line that follows, if the text was found and is followed
  # by one.
  data.delete_at(at + 1) if at && data[at + 1] == ''
end
216
# Write the processed lines to the destination file, separated by newlines.
# IO#puts appends the final newline unless the joined text already ends with
# one. The block form closes the file when done.
log 0, 'Writing processed file'
File.open(dstfile, 'w') { |out| out.puts data.join("\n") }