fixup.rb

   1 #!/usr/bin/ruby
   2 # coding: utf-8
   3 LogLevel=1
   4 raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
   5 srcfile = ARGV[0]
   6 dstfile = ARGV[1]
   7
   8 data=File.open(srcfile).readlines.map {|l| l.chomp!}
   9
  10 data.map {|lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '')}
  11
  12 def log(level,what)
  13   indicators = %w(! • # -)
  14   if level >= LogLevel
  15     print indicators[level]
  16   else
  17     print "\n%s %s\n" % ['*' * (level+1), what]
  18   end
  19 end
  20
  21 # There are several titles that are spread in more than one line. Make
  22 # them into translatable sentences.
  23 log 0, 'Merging multiline sentences '
  24 [ ['Made', '', 'with', '', 'Creative', '', 'Commons'],
  25   ['The New','', 'World of', '', 'Digital', '', 'Commons'],
  26   ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'],
  27   ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'],
  28   ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'],
  29   ['The', '', 'Creative', '', 'Commons', '', 'Licenses'],
  30   ['jonathanmann.net and', '', 'jonathanmann.bandcamp.com'],
  31   ['PLOS','', '(Public Library of Science)']
  32 ].each do |str|
  33   # This should be done more generic, more robust... But before
  34   # burning brain cells, verify if it's needed!
  35   log 1, str
  36   matches=[]
  37   data.each_with_index do |lin, idx|
  38     if lin == str[0]
  39       matches << idx
  40     end
  41   end
  42   if matches.size == 0
  43     raise RuntimeError, 'Anchor string («%s» for «%s») not found' %
  44                         [str[0], str.reject {|word| word==''}.join(' ')]
  45   elsif matches.size > 1
  46     raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' %
  47                         [str[0], matches.map {|i| i.to_s}.join(', ')]
  48   end
  49   log 2, 'Matches %s: %d - %s' % [str.join(' '), matches.size, matches.join(',')]
  50
  51   len = str.size
  52   at = data.index(str[0])
  53   joined = str.reject {|word| word==''}.join(' ')
  54   len.times do |offset|
  55     if str[offset] != data[at+offset]
  56       raise RuntimeError,
  57             'Warning: String does not match ("%s" of "%s", offset %d, book at %d)' %
  58             [str[offset], joined, offset, at+offset]
  59     end
  60   end
  61
  62   data[at] = joined
  63   (str.size - 1).times { data.delete_at(at+1) }
  64 end
  65
  66 log 0, 'correct emphesis in some titles'
  67 data.map {|lin| lin.gsub!(/ *\* \\\[/, ' *\[')}
  68
  69 log 0, 'tag title and author'
  70 data.delete_at(1) # Remove unwanted blank line between title and authors
  71 data[0].sub!(/^/, '% ') # title
  72 data[1].sub!(/^/, '% ') # authors
  73 data[1].sub!(/ and /, ';') # authors
  74
  75 # Mark up headings: Very artisanal and suboptimal, but should do the
  76 # trick.
  77 #
  78 # For every heading, put here the full string and its heading level,
  79 # as follows:
  80 #
  81 # 1 - Part
  82 # 2 - Chapter
  83 # 3 - Section
  84 # 4 - Subsection
  85 #
  86 # Try to keep this ordered as it appears within the book, as it will
  87 # help us spot omissions and mistakes!
  88 log 0, 'Mark up headings'
  89 [ [1, 'Foreword'],
  90   [1, 'Introduction'],
  91   [1, 'Part 1'],
  92   [1, 'The Big Picture'],
  93   [2, 'The New World of Digital Commons'],
  94   [3, 'The Commons, the Market, and the State'],
  95   [3, 'The Four Aspects of a Resource'],
  96   [4, 'Characteristics'],
  97   [4, 'People and processes'],
  98   [4, 'Norms and rules'],
  99   [4, 'Goals'],
 100   [3, 'A Short History of the Commons'],
 101   [3, 'The Digital Revolution'],
 102   [3, 'The Birth of Creative Commons'],
 103   [3, 'The Changing Market'],
 104   [3, 'Benefits of the Digital Commons'],
 105   [3, 'Our Case Studies'],
 106   [3, 'Notes'],
 107   [2, 'How to Be Made with Creative Commons'],
 108   [3, 'Problem Zero: Getting Discovered'],
 109   [4, 'Use CC to grow a larger audience'],
 110   [4, 'Use CC to get attribution and name recognition'],
 111   [4, 'Use CC-licensed content as a marketing tool'],
 112   [4, 'Use CC to enable hands-on engagement with your work'],
 113   [4, 'Use CC to differentiate yourself'],
 114   [3, 'Making Money'],
 115   [4, 'Market-based revenue streams'],
 116   [4, 'Providing a custom service to consumers of your work *\[MARKET-BASED\]*'],
 117   [4, 'Charging for the physical copy *\[MARKET-BASED\]*'],
 118   [4, 'Charging for the in-person version *\[MARKET-BASED\]*'],
 119   [4, 'Selling merchandise *\[MARKET-BASED\]*'],
 120   [4, 'Charging advertisers or sponsors *\[MARKET-BASED\]*'],
 121   [4, 'Charging your content creators *\[MARKET-BASED\]*'],
 122   [4, 'Charging a transaction fee *\[MARKET-BASED\]*'],
 123   [4, 'Providing a service to your creators *\[MARKET-BASED\]*'],
 124   [4, 'Licensing a trademark *\[MARKET-BASED\]*'],
 125   [4, 'Reciprocity-based revenue streams'],
 126   [4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'],
 127   [4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'],
 128   [4, 'Crowdfunding *\[RECIPROCITY-BASED\]*'],
 129   [3, 'Making Human Connections'],
 130   [4, 'Be human'],
 131   [4, 'Be open and accountable'],
 132   [4, 'Design for the good actors'],
 133   [4, 'Treat humans like, well, humans'],
 134   [4, 'State your principles and stick to them'],
 135   [4, 'Build a community'],
 136   [4, 'Give more to the commons than you take'],
 137   [4, 'Involve people in what you do'],
 138   [3, 'Notes'],
 139   [2, 'The Creative Commons Licenses'],
 140   [1, 'Part 2'],
 141   [1, 'The Case Studies'],
 142   [2, 'Arduino'],
 143   [2, 'Ártica'],
 144   [2, 'Blender Institute'],
 145   [2, 'Cards Against Humanity'],
 146   [2, 'The Conversation'],
 147   [2, 'Cory Doctorow'],
 148   [2, 'Figshare'],
 149   [2, 'Figure.NZ'],
 150   [2, 'Knowledge Unlatched'],
 151   [2, 'Lumen Learning'],
 152   [2, 'Jonathan Mann'],
 153   [2, 'Noun Project'],
 154   [2, 'Open Data Institute'],
 155   [2, 'OpenDesk'],
 156   [2, 'OpenStax'],
 157   [2, 'Amanda Palmer'],
 158   [2, 'PLOS (Public Library of Science)'],
 159   [2, 'Rijksmuseum'],
 160   [2, 'Shareable'],
 161   [2, 'Siyavula'],
 162   [2, 'SparkFun'],
 163   [2, 'TeachAIDS'],
 164   [2, 'Tribe of Noise'],
 165   [2, 'Wikimedia Foundation'],
 166   [1, 'Bibliography'],
 167   [1, 'Acknowledgments'],
 168
 169 ].each do |item|
 170   log 1, item.join(' -> ')
 171   at = data.index {|i| i == item[1]}
 172   if at.nil?
 173     raise RuntimeError, 'Heading string (level %d) not found: «%s»' % item
 174   end
 175   data[at] = '%s %s' % ['#' * item[0], data[at]]
 176 end
 177
 178 # We have the explicit strings "Part 1" and "Part 2" as structural
 179 # elements — They are to be generated upon book compilation. Nuke
 180 # them.
 181 data.delete("# Part 1")
 182 data.delete("# Part 2")
 183
 184 log 0, 'add heading to colophon page'
 185 data.insert(data.index('Made With Creative Commons'), '# Colophon {-}')
 186
 187 log 0, 'add dedication as separeate chapter'
 188 data.insert(data.index('"I don\'t know a whole lot about nonfiction journalism. . .'), '# Dedication {-}')
 189
 190 # Join erroneously split paragraphs: Write the contents of the line
 191 # _preceding_ the unneeded break, the break will be removed.
 192 #
 193 # I'm noting the line number for each _after_ corrections so it's
 194 # easier to find them; please keep them sorted! :-P
 195 log 0, 'Join erroneously split paragraphs'
 196 ['content and, in turn, spend money and', # 1595
 197  'still other', # 1662
 198  'content functions as a marketing tool for the paid product or', # 1724
 199  'lowest-common-denominator solutions and', #2035
 200  'to the values symbolized by', # 2145
 201  'the kinds of participative communities that drive open', # 2157
 202  'time', # 2220
 203  'At a minimum, a CC-', # 2375
 204  '"Share Your Work" at', # 2508
 205  'easier to trust a', # 2580
 206  'the free download, the', # 3086
 207  'openness to fans remixing the game---give', # 3087
 208  'Attribution-', # 3307
 209  'to both journal publishers and researchers. Figshare now provides', # 3672
 210  'get the "network effect"---', # 4002
 211  'access to scholarly books. For Frances, the current scholarly-', # 4033
 212  'for-', # 4288
 213  'sales', # 4410
 214  'contributing to the open', # 4438
 215  'doesn\'t seem like it should be sung about', # 4616
 216  'songwriter, and he has found a way to keep it interesting for', # 4624
 217  'building trust is the top', # 4793
 218  'version', # 6023
 219  'license', # 6169
 220  'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452
 221  'BY-SA and opting in others with collecting societies like', # 7218
 222  'Journeys to a Generative Economy. San Francisco:', # 7553
 223  'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796
 224  'Braddlee, Drew Spencer, Duncan', # 7839
 225  'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844
 226  'Helen', # 7874
 227 ].each do |line|
 228   log 1, line
 229   at = data.index {|i| i == line}
 230   if !at.nil? and data[at+1] == ''
 231     data.delete_at(at+1)
 232   end
 233 end
 234
 235 log 0, 'Identify and mark footnotes/endnodes'
 236 scope="unknown"
 237 noteblock=false
 238 data.each_with_index do |lin, idx|
 239   if lin =~ /^## (.+)$/
 240     scope=$1.gsub(" ", "-")
 241   end
 242   # First, mark note reference
 243   lin.sub!(/([a-z][\.\)]+["”]?)(\d+)(\s)/, "\\1[^" + scope + "-\\2]\\3")
 244   lin.sub!(/([a-z][\.\)]+["”]?)(\d+)$/, "\\1[^" + scope + "-\\2]")
 245   # Special case some refs hard to match otherwise
 246   lin.sub!(/(section.\)) 36 /, "\\1[^" + scope + "-36] ")
 247   lin.sub!(/(Data Futures Forum in 2014,)1 /, "\\1[^" + scope + "-1] ")
 248   lin.sub!(/(5,080)5/, "\\1[^" + scope + "-5]")
 249   lin.sub!(/(sustain her creative work.) 1/, "\\1[^" + scope + "-1]")
 250   # Next, mark note content, only between /Web Links?|Notes/ and next heading
 251   if noteblock
 252     lin.sub!(/^(\d+)\. /, "[^" + scope + "-\\1]: ")
 253     if lin =~ /^##?.+/
 254       noteblock=false
 255     end
 256   end
 257   if lin =~ /^(### Notes|Web links?)/
 258     noteblock=true
 259     # Turn web link line into section header, to make it easier to
 260     # find by fixup-docbook.rb.
 261     lin.gsub!(/^(Web links?)/, "### \\1")
 262   end
 263 end
 264
 265 log 0, 'verify every footnote/endnote is unique and used'
 266 notes = Hash.new
 267 data.each_with_index do |lin, idx|
 268   if lin =~ /(\[\^[^\]]+\])(:)?/
 269 #    log 0, "*** found %s %s" % [$1, $2]
 270     if not notes.has_key?($1)
 271       notes[$1] = Hash.new
 272     end
 273     if $2 == ':'
 274       notes[$1]['def'] = true
 275     else
 276       notes[$1]['ref'] = true
 277     end
 278   end
 279 end
 280 notes.each do |key, val|
 281   if val.has_key?('def') != val.has_key?('ref')
 282     log 0, "error: check use of footnote %s" % key
 283   end
 284 end
 285
 286 log 0, 'Turn indented block after use cases into block quotes'
 287 inscope=false
 288 quote=false
 289 data.each_with_index do |lin, idx|
 290   if quote
 291     lin.sub!(/^/, "> ")
 292   end
 293   if lin =~ /^> Profile written by/
 294     quote=false
 295   end
 296   # To this heading
 297   if lin =~ /^## Bibliography/
 298     inscope=false
 299   end
 300   # From this heading
 301   if lin =~ /^## Arduino/
 302     inscope=true
 303   end
 304   if inscope and lin =~ /^## /
 305     quote=true
 306     next
 307   end
 308 end
 309
 310 log 0, 'emphesize keywords'
 311 data.map {|lin| lin.gsub!(/^(>\s*)(Revenue model|Interview date|Interviewees?):/, '\\1**\\2**:')}
 312
 313 log 0, 'make figure sizes relative to text body width while keeping aspect ratio'
 314 data.map {|lin| lin.gsub!(/width="6.5in"/, 'width="80%"')}
 315 data.map {|lin| lin.gsub!(/width="4.198in"/, 'width="40%"')}
 316 data.map {|lin| lin.gsub!(/width="4.1665in"/, 'width="40%"')}
 317 data.map {|lin| lin.gsub!(/height="[0-9.]+in"/, '')}
 318
 319 log 0, 'add figure titles required by Docbook for referable figures'
 320 [
 321   ['10000201000008000000045C30360249076453E6.png', 'Enterprise engagement with commons, state and market.'],
 322   ['10000201000007D0000007D0ACF13F8B71EAF0B9.png', 'Four aspects of resource management'],
 323   ['10000201000009C40000065D9EC4F530BD4DFBE0.png', 'How the market, commons and state concieve of resources.'],
 324   ['10000201000009C4000005153EACBD62F00F6BA9.png', 'In preindustrialized society.'],
 325   ['10000201000009C4000005150F069409C1CC12F0.png', 'The commons is gradually superseded by the state.'],
 326   ['10000201000009C400000515F1CAA15B223F6BAF.png', 'How the market, the state and the commons look today.'],
 327 ].each do |fig|
 328   at = data.index {|i| i.include? fig[0]}
 329   if at.nil?
 330     raise RuntimeError, 'No figure named «%s» found' % fig[0]
 331   end
 332   data[at].gsub!(/!\[\]\(Pictures/, '![%s](Pictures' % fig[1])
 333 end
 334
 335 log 0, 'adding http:// to all URLs and turn them into links'
 336 data.map {|lin| lin.gsub!(/(^|\s+)([-a-z0-9\\.]+\.(ca|cc|com|edu|eu|io|is|it|kr|net|nl|nz|org|se))/, '\\1http://\\2')}
 337 data.map {|lin| lin.gsub!(/\b(https?:\/\/[-a-z0-9\\.]+)(\/[-\\.\/a-zA-Z0-9#_\?&=,]+[-\/a-zA-Z0-9#_\?&=,])?/, '[](\\1\\2)')}
 338
 339 log 0, 'Writing processed file'
 340 File.open(dstfile, 'w') {|f| f.puts data.join("\n")}