#!/usr/bin/ruby
# coding: utf-8
+LogLevel=1 # threshold used by log(): levels below it print the full message, levels at or above it print a one-character marker
raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
srcfile = ARGV[0]
dstfile = ARGV[1]
data=File.open(srcfile).readlines.map {|l| l.chomp!}
-data.map {|lin| lin.gsub!(/<span id="anchor-?\d*"><\/span>/, '')}
+data.map {|lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '')} # strips empty "[]{#anchor…}" markers in place; NOTE(review): data is built with chomp!, which returns nil for a line it does not change (e.g. a last line without a newline), and gsub! would then raise on that entry; confirm the input always ends with a newline
+
+def log(level,what) # progress reporter; level is an Integer compared against LogLevel, what is the message (used only by the full form)
+ indicators = %w(! • # -) # one marker per level 0..3; a level of 4 or more indexes to nil, for which print emits nothing
+ if level >= LogLevel # at or above the threshold: print only the marker, no newline
+ print indicators[level]
+ else # below the threshold: newline, (level+1) asterisks, a space, the message, newline
+ print "\n%s %s\n" % ['*' * (level+1), what]
+ end
+end
# There are several titles that are spread in more than one line. Make
# them into translatable sentences.
+log 0, 'Merging multiline sentences ' # each entry below lists the lines of one split title; its first element is the anchor line looked up in data
[ ['Made', '', 'with', '', 'Creative', '', 'Commons'],
['The New','', 'World of', '', 'Digital', '', 'Commons'],
['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'],
- ['Use CC to get attribution and name', 'recognition'],
- ['Use CC to enable hands-on engagement with', 'your work'],
- ['Providing a custom service to consumers of', 'your work * \[MARKET-BASED\]*'],
- ['Charging for the physical copy *', '\[MARKET-BASED\]*'],
- ['Charging for the in-person version *','\[MARKET-BASED\]*'],
- ['Charging advertisers or sponsors *', '\[MARKET-BASED\]*'],
- ['Charging your content creators *', '\[MARKET-BASED\]*'],
- ['Charging a transaction fee *', '\[MARKET-BASED\]*'],
- ['Providing a service to your creators*', '\[MARKET-BASED\]*'],
+ ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'],
['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'],
- ['The pay-what-you-want model', '*\[RECIPROCITY-BASED\]*'],
['The', '', 'Creative', '', 'Commons', '', 'Licenses'],
+ ['jonathanmann.net and', '', 'jonathanmann.bandcamp.com'],
['PLOS','', '(Public Library of Science)']
].each do |str|
# This should be done more generic, more robust... But before
# burning brain cells, verify if it's needed!
- if (data.select {|i| i == str[0]}.size != 1)
- raise RuntimeError, 'First string appears multiple times: %s (%s)' %
- [str.reject{|i| i==''}.join(' '), str[0]]
+ log 1, str
+ matches=[] # indexes of every line in data that equals the anchor str[0]
+ data.each_with_index do |lin, idx|
+ if lin == str[0]
+ matches << idx
+ end
+ end
+ if matches.size == 0 # the anchor must occur exactly once; zero or several occurrences abort with RuntimeError
+ raise RuntimeError, 'Anchor string («%s» for «%s») not found' %
+ [str[0], str.reject {|word| word==''}.join(' ')]
+ elsif matches.size > 1
+ raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' %
+ [str[0], matches.map {|i| i.to_s}.join(', ')]
end
+ log 2, 'Matches %s: %d - %s' % [str.join(' '), matches.size, matches.join(',')] # NOTE(review): below, len is never read, data.index(str[0]) recomputes matches[0], and the str.size-1 lines after the anchor are deleted without any visible statement writing the joined title into data[at]; confirm against the full file
len = str.size
at = data.index(str[0])
(str.size - 1).times { data.delete_at(at+1) }
end
+log 0, 'correct emphesis in some titles' # NOTE(review): "emphesis" in this progress message is a typo for "emphasis"
+data.map {|lin| lin.gsub!(/ *\* \\\[/, ' *\[')} # in place: any run of spaces followed by "* \[" becomes " *\[", so the emphasis marker sits directly before the escaped bracket
+
+log 0, 'tag title and author'
+data.delete_at(1) # Remove unwanted blank line between title and authors
+data[0].sub!(/^/, '% ') # title: prefix the first line with "% "
+data[1].sub!(/^/, '% ') # authors: prefix the second line with "% "
+data[1].sub!(/ and /, ';') # authors: replace the first " and " with ";"
+
# Mark up headings: Very artisanal and suboptimal, but should do the
# trick.
#
#
# Try to keep this ordered as it appears within the book, as it will
# help us spot omissions and mistakes!
-[ [2, 'Foreword'],
- [2, 'Introduction'],
+log 0, 'Mark up headings' # each [level, text] pair below: the first line equal to text is prefixed with level '#' characters and a space
+[ [1, 'Foreword'],
+ [1, 'Introduction'],
[1, 'Part 1'],
[1, 'The Big Picture'],
[2, 'The New World of Digital Commons'],
[4, 'Use CC to differentiate yourself'],
[3, 'Making Money'],
[4, 'Market-based revenue streams'],
- [4, 'Providing a custom service to consumers of your work * \[MARKET-BASED\]*'],
- [4, 'Charging for the physical copy * \[MARKET-BASED\]*'],
- [4, 'Charging for the in-person version * \[MARKET-BASED\]*'],
- [4, 'Selling merchandise * \[MARKET-BASED\]*'],
- [4, 'Charging advertisers or sponsors * \[MARKET-BASED\]*'],
- [4, 'Charging your content creators * \[MARKET-BASED\]*'],
- [4, 'Charging a transaction fee * \[MARKET-BASED\]*'],
- [4, 'Providing a service to your creators* \[MARKET-BASED\]*'],
- [4, 'Licensing a trademark* \[MARKET-BASED\]*'],
+ [4, 'Providing a custom service to consumers of your work *\[MARKET-BASED\]*'],
+ [4, 'Charging for the physical copy *\[MARKET-BASED\]*'],
+ [4, 'Charging for the in-person version *\[MARKET-BASED\]*'],
+ [4, 'Selling merchandise *\[MARKET-BASED\]*'],
+ [4, 'Charging advertisers or sponsors *\[MARKET-BASED\]*'],
+ [4, 'Charging your content creators *\[MARKET-BASED\]*'],
+ [4, 'Charging a transaction fee *\[MARKET-BASED\]*'],
+ [4, 'Providing a service to your creators *\[MARKET-BASED\]*'],
+ [4, 'Licensing a trademark *\[MARKET-BASED\]*'],
[4, 'Reciprocity-based revenue streams'],
[4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'],
[4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'],
[4, 'Build a community'],
[4, 'Give more to the commons than you take'],
[4, 'Involve people in what you do'],
- [4, 'Notes'],
+ [3, 'Notes'], # yields "### Notes", the form the footnote pass looks for
[2, 'The Creative Commons Licenses'],
[1, 'Part 2'],
[1, 'The Case Studies'],
[2, 'Rijksmuseum'],
[2, 'Shareable'],
[2, 'Siyavula'],
- [2, 'Sparkfun'],
+ [2, 'SparkFun'],
[2, 'TeachAIDS'],
[2, 'Tribe of Noise'],
[2, 'Wikimedia Foundation'],
- [2, 'Bibliography'],
- [2, 'Acknowledgments'],
+ [1, 'Bibliography'],
+ [1, 'Acknowledgments'],
].each do |item|
+ log 1, item.join(' -> ') # NOTE(review): as shown here the "if at.nil?" below has no closing "end" before the data[at] assignment, which looks like a line lost from this excerpt; confirm the raise is closed off so found headings are actually written
at = data.index {|i| i == item[1]}
if at.nil?
raise RuntimeError, 'Heading string (level %d) not found: «%s»' % item
data[at] = '%s %s' % ['#' * item[0], data[at]]
end
+# The input contains the literal lines "Part 1" and "Part 2" as
+# structural elements (tagged as level-1 headings by the table above).
+# They are to be generated upon book compilation, so drop them here.
+data.delete("# Part 1")
+data.delete("# Part 2")
+
+log 0, 'add heading to colophon page' # NOTE(review): both inserts below pass data.index(...) straight to insert; index returns nil when no line matches and insert then raises TypeError, with no explicit check as in the heading loop
+data.insert(data.index('Made With Creative Commons'), '# Colophon {-}')
+log 0, 'add dedication as separeate chapter' # NOTE(review): "separeate" is a typo in this message; also the anchor on the next line uses straight quotes while the dedication-quote pass matches the same sentence with curly quotes, so confirm which form the input has
+data.insert(data.index('"I don\'t know a whole lot about nonfiction journalism. . .'), '# Dedication {-}')
-# Join erroneously split paragraphs
-['content and, in turn, spend money and',
- 'still other',
- 'to the values symbolized by',
- 'the kinds of participative communities that drive open',
- 'At a minimum, a CC-',
- 'easier to trust a',
- 'free download, the',
- 'openness to fans remixing the game—give',
- 'Attribution-',
- 'both journal publishers and researchers. Figshare now provides',
- 'get the “network effect”—',
- 'access to scholarly books. For Frances, the current scholarly-',
- 'for-',
- 'sales',
- 'contributing to the open',
- 'doesn’t seem like it should be sung about',
- 'songwriter, and he has found a way to keep it interesting for',
- 'building trust is the top',
- 'license',
- 'authors and Shuttleworth; Mark remains incredibly proud of this',
- 'BY-SA and opting in others with collecting societies like',
- 'Cecilie Maria, Cedric Howe, Cefn Hoile,',
- 'Braddlee, Drew Spencer, Duncan',
- 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-',
- 'Helen',
+# Join erroneously split paragraphs: list the exact contents of the
+# line _preceding_ the unneeded break; the empty line after it is removed.
+#
+# The number after each entry is its line number _after_ corrections,
+# noted so it's easier to find; please keep the entries sorted! :-P
+log 0, 'Join erroneously split paragraphs'
+['content and, in turn, spend money and', # 1595
+ 'still other', # 1662
+ 'content functions as a marketing tool for the paid product or', # 1724
+ 'lowest-common-denominator solutions and', #2035
+ 'to the values symbolized by', # 2145
+ 'the kinds of participative communities that drive open', # 2157
+ 'time', # 2220
+ 'At a minimum, a CC-', # 2375
+ '“Share Your Work” at', # 2508
+ 'easier to trust a', # 2580
+ 'free download, the', # 3086
+ 'openness to fans remixing the game—give', # 3087
+ 'Attribution-', # 3307
+ 'both journal publishers and researchers. Figshare now provides', # 3672
+ 'get the “network effect”—', # 4002
+ 'access to scholarly books. For Frances, the current scholarly-', # 4033
+ 'for-', # 4288
+ 'sales', # 4410
+ 'contributing to the open', # 4438
+ 'doesn’t seem like it should be sung about', # 4616
+ 'songwriter, and he has found a way to keep it interesting for', # 4624
+ 'building trust is the top', # 4793
+ 'version', # 6023
+ 'license', # 6169
+ 'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452
+ 'BY-SA and opting in others with collecting societies like', # 7218
+ 'Journeys to a Generative Economy. San Francisco:', # 7553
+ 'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796
+ 'Braddlee, Drew Spencer, Duncan', # 7839
+ 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844
+ 'Helen', # 7874
].each do |line|
+ log 1, line # NOTE(review): the lookup below takes only the first line equal to the entry and does nothing, silently, when it is missing or not followed by an empty line; short entries such as 'time' or 'sales' depend on the first exact match being the intended one
at = data.index {|i| i == line}
if !at.nil? and data[at+1] == ''
data.delete_at(at+1)
end
end
+log 0, 'Mark quote in dedication as quote with attribute in markdown'
+start = "“I don’t know a whole lot about nonfiction journalism. . ." # exact first line of the quote (curly quotes)
+stop = "- David Foster Wallace" # exact attribution line that ends the quote
+quote=false # true while the scan is inside the quote
+data.each_with_index do |lin, idx| # idx is not used in the body
+ if lin == stop # attribution line: rewritten to "> — *David Foster Wallace*", then the scan ends
+ lin.sub!(/^- /, "> — ")
+ lin.sub!("David Foster Wallace", "*David Foster Wallace*")
+ quote=false
+ break
+ end
+ if lin == start
+ quote=true
+ end
+ if quote # every line from start up to, but not including, stop gets a "> " prefix
+ lin.sub!(/^/, "> ")
+ end
+end
+
+log 0, 'Identify and mark footnotes/endnodes' # NOTE(review): "endnodes" in this message is presumably meant to be "endnotes"
+scope="unknown" # prefix for note labels; stays "unknown" until the first "## " heading is seen
+noteblock=false # true while inside a Notes / Web links block
+data.each_with_index do |lin, idx| # idx is not used in the body
+ if lin =~ /^## (.+)$/ # NOTE(review): only level-2 headings change the scope, so text under level-1 headings (e.g. Foreword, Introduction) keeps the previous scope; confirm labels cannot collide
+ scope=$1.gsub(" ", "-")
+ end
+ # First, mark note reference: digits that directly follow a lowercase
+ # letter plus one or more "." or ")" (and an optional closing quote)
+ # become "[^scope-N]". sub! rewrites only the first match per line.
+ lin.sub!(/([a-z][\.\)]+["”]?)(\d+)(\s)/, "\\1[^" + scope + "-\\2]\\3")
+ lin.sub!(/([a-z][\.\)]+["”]?)(\d+)$/, "\\1[^" + scope + "-\\2]")
+ # Special case some refs hard to match otherwise
+ lin.sub!(/(section.\)) 36 /, "\\1[^" + scope + "-36] ")
+ lin.sub!(/(Data Futures Forum in 2014,)1 /, "\\1[^" + scope + "-1] ")
+ lin.sub!(/(5,080)5/, "\\1[^" + scope + "-5]")
+ lin.sub!(/(sustain her creative work.) 1/, "\\1[^" + scope + "-1]")
+ # Next, mark note content, only between /Web Links?|Notes/ and next heading
+ if noteblock
+ lin.sub!(/^(\d+)\. /, "[^" + scope + "-\\1]: ") # "N. text" becomes the definition "[^scope-N]: text"
+ if lin =~ /^##?.+/ # any heading line closes the block (the check below may reopen it on the same line)
+ noteblock=false
+ end
+ end
+ if lin =~ /^(### Notes|Web links?)/
+ noteblock=true
+ # Turn web link line into section header, to make it easier to
+ # find by fixup-docbook.rb.
+ lin.gsub!(/^(Web links?)/, "### \\1")
+ end
+end
+
+log 0, 'verify every footnote/endnote is unique and used' # NOTE(review): the code below only checks that each label has both a definition and a reference; nothing visible detects duplicate labels
+notes = Hash.new # label => {'def' => true and/or 'ref' => true}
+data.each_with_index do |lin, idx| # idx is not used in the body
+ if lin =~ /(\[\^[^\]]+\])(:)?/ # NOTE(review): =~ captures only the first "[^…]" on a line, so further references on the same line are not recorded
+# log 0, "*** found %s %s" % [$1, $2]
+ if not notes.has_key?($1)
+ notes[$1] = Hash.new
+ end
+ if $2 == ':' # a label directly followed by ":" is a definition, anything else a reference
+ notes[$1]['def'] = true
+ else
+ notes[$1]['ref'] = true
+ end
+ end
+end
+notes.each do |key, val|
+ if val.has_key?('def') != val.has_key?('ref') # exactly one of definition/reference present
+ log 0, "error: check use of footnote %s" % key # reported through log only; processing continues
+ end
+end
+
+log 0, 'Turn indented block after use cases into block quotes'
+inscope=false # true between the "## Arduino" heading and the end-of-range heading
+quote=false # true from the line after an in-scope "## " heading through the "Profile written by" line
+data.each_with_index do |lin, idx| # idx is not used in the body
+ if quote
+ lin.sub!(/^/, "> ")
+ end
+ if lin =~ /^> Profile written by/ # tested after prefixing, so this line is itself quoted and is the last one
+ quote=false
+ end
+ # To this heading
+ # NOTE(review): the heading table now tags Bibliography as level 1
+ # ("# Bibliography"), so this level-2 pattern may never match — confirm.
+ if lin =~ /^## Bibliography/
+ inscope=false
+ end
+ # From this heading
+ if lin =~ /^## Arduino/
+ inscope=true
+ end
+ if inscope and lin =~ /^## / # a level-2 heading inside the range starts quoting from the next line
+ quote=true
+ next
+ end
+end
+
+log 0, 'emphesize keywords' # NOTE(review): "emphesize" in this message is a typo for "emphasize"
+data.map {|lin| lin.gsub!(/^(>\s*)(Revenue model|Interview date|Interviewees?):/, '\\1**\\2**:')} # bolds these labels when they start a block-quoted line
+
+log 0, 'make figure sizes relative to text body width while keeping aspect ratio'
+data.map {|lin| lin.gsub!(/width="6.5in"/, 'width="100%"')} # the "." in these width patterns is unescaped, so it matches any single character
+data.map {|lin| lin.gsub!(/width="4.198in"/, 'width="40%"')}
+data.map {|lin| lin.gsub!(/width="4.1665in"/, 'width="40%"')}
+data.map {|lin| lin.gsub!(/height="[0-9.]+in"/, '')} # removes every inch-valued height attribute
+
+log 0, 'add figure titles required by Docbook for referable figures' # each pair below is [image file name, title]
+[
+ ['10000201000008000000045C30360249076453E6.png', 'Enterprise engagement with commons, state and market.'],
+ ['10000201000007D0000007D0ACF13F8B71EAF0B9.png', 'Four aspects of resource management'],
+ ['10000201000009C40000065D9EC4F530BD4DFBE0.png', 'How the market, commons and state concieve of resources.'], # NOTE(review): "concieve" is a typo for "conceive" in text that appears to be used as a figure title
+ ['10000201000009C4000005153EACBD62F00F6BA9.png', 'In preindustrialized society.'],
+ ['10000201000009C4000005150F069409C1CC12F0.png', 'The commons is gradually superseded by the state.'],
+ ['10000201000009C400000515F1CAA15B223F6BAF.png', 'How the market, the state and the commons look today.'],
+].each do |fig|
+ at = data.index {|i| i.include? fig[0]} # first line that mentions the image file name
+ if at.nil?
+ raise RuntimeError, 'No figure named «%s» found' % fig[0]
+ end # NOTE(review): the gsub! call on the next line is cut off mid-argument in this excerpt (unterminated string literal) and fig[1] is not used by any visible code; restore the full statement from the original file
+ data[at].gsub!(/!\[\]\(Pictures/, '
+end
+
+log 0, 'adding http:// to all URLs and turn them into links'
+data.map {|lin| lin.gsub!(/(^|\s+)([-a-z0-9\\.]+\.(ca|cc|com|edu|eu|io|is|it|kr|net|nl|nz|org|se))/, '\\1http://\\2')} # prefixes bare host names that start a line or follow whitespace and end in one of the listed TLDs; NOTE(review): the character class also admits a literal backslash, so confirm that is intended
+data.map {|lin| lin.gsub!(/\b(https?:\/\/[-a-z0-9\\.]+)(\/[-\\.\/a-zA-Z0-9#_\?&=,]+[-\/a-zA-Z0-9#_\?&=,])?/, '[](\\1\\2)')} # wraps each http(s) URL as a markdown link with empty link text
+
+log 0, 'Writing processed file' # the statement below writes all lines joined by "\n" to dstfile
File.open(dstfile, 'w') {|f| f.puts data.join("\n")}