X-Git-Url: https://pere.pagekite.me/gitweb/text-madewithcc.git/blobdiff_plain/bec5f77092a50adc94c8563c186c1ab8fc5d89f4..1a0a0a629b107b35a860195d77c84c48876d20c6:/fixup.rb?ds=sidebyside diff --git a/fixup.rb b/fixup.rb index 728b4f0..c55798c 100644 --- a/fixup.rb +++ b/fixup.rb @@ -1,15 +1,181 @@ #!/usr/bin/ruby # coding: utf-8 -raise ArgumentError, 'Source file not specified' if ARGV.size != 2 +LogLevel=1 +raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2 srcfile = ARGV[0] dstfile = ARGV[1] data=File.open(srcfile).readlines.map {|l| l.chomp!} -data.map {|lin| lin.gsub!(/<\/span>/, '')} +data.map {|lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '')} -# Join erroneously split paragraphs -['At a minimum, a CC-', +def log(level,what) + indicators = %w(! • # -) + if level >= LogLevel + print indicators[level] + else + print "\n%s %s\n" % ['*' * (level+1), what] + end +end + +# There are several titles that are spread in more than one line. Make +# them into translatable sentences. +log 0, 'Merging multiline sentences ' +[ ['Made', '', 'with', '', 'Creative', '', 'Commons'], + ['The New','', 'World of', '', 'Digital', '', 'Commons'], + ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'], + ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'], + ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'], + ['The', '', 'Creative', '', 'Commons', '', 'Licenses'], + ['PLOS','', '(Public Library of Science)'] +].each do |str| + # This should be done more generic, more robust... But before + # burning brain cells, verify if it's needed! + log 1, str + matches=[] + data.each_with_index do |lin, idx| + if lin == str[0] + matches << idx + end + end + if matches.size == 0 + raise RuntimeError, 'Anchor string («%s» for «%s») not found' % + [str[0], str.reject {|word| word==''}.join(' ')] + elsif matches.size > 1 + raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' % + [str[0], matches.map {|i| i.to_s}.join(', ')] + end + log 2, 'Matches %s: %d - %s' % [str.join(' '), matches.size, matches.join(',')] + + len = str.size + at = data.index(str[0]) + joined = str.reject {|word| word==''}.join(' ') + len.times do |offset| + if str[offset] != data[at+offset] + raise RuntimeError, + 'Warning: String does not match ("%s" of "%s", offset %d, book at %d)' % + [str[offset], joined, offset, at+offset] + end + end + + data[at] = joined + (str.size - 1).times { data.delete_at(at+1) } +end + +# Mark up headings: Very artisanal and suboptimal, but should do the +# trick. +# +# For every heading, put here the full string and its heading level, +# as follows: +# +# 1 - Part +# 2 - Chapter +# 3 - Section +# 4 - Subsection +# +# Try to keep this ordered as it appears within the book, as it will +# help us spot omissions and mistakes! +log 0, 'Mark up headings' +[ [2, 'Foreword'], + [2, 'Introduction'], + [1, 'Part 1'], + [1, 'The Big Picture'], + [2, 'The New World of Digital Commons'], + [3, 'The Commons, the Market, and the State'], + [3, 'The Four Aspects of a Resource'], + [4, 'Characteristics'], + [4, 'People and processes'], + [4, 'Norms and rules'], + [4, 'Goals'], + [3, 'A Short History of the Commons'], + [3, 'The Digital Revolution'], + [3, 'The Birth of Creative Commons'], + [3, 'The Changing Market'], + [3, 'Benefits of the Digital Commons'], + [3, 'Our Case Studies'], + [3, 'Notes'], + [2, 'How to Be Made with Creative Commons'], + [3, 'Problem Zero: Getting Discovered'], + [4, 'Use CC to grow a larger audience'], + [4, 'Use CC to get attribution and name recognition'], + [4, 'Use CC-licensed content as a marketing tool'], + [4, 'Use CC to enable hands-on engagement with your work'], + [4, 'Use CC to differentiate yourself'], + [3, 'Making Money'], + [4, 'Market-based revenue streams'], + [4, 'Providing a custom service to consumers of your work * \[MARKET-BASED\]*'], + [4, 'Charging for the physical copy * \[MARKET-BASED\]*'], + [4, 'Charging for the in-person version * \[MARKET-BASED\]*'], + [4, 'Selling merchandise * \[MARKET-BASED\]*'], + [4, 'Charging advertisers or sponsors * \[MARKET-BASED\]*'], + [4, 'Charging your content creators * \[MARKET-BASED\]*'], + [4, 'Charging a transaction fee * \[MARKET-BASED\]*'], + [4, 'Providing a service to your creators* \[MARKET-BASED\]*'], + [4, 'Licensing a trademark* \[MARKET-BASED\]*'], + [4, 'Reciprocity-based revenue streams'], + [4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'], + [4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'], + [4, 'Crowdfunding *\[RECIPROCITY-BASED\]*'], + [3, 'Making Human Connections'], + [4, 'Be human'], + [4, 'Be open and accountable'], + [4, 'Design for the good actors'], + [4, 'Treat humans like, well, humans'], + [4, 'State your principles and stick to them'], + [4, 'Build a community'], + [4, 'Give more to the commons than you take'], + [4, 'Involve people in what you do'], + [4, 'Notes'], + [2, 'The Creative Commons Licenses'], + [1, 'Part 2'], + [1, 'The Case Studies'], + [2, 'Arduino'], + [2, 'Ártica'], + [2, 'Blender Institute'], + [2, 'Cards Against Humanity'], + [2, 'The Conversation'], + [2, 'Cory Doctorow'], + [2, 'Figshare'], + [2, 'Figure.NZ'], + [2, 'Knowledge Unlatched'], + [2, 'Lumen Learning'], + [2, 'Jonathan Mann'], + [2, 'Noun Project'], + [2, 'Open Data Institute'], + [2, 'OpenDesk'], + [2, 'OpenStax'], + [2, 'Amanda Palmer'], + [2, 'PLOS (Public Library of Science)'], + [2, 'Rijksmuseum'], + [2, 'Shareable'], + [2, 'Siyavula'], + [2, 'Sparkfun'], + [2, 'TeachAIDS'], + [2, 'Tribe of Noise'], + [2, 'Wikimedia Foundation'], + [2, 'Bibliography'], + [2, 'Acknowledgments'], + +].each do |item| + log 1, item.join(' -> ') + at = data.index {|i| i == item[1]} + if at.nil? + raise RuntimeError, 'Heading string (level %d) not found: «%s»' % item + end + data[at] = '%s %s' % ['#' * item[0], data[at]] +end + + + +# Join erroneously split paragraphs: Write the contents of the line +# _preceding_ the unneeded break, the break will be removed. +log 0, 'Join erroneously split paragraphs' +['content and, in turn, spend money and', + 'still other', + 'content functions as a marketing tool for the paid product or', + 'to the values symbolized by', + 'the kinds of participative communities that drive open', + 'At a minimum, a CC-', 'easier to trust a', 'free download, the', 'openness to fans remixing the game—give', @@ -31,10 +197,12 @@ data.map {|lin| lin.gsub!(/<\/span>/, '')} 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', 'Helen', ].each do |line| + log 1, line at = data.index {|i| i == line} if !at.nil? and data[at+1] == '' data.delete_at(at+1) end end +log 0, 'Writing processed file' File.open(dstfile, 'w') {|f| f.puts data.join("\n")}