X-Git-Url: https://pere.pagekite.me/gitweb/text-madewithcc.git/blobdiff_plain/5ae54b9934be83eacfb5b1a66908b452e870c567..96afcb1558dc71921389416cf0e0ba4aec0e8ae3:/fixup.rb diff --git a/fixup.rb b/fixup.rb index 595a8c8..d15b243 100644 --- a/fixup.rb +++ b/fixup.rb @@ -1,38 +1,52 @@ #!/usr/bin/ruby # coding: utf-8 +LogLevel=1 raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2 srcfile = ARGV[0] dstfile = ARGV[1] data=File.open(srcfile).readlines.map {|l| l.chomp!} -data.map {|lin| lin.gsub!(/<\/span>/, '')} +data.map {|lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '')} + +def log(level,what) + indicators = %w(! • # -) + if level >= LogLevel + print indicators[level] + else + print "\n%s %s\n" % ['*' * (level+1), what] + end +end # There are several titles that are spread in more than one line. Make # them into translatable sentences. +log 0, 'Merging multiline sentences ' [ ['Made', '', 'with', '', 'Creative', '', 'Commons'], ['The New','', 'World of', '', 'Digital', '', 'Commons'], ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'], - ['Use CC to get attribution and name', 'recognition'], - ['Use CC to enable hands-on engagement with', 'your work'], - ['Providing a custom service to consumers of', 'your work * \[MARKET-BASED\]*'], - ['Charging for the physical copy *', '\[MARKET-BASED\]*'], - ['Charging for the in-person version *','\[MARKET-BASED\]*'], - ['Charging advertisers or sponsors *', '\[MARKET-BASED\]*'], - ['Charging your content creators *', '\[MARKET-BASED\]*'], - ['Charging a transaction fee *', '\[MARKET-BASED\]*'], - ['Providing a service to your creators*', '\[MARKET-BASED\]*'], + ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'], ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'], - ['The pay-what-you-want model', '*\[RECIPROCITY-BASED\]*'], ['The', '', 'Creative', '', 'Commons', '', 'Licenses'], + ['jonathanmann.net and', '', 'jonathanmann.bandcamp.com'], ['PLOS','', '(Public Library of Science)'] ].each do |str| # This should be done more generic, more robust... But before # burning brain cells, verify if it's needed! - if (data.select {|i| i == str[0]}.size != 1) - raise RuntimeError, 'First string appears multiple times: %s (%s)' % - [str.reject{|i| i==''}.join(' '), str[0]] + log 1, str + matches=[] + data.each_with_index do |lin, idx| + if lin == str[0] + matches << idx + end end + if matches.size == 0 + raise RuntimeError, 'Anchor string («%s» for «%s») not found' % + [str[0], str.reject {|word| word==''}.join(' ')] + elsif matches.size > 1 + raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' % + [str[0], matches.map {|i| i.to_s}.join(', ')] + end + log 2, 'Matches %s: %d - %s' % [str.join(' '), matches.size, matches.join(',')] len = str.size at = data.index(str[0]) @@ -62,6 +76,7 @@ end # # Try to keep this ordered as it appears within the book, as it will # help us spot omissions and mistakes! +log 0, 'Mark up headings' [ [2, 'Foreword'], [2, 'Introduction'], [1, 'Part 1'], @@ -111,7 +126,7 @@ end [4, 'Build a community'], [4, 'Give more to the commons than you take'], [4, 'Involve people in what you do'], - [4, 'Notes'], + [3, 'Notes'], [2, 'The Creative Commons Licenses'], [1, 'Part 2'], [1, 'The Case Studies'], @@ -143,6 +158,7 @@ end [2, 'Acknowledgments'], ].each do |item| + log 1, item.join(' -> ') at = data.index {|i| i == item[1]} if at.nil? raise RuntimeError, 'Heading string (level %d) not found: «%s»' % item @@ -150,39 +166,54 @@ end data[at] = '%s %s' % ['#' * item[0], data[at]] end +# We have the explicit strings "Part 1" and "Part 2" as structural +# elements — They are to be generated upon book compilation. Nuke +# them. +data.delete("# Part 1") +data.delete("# Part 2") - -# Join erroneously split paragraphs -['content and, in turn, spend money and', - 'still other', - 'to the values symbolized by', - 'the kinds of participative communities that drive open', - 'At a minimum, a CC-', - 'easier to trust a', - 'free download, the', - 'openness to fans remixing the game—give', - 'Attribution-', - 'both journal publishers and researchers. Figshare now provides', - 'get the “network effect”—', - 'access to scholarly books. For Frances, the current scholarly-', - 'for-', - 'sales', - 'contributing to the open', - 'doesn’t seem like it should be sung about', - 'songwriter, and he has found a way to keep it interesting for', - 'building trust is the top', - 'license', - 'authors and Shuttleworth; Mark remains incredibly proud of this', - 'BY-SA and opting in others with collecting societies like', - 'Cecilie Maria, Cedric Howe, Cefn Hoile,', - 'Braddlee, Drew Spencer, Duncan', - 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', - 'Helen', +# Join erroneously split paragraphs: Write the contents of the line +# _preceding_ the unneeded break, the break will be removed. +# +# I'm noting the line number for each _after_ corrections so it's +# easier to find them; please keep them sorted! :-P +log 0, 'Join erroneously split paragraphs' +['content and, in turn, spend money and', # 1595 + 'still other', # 1662 + 'content functions as a marketing tool for the paid product or', # 1724 + 'lowest-common-denominator solutions and', #2035 + 'to the values symbolized by', # 2145 + 'the kinds of participative communities that drive open', # 2157 + 'time', # 2220 + 'At a minimum, a CC-', # 2375 + 'easier to trust a', # 2580 + 'free download, the', # 3086 + 'openness to fans remixing the game—give', # 3087 + 'Attribution-', # 3307 + 'both journal publishers and researchers. Figshare now provides', # 3672 + 'get the “network effect”—', # 4002 + 'access to scholarly books. For Frances, the current scholarly-', # 4033 + 'for-', # 4288 + 'sales', # 4410 + 'contributing to the open', # 4438 + 'doesn’t seem like it should be sung about', # 4616 + 'songwriter, and he has found a way to keep it interesting for', # 4624 + 'building trust is the top', # 4793 + 'version', # 6023 + 'license', # 6169 + 'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452 + 'BY-SA and opting in others with collecting societies like', # 7218 + 'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796 + 'Braddlee, Drew Spencer, Duncan', # 7839 + 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844 + 'Helen', # 7874 ].each do |line| + log 1, line at = data.index {|i| i == line} if !at.nil? and data[at+1] == '' data.delete_at(at+1) end end +log 0, 'Writing processed file' File.open(dstfile, 'w') {|f| f.puts data.join("\n")}