#!/usr/bin/ruby
# coding: utf-8
# Verbosity threshold used by the log helper below: messages with a level
# lower than this are printed in full, all others as a one-character marker.
LogLevel=1

# Usage: <script> SRCFILE DSTFILE
raise ArgumentError, 'Source/destination files not specified' if ARGV.size != 2
srcfile = ARGV[0]
dstfile = ARGV[1]

# Load the source document as an array of lines without line terminators.
#
# * File.readlines opens and closes the file itself, so no handle is leaked.
# * The non-destructive String#chomp is used on purpose: String#chomp!
#   returns nil when there is nothing to strip, which would turn a final
#   line lacking a newline into nil and break every later step.
# * The text is read as UTF-8 regardless of the locale, because the rest of
#   the script compares lines against UTF-8 string literals.
data = File.readlines(srcfile, encoding: 'UTF-8').map(&:chomp)

# Remove empty anchor markers ("[]{#anchor}", "[]{#anchor-12}", ...) in place.
data.each { |lin| lin.gsub!(/\[\]{#anchor-?\d*}/, '') }

# Report progress on standard output.
#
# level - Integer level of the message (0 is the most important).
# what  - The message; it is only shown for levels below LogLevel.
#
# A level below LogLevel starts a new line and prints the message prefixed
# by (level + 1) asterisks. Any other level prints only the one-character
# marker for that level ("!", "•", "#", "-"), without a newline; levels that
# have no marker print nothing.
def log(level, what)
  if level < LogLevel
    stars = '*' * (level + 1)
    print format("\n%s %s\n", stars, what)
  else
    markers = %w(! • # -)
    print markers[level]
  end
end

# Some titles are spread over several lines of the source. Each entry below
# lists those lines exactly as they appear, in order and including the empty
# lines between them; they are collapsed into a single line so that the title
# becomes one translatable sentence.
log 0, 'Merging multiline sentences '
[ ['Made', '', 'with', '', 'Creative', '', 'Commons'],
  ['The New','', 'World of', '', 'Digital', '', 'Commons'],
  ['How', '', 'to Be', '', 'Made with', '', 'Creative', '', 'Commons'],
  ['Providing a custom service to consumers of your work *', '\[MARKET-BASED\]*'],
  ['Memberships and individual donations', '*\[RECIPROCITY-BASED\]*'],
  ['The', '', 'Creative', '', 'Commons', '', 'Licenses'],
  ['jonathanmann.net and', '', 'jonathanmann.bandcamp.com'],
  ['PLOS','', '(Public Library of Science)']
].each do |pieces|
  # Deliberately simple-minded: make it more generic or robust only once
  # that turns out to be needed.
  log 1, pieces
  anchor = pieces[0]
  merged = pieces.reject { |word| word == '' }.join(' ')

  # The first piece is the anchor and must occur exactly once in the book.
  hits = data.each_index.select { |i| data[i] == anchor }
  if hits.empty?
    raise RuntimeError, 'Anchor string («%s» for «%s») not found' % [anchor, merged]
  end
  if hits.size > 1
    raise RuntimeError, 'Anchor string (%s) appears multiple times: %s' %
                        [anchor, hits.join(', ')]
  end
  log 2, 'Matches %s: %d - %s' % [pieces.join(' '), hits.size, hits.join(',')]

  # Every following piece has to match the book line by line before
  # anything is modified.
  at = hits.first
  pieces.each_with_index do |expected, offset|
    next if expected == data[at + offset]
    raise RuntimeError,
          'Warning: String does not match ("%s" of "%s", offset %d, book at %d)' %
          [expected, merged, offset, at + offset]
  end

  # Put the merged title on the anchor line and drop the lines it absorbed.
  data[at] = merged
  data.slice!(at + 1, pieces.size - 1)
end

log 0, 'correct emphesis in some titles'
# Move a stray opening asterisk up against the escaped bracket that follows
# it: "... * \[" (with any number of leading spaces) becomes " *\[".
data.each { |lin| lin.gsub!(/ *\* \\\[/, ' *\[') }

log 0, 'tag title and author'
# Drop the unwanted blank line between title and authors; afterwards the
# title is the first line and the authors are the second.
data.delete_at(1)
title, authors = data[0], data[1]
title.sub!(/^/, '% ')
authors.sub!(/^/, '% ')
# Only the first " and " is turned into the ";" separator.
authors.sub!(/ and /, ';')

# Heading mark-up. Hand-maintained and far from optimal, but it does the job.
#
# Each entry pairs a heading level with the complete text of the line that
# is to become a heading:
#
# 1 - Part
# 2 - Chapter
# 3 - Section
# 4 - Subsection
#
# Keep the entries in the order in which they appear in the book: that makes
# omissions and mistakes easier to spot, and it is also what lets a repeated
# title (such as 'Notes') be resolved, since each lookup takes the first line
# that has not been marked yet.
log 0, 'Mark up headings'
[ [1, 'Foreword'],
  [1, 'Introduction'],
  [1, 'Part 1'],
  [1, 'The Big Picture'],
  [2, 'The New World of Digital Commons'],
  [3, 'The Commons, the Market, and the State'],
  [3, 'The Four Aspects of a Resource'],
  [4, 'Characteristics'],
  [4, 'People and processes'],
  [4, 'Norms and rules'],
  [4, 'Goals'],
  [3, 'A Short History of the Commons'],
  [3, 'The Digital Revolution'],
  [3, 'The Birth of Creative Commons'],
  [3, 'The Changing Market'],
  [3, 'Benefits of the Digital Commons'],
  [3, 'Our Case Studies'],
  [3, 'Notes'],
  [2, 'How to Be Made with Creative Commons'],
  [3, 'Problem Zero: Getting Discovered'],
  [4, 'Use CC to grow a larger audience'],
  [4, 'Use CC to get attribution and name recognition'],
  [4, 'Use CC-licensed content as a marketing tool'],
  [4, 'Use CC to enable hands-on engagement with your work'],
  [4, 'Use CC to differentiate yourself'],
  [3, 'Making Money'],
  [4, 'Market-based revenue streams'],
  [4, 'Providing a custom service to consumers of your work *\[MARKET-BASED\]*'],
  [4, 'Charging for the physical copy *\[MARKET-BASED\]*'],
  [4, 'Charging for the in-person version *\[MARKET-BASED\]*'],
  [4, 'Selling merchandise *\[MARKET-BASED\]*'],
  [4, 'Charging advertisers or sponsors *\[MARKET-BASED\]*'],
  [4, 'Charging your content creators *\[MARKET-BASED\]*'],
  [4, 'Charging a transaction fee *\[MARKET-BASED\]*'],
  [4, 'Providing a service to your creators *\[MARKET-BASED\]*'],
  [4, 'Licensing a trademark *\[MARKET-BASED\]*'],
  [4, 'Reciprocity-based revenue streams'],
  [4, 'Memberships and individual donations *\[RECIPROCITY-BASED\]*'],
  [4, 'The pay-what-you-want model *\[RECIPROCITY-BASED\]*'],
  [4, 'Crowdfunding *\[RECIPROCITY-BASED\]*'],
  [3, 'Making Human Connections'],
  [4, 'Be human'],
  [4, 'Be open and accountable'],
  [4, 'Design for the good actors'],
  [4, 'Treat humans like, well, humans'],
  [4, 'State your principles and stick to them'],
  [4, 'Build a community'],
  [4, 'Give more to the commons than you take'],
  [4, 'Involve people in what you do'],
  [3, 'Notes'],
  [2, 'The Creative Commons Licenses'],
  [1, 'Part 2'],
  [1, 'The Case Studies'],
  [2, 'Arduino'],
  [2, 'Ártica'],
  [2, 'Blender Institute'],
  [2, 'Cards Against Humanity'],
  [2, 'The Conversation'],
  [2, 'Cory Doctorow'],
  [2, 'Figshare'],
  [2, 'Figure.NZ'],
  [2, 'Knowledge Unlatched'],
  [2, 'Lumen Learning'],
  [2, 'Jonathan Mann'],
  [2, 'Noun Project'],
  [2, 'Open Data Institute'],
  [2, 'OpenDesk'],
  [2, 'OpenStax'],
  [2, 'Amanda Palmer'],
  [2, 'PLOS (Public Library of Science)'],
  [2, 'Rijksmuseum'],
  [2, 'Shareable'],
  [2, 'Siyavula'],
  [2, 'SparkFun'],
  [2, 'TeachAIDS'],
  [2, 'Tribe of Noise'],
  [2, 'Wikimedia Foundation'],
  [1, 'Bibliography'],
  [1, 'Acknowledgments'],
].each do |level, title|
  log 1, "#{level} -> #{title}"
  # Only a line consisting of exactly the title qualifies; the first such
  # line wins.
  at = data.index(title)
  unless at
    raise RuntimeError, 'Heading string (level %d) not found: «%s»' % [level, title]
  end
  # Prefix the line with one '#' per heading level.
  data[at] = "#{'#' * level} #{data[at]}"
end

# The explicit strings "Part 1" and "Part 2" are structural elements that
# are to be generated upon book compilation, so the heading lines created
# for them are removed again. Array#delete is a no-op when a line is absent.
data.delete("# Part 1")
data.delete("# Part 2")

# Insert an extra heading line right before a known anchor line. Each entry
# holds: the progress message, the exact text of the anchor line, and the
# heading line to insert in front of it.
#
# Raises RuntimeError when an anchor line is missing. Without that check
# Array#index would return nil and Array#insert would fail with a TypeError
# that does not say which line could not be found.
[ ['add heading to colophon page',
   'Made With Creative Commons',
   '# Colophon {-}'],
  ['add dedication as separeate chapter',
   '“I don’t know a whole lot about nonfiction journalism. . .',
   '# Dedication {-}']
].each do |message, anchor, heading|
  log 0, message
  at = data.index(anchor)
  if at.nil?
    raise RuntimeError, 'Anchor line for heading «%s» not found: «%s»' % [heading, anchor]
  end
  data.insert(at, heading)
end

# Paragraphs that were split by mistake get rejoined here. List the complete
# text of the line that comes right _before_ the unwanted break; the empty
# line following it is removed. Entries that are not found, or that are not
# followed by an empty line, are skipped silently.
#
# The number after each entry is its line number _after_ the corrections,
# which makes it easier to locate; please keep the list sorted! :-P
log 0, 'Join erroneously split paragraphs'
['content and, in turn, spend money and', # 1595
 'still other', # 1662
 'content functions as a marketing tool for the paid product or', # 1724
 'lowest-common-denominator solutions and', #2035
 'to the values symbolized by', # 2145
 'the kinds of participative communities that drive open', # 2157
 'time', # 2220
 'At a minimum, a CC-', # 2375
 'easier to trust a', # 2580
 'free download, the', # 3086
 'openness to fans remixing the game—give', # 3087
 'Attribution-', # 3307
 'both journal publishers and researchers. Figshare now provides', # 3672
 'get the “network effect”—', # 4002
 'access to scholarly books. For Frances, the current scholarly-', # 4033
 'for-', # 4288
 'sales', # 4410
 'contributing to the open', # 4438
 'doesn’t seem like it should be sung about', # 4616
 'songwriter, and he has found a way to keep it interesting for', # 4624
 'building trust is the top', # 4793
 'version', # 6023
 'license', # 6169
 'authors and Shuttleworth; Mark remains incredibly proud of this', # 6452
 'BY-SA and opting in others with collecting societies like', # 7218
 'Cecilie Maria, Cedric Howe, Cefn Hoile,', # 7796
 'Braddlee, Drew Spencer, Duncan', # 7839
 'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-', # 7844
 'Helen', # 7874
].each do |line|
  log 1, line
  # Only the first line that equals the entry exactly is considered.
  at = data.index(line)
  next unless at
  data.delete_at(at + 1) if data[at + 1] == ''
end

log 0, 'Mark quote in dedication as quote with attribute in markdown'
# The quote runs from its opening line up to the attribution line. Every
# line in that range gets a "> " prefix; the attribution itself is rewritten
# as an emphasized "&mdash;" credit, after which scanning stops.
quote_opening = "“I don’t know a whole lot about nonfiction journalism. . ."
quote_credit = "- David Foster Wallace"
inside_quote = false
data.each do |lin|
  if lin == quote_credit
    lin.sub!(/^- /, "> &mdash; ")
    lin.sub!("David Foster Wallace", "*David Foster Wallace*")
    break
  end
  # The opening line is compared before it is prefixed, and is itself part
  # of the quote.
  inside_quote = true if lin == quote_opening
  lin.sub!(/^/, "> ") if inside_quote
end

log 0, 'Identify and mark footnotes/endnodes'
# Note labels have the form "[^<scope>-<number>]", where the scope is the
# text of the most recent level-2 heading with spaces replaced by dashes
# ("unknown" before the first such heading).
scope = "unknown"
noteblock = false
data.each do |lin|
  chapter = lin.match(/^## (.+)$/)
  scope = chapter[1].gsub(" ", "-") if chapter

  # Note references: a number directly after a lowercase letter and a full
  # stop (optionally followed by a closing quote). At most one reference that
  # is followed by whitespace and one at the very end of the line is marked.
  lin.sub!(/([a-z]\.["”]?)(\d+)(\s)/, "\\1[^#{scope}-\\2]\\3")
  lin.sub!(/([a-z]\.["”]?)(\d+)$/, "\\1[^#{scope}-\\2]")

  # Note contents: "<number>. " at the start of a line, but only inside a
  # note block. A block is left at the next line starting with '#'.
  if noteblock
    lin.sub!(/^(\d+)\. /, "[^#{scope}-\\1]: ")
    noteblock = false if lin =~ /^##?.+/
  end

  # A block opens on the line _after_ a "### Notes" heading or a
  # "Web link(s)" line. This check comes last so that the opening line itself
  # is not treated as note content, and so that a "### Notes" heading which
  # has just closed a block immediately opens the next one.
  noteblock = true if lin =~ /^(### Notes|Web links?)/
end

log 0, 'Turn indented block after use cases into block quotes'
# Within the case studies, the lines that follow each level-2 heading are
# turned into a block quote, up to and including the line that starts with
# "Profile written by".
#
# The checks below run in this order on purpose: a line is prefixed first
# (so the "Profile written by" test sees the "> " prefix), and quoting is
# switched on last, so that it starts on the line after the heading.
inscope = false
quote = false
data.each do |lin|
  lin.sub!(/^/, "> ") if quote
  quote = false if lin =~ /^> Profile written by/

  # The case studies end at the Bibliography heading. The heading step tags
  # it as level 1 ("# Bibliography"), so any heading level has to be
  # accepted here; testing for "## Bibliography" could never match.
  inscope = false if lin =~ /^#+ Bibliography/
  # The case studies begin at the Arduino heading.
  inscope = true if lin =~ /^## Arduino/

  quote = true if inscope && lin =~ /^## /
end

log 0, 'emphesize keywords'
# Make the field labels inside block quotes bold, keeping the "> " prefix.
data.each { |lin| lin.gsub!(/^(>\s*)(Revenue model|Interview date|Interviewees?):/, '\\1**\\2**:') }

log 0, 'make figure sizes relative to text body width while keeping aspect ratio'
# The widths are matched as plain strings rather than as regular
# expressions, so the '.' in a size only matches a literal dot. Explicit
# heights are dropped.
data.each do |lin|
  lin.gsub!('width="6.5in"', 'width="100%"')
  lin.gsub!('width="4.198in"', 'width="40%"')
  lin.gsub!('width="4.1665in"', 'width="40%"')
  lin.gsub!(/height="[0-9.]+in"/, '')
end

log 0, 'Writing processed file'
# The block form closes the file as soon as the text has been written.
File.open(dstfile, 'w') { |f| f.puts data.join("\n") }