#!/usr/bin/ruby
# coding: utf-8
# Copies SRCFILE to DSTFILE, removing empty anchor <span> tags and the blank
# line that follows certain known lines (so the two halves of a paragraph
# that was split by mistake end up adjacent again).
#
# Usage: <this script> SRCFILE DSTFILE

# Empty anchor tags of the form <span id="anchor"></span>,
# <span id="anchor-12"></span>, etc.
ANCHOR_SPAN = %r{<span id="anchor-?\d*"></span>}.freeze

# Lines that end the first half of an erroneously split paragraph. Each entry
# must match a whole input line exactly.
SPLIT_PARAGRAPH_LINES = [
  'At a minimum, a CC-',
  'easier to trust a',
  'free download, the',
  'openness to fans remixing the game—give',
  'Attribution-',
  'both journal publishers and researchers. Figshare now provides',
  'get the “network effect”—',
  'access to scholarly books. For Frances, the current scholarly-',
  'for-',
  'sales',
  'contributing to the open',
  'doesn’t seem like it should be sung about',
  'songwriter, and he has found a way to keep it interesting for',
  'building trust is the top',
  'license',
  'authors and Shuttleworth; Mark remains incredibly proud of this',
  'BY-SA and opting in others with collecting societies like',
  'Cecilie Maria, Cedric Howe, Cefn Hoile,',
  'Braddlee, Drew Spencer, Duncan',
  'Elizabeth Holloway, Ellen Buecher, Ellen Kaye-',
  'Helen'
].freeze

# Returns a copy of +lines+ with every empty anchor <span> tag removed.
# The input array and its strings are left untouched.
def strip_anchors(lines)
  lines.map { |line| line.gsub(ANCHOR_SPAN, '') }
end

# Returns a copy of +lines+ in which, for each entry of +markers+, the empty
# line directly following the marker line has been removed.
#
# Markers are processed in order, and only the FIRST line equal to a marker is
# considered; if that line is not followed by an empty line, nothing is
# removed for that marker even if a later identical line would qualify.
def join_split_paragraphs(lines, markers)
  joined = lines.dup
  markers.each do |marker|
    at = joined.index(marker)
    # joined[at + 1] is nil when the marker is the last line, so nothing is
    # deleted in that case.
    joined.delete_at(at + 1) if at && joined[at + 1] == ''
  end
  joined
end

raise ArgumentError, 'Expected exactly two arguments: SRCFILE DSTFILE' unless ARGV.size == 2

srcfile, dstfile = ARGV

# Read explicitly as UTF-8 so the comparison with the (UTF-8) marker literals
# does not depend on the locale. File.readlines closes the file itself, and
# the non-bang chomp never yields nil for a final line without a newline.
data = File.readlines(srcfile, encoding: 'UTF-8').map(&:chomp)
data = strip_anchors(data)
data = join_split_paragraphs(data, SPLIT_PARAGRAPH_LINES)

File.open(dstfile, 'w:UTF-8') { |f| f.puts data.join("\n") }
