From: Petter Reinholdtsen
Date: Tue, 29 Jul 2014 10:27:05 +0000 (+0200)
Subject: Document how to compare index entires to the sisu version.
X-Git-Tag: edition-2015-10-10~623
X-Git-Url: https://pere.pagekite.me/gitweb/text-free-culture-lessig.git/commitdiff_plain/ef8fdbd10a961bbcebde75b065d85257236960d2?ds=sidebyside

Document how to compare index entires to the sisu version.
---

diff --git a/README.md b/README.md
index 2540599..5c4be4b 100644
--- a/README.md
+++ b/README.md
@@ -80,5 +80,5 @@ Todo
 is easiest).
 * Complete the proof reading.
 * Publish first edition.
-
- (* Add missing index entries.)
+ * (Add missing index entries. Nice to have, not vital. See
+ scripts/idxcompare for helper script.)
diff --git a/scripts/idxcompare b/scripts/idxcompare
new file mode 100755
index 0000000..f5d6f82
--- /dev/null
+++ b/scripts/idxcompare
@@ -0,0 +1,12 @@
+#!/bin/sh
+clean() {
+# grep -v class= |
+# sed 's/ id=.[^ ]*. class/ class/' |
+# sed 's/ startref=.[^ ]*. class/ class/'
+# grep -v 'startref=' |
+    cat
+}
+bindir="$(dirname $0)"
+$bindir/sisu2dbindex | clean > index-sisu-summary
+grep ' index-xml-summary
+diff -u index-sisu-summary index-xml-summary |less
diff --git a/scripts/sisu2dbindex b/scripts/sisu2dbindex
new file mode 100755
index 0000000..e2c3ebc
--- /dev/null
+++ b/scripts/sisu2dbindex
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+my %usedid;
+my $filename = shift || 'free_culture.lawrence_lessig.sst';
+
+open(my $fh, '<', $filename) || die;
+
+my @ranges;
+my $lastline = "foo";
+while (<$fh>) {
+    my $printpara = "";
+    if (m/^group{/) {
+        while (my $line = <$fh>) {
+            $_ .= $line;
+            last if ($line =~ m/^}group/);
+        }
+    }
+    if (m/^={(.+)}/) {
+        my @entries = split(/;/, $1);
+        for my $entry (@entries) {
+            my @ends = dbentry($entry);
+            push(@ranges, @ends) if @ends;
+        }
+    }
+    if ($lastline =~ m/^$/) {
+        my @newranges;
+        for my $r (@ranges) {
+            $r->{'paracount'} --;
+            if (0 >= $r->{'paracount'}) {
+                my $idx = $r->{'id'};
+                print "\n";
+                $printpara = 1;
+            } else {
+                push(@newranges, $r);
+            }
+        }
+        @ranges = @newranges;
+    }
+    $lastline = $_;
+    my $startofline = substr($_, 0, 60);
+    chomp $startofline;
+    print ($startofline,"\n") if ($printpara);
+}
+close $fh;
+
+sub dbentry {
+    my ($entry) = @_;
+    my $isrange;
+    $entry =~ s/&/&/g;
+    if ($entry =~ m/^(.+)\+(\d+)$/) {
+        $entry = $1;
+        $isrange = $2;
+    }
+    my $block = "$entry";
+    my @ranges;
+    if ($entry =~ /:/) {
+        my ($primary, $secondary) = split(/:/, $entry);
+        if ($secondary =~ m/\|/) {
+            my @seclist = split(/\|/, $secondary);
+            $secondary = pop @seclist;
+            for my $s (@seclist) {
+                push(@ranges, dbentry("$primary:$s"));
+            }
+        }
+        $block = "$primary$secondary"
+    }
+
+    if ($isrange) {
+        my $id = $block;
+        $id =~ s/<[^>]+>//g;
+        $id =~ tr/A-Z/a-z/;
+# print "id1: $id\n";
+        $id =~ s/[^a-z]//g;
+# print "id2: $id\n";
+        my $idx = "idx$id";
+        my $count = 1;
+        while (exists $usedid{$idx}) {
+            $count++;
+            $idx = "idx$id$count";
+        }
+        $usedid{$idx} = 1;
+        print "$block\n";
+# print "\n";
+        push(@ranges, {'paracount' => $isrange+1, 'id' => $idx});
+    } else {
+        print "$block\n";
+    }
+    return @ranges;
+}