From: Petter Reinholdtsen <pere@hungry.com>
Date: Tue, 29 Jul 2014 10:27:05 +0000 (+0200)
Subject: Document how to compare index entries to the sisu version.
X-Git-Tag: edition-2015-10-10~623
X-Git-Url: https://pere.pagekite.me/gitweb/text-free-culture-lessig.git/commitdiff_plain/ef8fdbd10a961bbcebde75b065d85257236960d2?ds=sidebyside

Document how to compare index entries to the sisu version.
---

diff --git a/README.md b/README.md
index 2540599..5c4be4b 100644
--- a/README.md
+++ b/README.md
@@ -80,5 +80,5 @@ Todo
    is easiest).
  * Complete the proof reading.
  * Publish first edition.
-
- (* Add missing index entries.)
+ * (Add missing index entries.  Nice to have, not vital.  See
+   scripts/idxcompare for helper script.)
diff --git a/scripts/idxcompare b/scripts/idxcompare
new file mode 100755
index 0000000..f5d6f82
--- /dev/null
+++ b/scripts/idxcompare
@@ -0,0 +1,12 @@
+#!/bin/sh
+# Diff the index terms generated from the SiSU source against freeculture.xml.
+clean() { # currently a pass-through; the disabled filters are kept below
+#    grep -v class= |
+#    sed 's/ id=.[^ ]*. class/ class/' |
+#    sed 's/ startref=.[^ ]*. class/ class/'
+#    grep -v 'startref=' |
+    cat
+}
+"$(dirname "$0")/sisu2dbindex" | clean > index-sisu-summary
+grep '<indexterm' freeculture.xml | clean > index-xml-summary
+diff -u index-sisu-summary index-xml-summary |less
diff --git a/scripts/sisu2dbindex b/scripts/sisu2dbindex
new file mode 100755
index 0000000..e2c3ebc
--- /dev/null
+++ b/scripts/sisu2dbindex
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+# Ids already handed out by dbentry(); shared so range ids stay unique.
+my %usedid;
+my $filename = shift || 'free_culture.lawrence_lessig.sst';
+open(my $fh, '<', $filename) or die "$0: unable to open $filename: $!\n";
+
+my @ranges; # open index ranges: the {paracount, id} hashes from dbentry()
+my $lastline = "foo";
+while (<$fh>) {
+    my $printpara = "";
+    # Pull a whole group{ ... }group block into $_ as a single unit.
+    if (m/^group\{/) {
+        while (my $line = <$fh>) {
+            $_ .= $line;
+            last if ($line =~ m/^\}group/);
+        }
+    }
+    # An ={...} line carries ';'-separated index entries.
+    if (m/^=\{(.+)\}/) {
+        for my $entry (split(/;/, $1)) {
+            push(@ranges, dbentry($entry));
+        }
+    }
+    # After a blank line, count down every open range and close expired ones.
+    if ($lastline =~ m/^$/) {
+        my @newranges;
+        for my $r (@ranges) {
+            $r->{'paracount'} --;
+            if (0 >= $r->{'paracount'}) {
+                print "<indexterm startref='$r->{'id'}' class='endofrange'/>\n";
+                $printpara = 1;
+            } else {
+                push(@newranges, $r);
+            }
+        }
+        @ranges = @newranges;
+    }
+    $lastline = $_;
+    # Echo the start of the line where a range ended, as a location hint.
+    chomp(my $startofline = substr($_, 0, 60));
+    print "$startofline\n" if ($printpara);
+}
+close $fh;
+
+# Print the DocBook <indexterm> element(s) for one SiSU index entry of
+# the form "primary", "primary:secondary" or "primary:sec1|sec2", with an
+# optional "+N" suffix that opens a range.  Returns one {paracount, id}
+# hash per opened range so the caller can print the endofrange terms.
+sub dbentry {
+    my ($entry) = @_;
+    my $isrange;
+    ($entry, $isrange) = ($1, $2) if ($entry =~ m/^(.+)\+(\d+)$/);
+    my @ranges;
+    my ($primary, $secondary) = ($entry);
+    if ($entry =~ /:/) {
+        # Limit -1 keeps an empty secondary defined for input like "foo:".
+        ($primary, $secondary) = split(/:/, $entry, -1);
+        if ($secondary =~ m/\|/) {
+            my @seclist = split(/\|/, $secondary);
+            $secondary = @seclist ? pop @seclist : '';
+            for my $s (@seclist) {
+                push(@ranges, dbentry("$primary:$s"));
+            }
+        }
+    }
+    # Escape "&" after splitting so recursed entries are not escaped twice.
+    $primary =~ s/&/&amp;/g;
+    $secondary =~ s/&/&amp;/g if defined $secondary;
+    my $block = "<primary>$primary</primary>";
+    $block .= "<secondary>$secondary</secondary>" if defined $secondary;
+
+    if ($isrange) {
+        # Derive the id from the entry's letters; a counter keeps it unique.
+        my $id = $block;
+        $id =~ s/<[^>]+>//g;
+        $id =~ tr/A-Z/a-z/;
+        $id =~ s/[^a-z]//g;
+        my $idx = "idx$id";
+        my $count = 1;
+        $idx = "idx$id" . ++$count while (exists $usedid{$idx});
+        $usedid{$idx} = 1;
+        print "<indexterm id='$idx' class='startofrange'>$block</indexterm>\n";
+        push(@ranges, {'paracount' => $isrange+1, 'id' => $idx});
+    } else {
+        print "<indexterm>$block</indexterm>\n";
+    }
+    return @ranges;
+}