]> pere.pagekite.me Git - text-free-culture-lessig.git/blob - scripts/sisu2dbindex
Fix grammar and spelling errors: 2nd pass.
[text-free-culture-lessig.git] / scripts / sisu2dbindex
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
# Index-term ids already handed out by dbentry(); consulted there to keep
# every generated id unique.
my %usedid;

# File to scan: first command-line argument, or the default book source.
my $filename = shift || 'free_culture.lawrence_lessig.sst';

open(my $fh, '<', $filename)
    or die "Unable to open '$filename' for reading: $!\n";

# Index ranges opened by dbentry() that have not been closed yet.  Each
# element is a hash ref with the keys 'paracount' (units left before the
# range ends) and 'id' (the id of the matching startofrange indexterm).
my @ranges;

# The unit read on the previous iteration.  Seeded with a non-empty value
# so the first line is not treated as following a blank line.
my $lastline = "foo";

while (my $para = <$fh>) {
    my $printpara = "";

    # Pull a whole "group{ ... }group" block into $para so it is handled
    # as one unit.  The braces are escaped because an unescaped literal
    # "{" in a pattern is deprecated in Perl.
    if ($para =~ m/^group\{/) {
        while (my $line = <$fh>) {
            $para .= $line;
            last if ($line =~ m/^\}group/);
        }
    }

    # An index line looks like "={entry;entry;...}".  dbentry() prints the
    # indexterm for each entry and returns any ranges it opened.
    if ($para =~ m/^=\{(.+)\}/) {
        my @entries = split(/;/, $1);
        for my $entry (@entries) {
            my @ends = dbentry($entry);
            push(@ranges, @ends) if @ends;
        }
    }

    # The previous unit was blank, so a new paragraph starts here: count
    # it against every open range and close the ranges that have run out.
    if ($lastline =~ m/^$/) {
        my @newranges;
        for my $r (@ranges) {
            $r->{'paracount'}--;
            if (0 >= $r->{'paracount'}) {
                my $idx = $r->{'id'};
                print "<indexterm startref='$idx' class='endofrange'/>\n";
                $printpara = 1;
            } else {
                push(@newranges, $r);
            }
        }
        @ranges = @newranges;
    }

    $lastline = $para;

    # When a range was closed, also print the first 60 characters of the
    # current unit so the spot in the text can be located.
    my $startofline = substr($para, 0, 60);
    chomp $startofline;
    print "$startofline\n" if ($printpara);
}
close $fh;
48
# dbentry($entry)
#
# Print the DocBook <indexterm> element(s) for one index entry and return
# the list of ranges that were opened.
#
# Entry syntax handled here:
#   primary
#   primary:secondary
#   primary:sec1|sec2|...   one indexterm per secondary
#   <any of the above>+N    range entry; N is the count taken from the
#                           trailing "+N"
#
# Returns a (possibly empty) list of hash refs with the keys 'paracount'
# (N + 1) and 'id' (the id printed on the startofrange indexterm).
#
# Reads and updates the file-level %usedid hash to keep ids unique.
sub dbentry {
    my ($entry) = @_;

    # Escape "&" exactly once, here.  The recursion for "|" alternatives
    # happens in the helper, so text that is already escaped is never
    # escaped a second time (which used to produce "&amp;amp;").
    $entry =~ s/&/&amp;/g;
    return _dbentry_escaped($entry);
}

# Does the work of dbentry() on an entry whose "&" characters have already
# been escaped.  Calls itself for each "|"-separated secondary term.
sub _dbentry_escaped {
    my ($entry) = @_;

    # A trailing "+N" marks a range; strip it and remember N.
    my $isrange;
    if ($entry =~ m/^(.+)\+(\d+)$/) {
        $entry   = $1;
        $isrange = $2;
    }

    my $block = "<primary>$entry</primary>";
    my @ranges;
    if ($entry =~ /:/) {
        my ($primary, $secondary) = split(/:/, $entry);

        # "primary:" leaves no secondary part; use an empty string so the
        # output stays the same without uninitialized-value warnings.
        $secondary = '' unless defined $secondary;

        if ($secondary =~ m/\|/) {
            # Every alternative but the last gets its own indexterm via a
            # recursive call; the last one is handled below.
            my @seclist = split(/\|/, $secondary);
            $secondary = pop @seclist;
            for my $s (@seclist) {
                push(@ranges, _dbentry_escaped("$primary:$s"));
            }
        }
        $block = "<primary>$primary</primary><secondary>$secondary</secondary>";
    }

    if ($isrange) {
        # Build an id from the entry text: drop the tags, lowercase, and
        # keep only the letters a-z.
        my $id = $block;
        $id =~ s/<[^>]+>//g;
        $id = lc $id;
        $id =~ s/[^a-z]//g;

        # Append a counter (2, 3, ...) until the id is unused.
        my $idx   = "idx$id";
        my $count = 1;
        while (exists $usedid{$idx}) {
            $count++;
            $idx = "idx$id$count";
        }
        $usedid{$idx} = 1;

        print "<indexterm id='$idx' class='startofrange'>$block</indexterm>\n";

        # One more than N; the caller counts this value down to decide
        # when to print the matching endofrange element.
        push(@ranges, {'paracount' => $isrange + 1, 'id' => $idx});
    } else {
        print "<indexterm>$block</indexterm>\n";
    }
    return @ranges;
}