#!/usr/bin/perl use warnings; use strict; my %usedid; my $filename = shift || 'free_culture.lawrence_lessig.sst'; open(my $fh, '<', $filename) || die; my @ranges; my $lastline = "foo"; while (<$fh>) { my $printpara = ""; if (m/^group{/) { while (my $line = <$fh>) { $_ .= $line; last if ($line =~ m/^}group/); } } if (m/^={(.+)}/) { my @entries = split(/;/, $1); for my $entry (@entries) { my @ends = dbentry($entry); push(@ranges, @ends) if @ends; } } if ($lastline =~ m/^$/) { my @newranges; for my $r (@ranges) { $r->{'paracount'} --; if (0 >= $r->{'paracount'}) { my $idx = $r->{'id'}; print "\n"; $printpara = 1; } else { push(@newranges, $r); } } @ranges = @newranges; } $lastline = $_; my $startofline = substr($_, 0, 60); chomp $startofline; print ($startofline,"\n") if ($printpara); } close $fh; sub dbentry { my ($entry) = @_; my $isrange; $entry =~ s/&/&/g; if ($entry =~ m/^(.+)\+(\d+)$/) { $entry = $1; $isrange = $2; } my $block = "$entry"; my @ranges; if ($entry =~ /:/) { my ($primary, $secondary) = split(/:/, $entry); if ($secondary =~ m/\|/) { my @seclist = split(/\|/, $secondary); $secondary = pop @seclist; for my $s (@seclist) { push(@ranges, dbentry("$primary:$s")); } } $block = "$primary$secondary" } if ($isrange) { my $id = $block; $id =~ s/<[^>]+>//g; $id =~ tr/A-Z/a-z/; # print "id1: $id\n"; $id =~ s/[^a-z]//g; # print "id2: $id\n"; my $idx = "idx$id"; my $count = 1; while (exists $usedid{$idx}) { $count++; $idx = "idx$id$count"; } $usedid{$idx} = 1; print "$block\n"; # print "\n"; push(@ranges, {'paracount' => $isrange+1, 'id' => $idx}); } else { print "$block\n"; } return @ranges; }