Add tool to check locale contents.

author Petter Reinholdtsen <pere@hungry.com>

Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)

committer Petter Reinholdtsen <pere@hungry.com>

Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)
author Petter Reinholdtsen <pere@hungry.com>
Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)
committer Petter Reinholdtsen <pere@hungry.com>
Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)
diff --git a/linux/glibc/check-locale b/linux/glibc/check-locale

new file mode 100755 (executable)

index 0000000..44a812a
--- /dev/null
+++ b/linux/glibc/check-locale
@@ -0,0 +1,219 @@
+#!/usr/bin/perl
+#
+# Author: Petter Reinholdtsen
+# Date: 2004-06-01
+#
+# Test the content of GNU libc locales, detect some common errors.
+
+use warnings;
+use strict;
+
+use vars qw($locale $warncount $errcount);
+
+sub uxx_to_utf8 {
+    my $line = shift;
+    $line =~ s/<U([0-9A-Za-z]{4})>/pack('U',hex($1))/ge;
+    return $line;
+}
+
+sub error {
+    my $msg = shift;
+    $errcount++;
+    print "error: $locale: $msg\n";
+}
+
+sub warning {
+    my $msg = shift;
+    $warncount++;
+    print "warning: $locale: $msg\n";
+}
+
+sub check_lc_identification {
+    my @lines = @_;
+    for my $line (@lines) {
+       if ($line =~ m/^category\s+(\S+)$/) {
+           warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1"
+               if ($1 !~ m/\".+\"/);
+       }
+       if ($line =~ m/^email\s+(\S+)\s*$/) {
+           my $email = $1;
+           warning "LC_IDENTIFICATION: obsolete email: $email"
+               if ($email =~ m/"?bug-glibc\@gnu.org"?/);
+       }
+    }
+}
+sub check_lc_paper {
+    my @lines = @_;
+    my $height = undef;
+    my $width = undef;
+    for (@lines) {
+       $height = $1 if (m/^height\s*(\d+)\s*$/);
+       $width = $1 if (m/^width\s*(\d+)\s*$/);
+       return if (m/^copy\s+/); # Nothing to check
+    }
+
+    if (!defined $width || defined $height) {
+#      warning "LC_PAPER: Missing height or width.";
+       return;
+    }
+    
+    if (210 == $width && 297 == $height) {      # ISO A4
+    } elsif (216 == $width && 279 == $height) { # US Letter
+    } else {
+       warning "LC_PAPER: unknown paper size.";
+    }
+}
+
+sub check_lc_measurement {
+    my @lines = @_;
+    for my $line (@lines) {
+       if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) {
+           my $value = $2;
+           if (defined $value && $value !~ m/^\d+$/) {
+               warning "LC_MEASUREMENT: measurements should be number 1 or 2.";
+           } elsif  ($value < 1 && 2 < $value) {
+               warning "LC_MEASUREMENT: measurements should be 1 or 2.";
+           }
+       }
+    }
+}
+
+sub check_lc_numeric {
+    my @lines = @_;
+    for my $line (@lines) {
+       next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC");
+       next if ($line =~ m/^$/);
+       if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) {
+           my $value = $2;
+           if ($value =~ m/^-?\d+$/) {
+               # Only digits (or - digits)
+               if ( $value < 1 && $value != -1) {
+                   warning "LC_NUMERIC: grouping should positive or -1: $value";
+               }
+           } else {
+               if (defined $value && $value !~ m/\d+;\d+/) {
+                   warning "LC_NUMERIC: grouping should use ; as separator: $value";
+               }
+           }
+       } elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) {
+       } elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) {
+           my $sep = $2;
+#          print "P: '$sep'\n";
+           warning "LC_NUMERIC: Unusual thousands_sep '$sep' [".
+               uxx_to_utf8($sep)."]"
+               unless (grep { $_ eq $sep; }  ('"<U0020>"',
+                                              '"<U0027>"',
+                                              '"<U002C>"',
+                                              '"<U002E>"',
+                                              '"<U00A0>"',
+                                              '""'));
+       } elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) {
+       } else {
+           warning "LC_NUMERIC: Unknown keyword '$line'";
+       }
+    }
+}
+
+sub check_lc_messages {
+    my @lines = @_;
+    for (@lines) {
+       if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) {
+           my $type  = $1;
+           my $regex = uxx_to_utf8($2);
+           unless ($regex =~ m/^"\^/) {
+               error "LC_MESSAGES: $type missing '^' prefix: $regex";
+           }
+           unless ($regex =~ m/\[.+\]|\(.+\)/) {
+               error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex";
+           }
+           if ($regex =~ m/\.\*"$/) {
+               warning "LC_MESSAGES: $type have '.*' postfix: $regex";
+           }
+           if ($regex =~ m/[0-9]/) {
+               warning "LC_MESSAGES: $type have numbers in regex: $regex";
+           }
+           if ($type eq "yesexpr" && ($regex !~ m/y/ ||
+                                      $regex !~ m/Y/)) {
+               warning "LC_MESSAGES: $type missing 'yY' in content: $regex";
+           }
+           if ($type eq "noexpr" && ($regex !~ m/n/ ||
+                                     $regex !~ m/N/)) {
+               warning "LC_MESSAGES: $type missing 'nN' in content: $regex";
+           }
+       }
+    }
+}
+
+my $section;
+my %sections;
+my @blocks = ();
+for my $filename (@ARGV) {
+    open (FILE, "<$filename") || die "Unable to read $filename";
+    $locale = $filename;
+    my $buf = undef;
+    my @lines;
+    %sections = ();
+    while (<FILE>) {
+       chomp;
+       s/%.*$//; # Remove comments
+       s/\#.*$//;
+       if (m%^(.+)/$%) {
+           $buf .= $1;
+           next;
+       }
+       if (! defined $buf) {
+           $buf = $_;
+       } else {
+           $buf .= $_;
+       }
+       if (!$section && m/^(LC_.*)\s*$/) {
+           $section = $1;
+           @lines = ();
+           #print "Found section $section\n";
+       }
+       push(@lines, $buf);
+       if (m/^END (LC_.+)\s*$/) {
+           if (exists $sections{$section}) {
+               warning "duplicate section $section";
+           }
+           push(@blocks, $section);
+           $sections{$section} = [@lines];
+           $section = "";
+           #print "Stored section $section\n";
+       }
+       undef $buf;
+    }
+    close(FILE);
+
+    check_order(@blocks);
+    check_lc_identification(@{$sections{'LC_IDENTIFICATION'}});
+    check_lc_messages(@{$sections{'LC_MESSAGES'}});
+    check_lc_numeric(@{$sections{'LC_NUMERIC'}});
+    check_lc_paper(@{$sections{'LC_PAPER'}});
+    check_lc_measurement(@{$sections{'LC_MEASUREMENT'}});
+}
+sub check_order {
+    my @blocks = @_;
+    my @order= qw(LC_IDENTIFICATION
+                 LC_CTYPE
+                 LC_COLLATE
+                 LC_MONETARY
+                 LC_NUMERIC
+                 LC_TIME
+                 LC_MESSAGES
+                 LC_PAPER
+                 LC_NAME
+                 LC_ADDRESS
+                 LC_TELEPHONE
+                 LC_MEASUREMENT);
+    my $pos = 0;
+    for my $section (@blocks) {
+       if ($section eq $order[$pos]) {
+           $pos++;
+       } else {
+           $pos = 0;
+           $pos++ while ($order[$pos] ne $section);
+           warning "$section: not following section $order[$pos-1]";
+       }
+    }
+}
author	Petter Reinholdtsen <pere@hungry.com>
	Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)
committer	Petter Reinholdtsen <pere@hungry.com>
	Mon, 20 Dec 2004 19:16:48 +0000 (19:16 +0000)