#!/usr/bin/perl
#
# Author: Petter Reinholdtsen
# Date: 2004-06-01
#
# License: GPL v2 or later at your choice
#
# Test the content of GNU libc locales, detect some common errors.
#
# The latest version is available from
# .
use warnings;
use strict;
use vars qw($locale $warncount $errcount);
sub uxx_to_utf8 {
my $line = shift;
$line =~ s//pack('U',hex($1))/ge;
return $line;
}
sub error {
my $msg = shift;
$errcount++;
print "error: $locale: $msg\n";
}
sub warning {
my $msg = shift;
$warncount++;
print "warning: $locale: $msg\n";
}
sub check_lc_identification {
my @lines = @_;
for my $line (@lines) {
if ($line =~ m/^category\s+(\S+)$/) {
warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1"
if ($1 !~ m/\".+\"/);
}
if ($line =~ m/^email\s+(\S+)\s*$/) {
my $email = $1;
warning "LC_IDENTIFICATION: obsolete email: $email"
if ($email =~ m/"?bug-glibc\@gnu.org"?/);
}
}
}
sub check_lc_paper {
my @lines = @_;
my $height = undef;
my $width = undef;
for (@lines) {
$height = $1 if (m/^height\s*(\d+)\s*$/);
$width = $1 if (m/^width\s*(\d+)\s*$/);
return if (m/^copy\s+/); # Nothing to check
}
if (!defined $width || defined $height) {
# warning "LC_PAPER: Missing height or width.";
return;
}
if (210 == $width && 297 == $height) { # ISO A4
} elsif (216 == $width && 279 == $height) { # US Letter
} else {
warning "LC_PAPER: unknown paper size.";
}
}
sub check_lc_measurement {
my @lines = @_;
for my $line (@lines) {
if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) {
my $value = $2;
if (defined $value && $value !~ m/^\d+$/) {
warning "LC_MEASUREMENT: measurements should be number 1 or 2.";
} elsif ($value < 1 && 2 < $value) {
warning "LC_MEASUREMENT: measurements should be 1 or 2.";
}
}
}
}
sub check_lc_numeric {
my @lines = @_;
for my $line (@lines) {
next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC");
next if ($line =~ m/^$/);
if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) {
my $value = $2;
if ($value =~ m/^-?\d+$/) {
# Only digits (or - digits)
if ( $value < 1 && $value != -1) {
warning "LC_NUMERIC: grouping should positive or -1: $value";
}
} else {
if (defined $value && $value !~ m/\d+;\d+/) {
warning "LC_NUMERIC: grouping should use ; as separator: $value";
}
}
} elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) {
} elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) {
my $sep = $2;
# print "P: '$sep'\n";
warning "LC_NUMERIC: Unusual thousands_sep '$sep' [".
uxx_to_utf8($sep)."]"
unless (grep { $_ eq $sep; } ('""',
'""',
'""',
'""',
'""',
'""'));
} elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) {
} else {
warning "LC_NUMERIC: Unknown keyword '$line'";
}
}
}
sub check_lc_messages {
my @lines = @_;
for (@lines) {
if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) {
my $type = $1;
my $regex = uxx_to_utf8($2);
unless ($regex =~ m/^"\^/) {
error "LC_MESSAGES: $type missing '^' prefix: $regex";
}
unless ($regex =~ m/\[.+\]|\(.+\)/) {
error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex";
}
if ($regex =~ m/\.\*"$/) {
warning "LC_MESSAGES: $type have '.*' postfix: $regex";
}
if ($regex =~ m/[0-9]/) {
warning "LC_MESSAGES: $type have numbers in regex: $regex";
}
if ($type eq "yesexpr" && ($regex !~ m/y/ ||
$regex !~ m/Y/)) {
warning "LC_MESSAGES: $type missing 'yY' in content: $regex";
}
if ($type eq "noexpr" && ($regex !~ m/n/ ||
$regex !~ m/N/)) {
warning "LC_MESSAGES: $type missing 'nN' in content: $regex";
}
}
}
}
sub check_order {
my @blocks = @_;
my @order= qw(LC_IDENTIFICATION
LC_CTYPE
LC_COLLATE
LC_MONETARY
LC_NUMERIC
LC_TIME
LC_MESSAGES
LC_PAPER
LC_NAME
LC_ADDRESS
LC_TELEPHONE
LC_MEASUREMENT);
my $pos = 0;
for my $section (@blocks) {
if ($section eq $order[$pos]) {
$pos++;
} else {
$pos = 0;
$pos++ while ($order[$pos] ne $section);
warning "$section: not following section $order[$pos-1]";
}
}
}
sub check_charset {
my @blocks = @_;
for my $section (@blocks) {
if ($section =~ m/^%\s*[Cc]arset:\s*$/) {
return;
}
}
warning "Missing '% Charset: ' info";
}
my $section;
my %sections;
my @blocks = ();
for my $filename (@ARGV) {
open (FILE, "<$filename") || die "Unable to read $filename";
$locale = $filename;
my $buf = undef;
my @lines;
%sections = ();
while () {
chomp;
s/%.*$//; # Remove comments
s/\#.*$//;
if (m%^(.+)/$%) {
$buf .= $1;
next;
}
if (! defined $buf) {
$buf = $_;
} else {
$buf .= $_;
}
if (!$section && m/^(LC_.*)\s*$/) {
$section = $1;
@lines = ();
#print "Found section $section\n";
}
push(@lines, $buf);
if (m/^END (LC_.+)\s*$/) {
if (exists $sections{$section}) {
warning "duplicate section $section";
}
push(@blocks, $section);
$sections{$section} = [@lines];
$section = "";
#print "Stored section $section\n";
}
undef $buf;
}
close(FILE);
# check_order(@blocks);
check_charset(@blocks);
check_lc_identification(@{$sections{'LC_IDENTIFICATION'}});
check_lc_messages(@{$sections{'LC_MESSAGES'}});
check_lc_numeric(@{$sections{'LC_NUMERIC'}});
check_lc_paper(@{$sections{'LC_PAPER'}});
check_lc_measurement(@{$sections{'LC_MEASUREMENT'}});
}