]> pere.pagekite.me Git - homepage.git/blob - linux/glibc/check-locale
Link til gmanespam.
[homepage.git] / linux / glibc / check-locale
1 #!/usr/bin/perl
2 #
3 # Author: Petter Reinholdtsen
4 # Date: 2004-06-01
5 #
6 # Test the content of GNU libc locales, detect some common errors.
7 #
8 # The latest version is available from
9 # <URL:http://www.student.uit.no/~pere/linux/glibc/>.
10
11 use warnings;
12 use strict;
13
14 use vars qw($locale $warncount $errcount);
15
16 sub uxx_to_utf8 {
17 my $line = shift;
18 $line =~ s/<U([0-9A-Za-z]{4})>/pack('U',hex($1))/ge;
19 return $line;
20 }
21
22 sub error {
23 my $msg = shift;
24 $errcount++;
25 print "error: $locale: $msg\n";
26 }
27
28 sub warning {
29 my $msg = shift;
30 $warncount++;
31 print "warning: $locale: $msg\n";
32 }
33
34 sub check_lc_identification {
35 my @lines = @_;
36 for my $line (@lines) {
37 if ($line =~ m/^category\s+(\S+)$/) {
38 warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1"
39 if ($1 !~ m/\".+\"/);
40 }
41 if ($line =~ m/^email\s+(\S+)\s*$/) {
42 my $email = $1;
43 warning "LC_IDENTIFICATION: obsolete email: $email"
44 if ($email =~ m/"?bug-glibc\@gnu.org"?/);
45 }
46 }
47 }
48 sub check_lc_paper {
49 my @lines = @_;
50 my $height = undef;
51 my $width = undef;
52 for (@lines) {
53 $height = $1 if (m/^height\s*(\d+)\s*$/);
54 $width = $1 if (m/^width\s*(\d+)\s*$/);
55 return if (m/^copy\s+/); # Nothing to check
56 }
57
58 if (!defined $width || defined $height) {
59 # warning "LC_PAPER: Missing height or width.";
60 return;
61 }
62
63 if (210 == $width && 297 == $height) { # ISO A4
64 } elsif (216 == $width && 279 == $height) { # US Letter
65 } else {
66 warning "LC_PAPER: unknown paper size.";
67 }
68 }
69
70 sub check_lc_measurement {
71 my @lines = @_;
72 for my $line (@lines) {
73 if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) {
74 my $value = $2;
75 if (defined $value && $value !~ m/^\d+$/) {
76 warning "LC_MEASUREMENT: measurements should be number 1 or 2.";
77 } elsif ($value < 1 && 2 < $value) {
78 warning "LC_MEASUREMENT: measurements should be 1 or 2.";
79 }
80 }
81 }
82 }
83
84 sub check_lc_numeric {
85 my @lines = @_;
86 for my $line (@lines) {
87 next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC");
88 next if ($line =~ m/^$/);
89 if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) {
90 my $value = $2;
91 if ($value =~ m/^-?\d+$/) {
92 # Only digits (or - digits)
93 if ( $value < 1 && $value != -1) {
94 warning "LC_NUMERIC: grouping should positive or -1: $value";
95 }
96 } else {
97 if (defined $value && $value !~ m/\d+;\d+/) {
98 warning "LC_NUMERIC: grouping should use ; as separator: $value";
99 }
100 }
101 } elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) {
102 } elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) {
103 my $sep = $2;
104 # print "P: '$sep'\n";
105 warning "LC_NUMERIC: Unusual thousands_sep '$sep' [".
106 uxx_to_utf8($sep)."]"
107 unless (grep { $_ eq $sep; } ('"<U0020>"',
108 '"<U0027>"',
109 '"<U002C>"',
110 '"<U002E>"',
111 '"<U00A0>"',
112 '""'));
113 } elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) {
114 } else {
115 warning "LC_NUMERIC: Unknown keyword '$line'";
116 }
117 }
118 }
119
120 sub check_lc_messages {
121 my @lines = @_;
122 for (@lines) {
123 if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) {
124 my $type = $1;
125 my $regex = uxx_to_utf8($2);
126 unless ($regex =~ m/^"\^/) {
127 error "LC_MESSAGES: $type missing '^' prefix: $regex";
128 }
129 unless ($regex =~ m/\[.+\]|\(.+\)/) {
130 error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex";
131 }
132 if ($regex =~ m/\.\*"$/) {
133 warning "LC_MESSAGES: $type have '.*' postfix: $regex";
134 }
135 if ($regex =~ m/[0-9]/) {
136 warning "LC_MESSAGES: $type have numbers in regex: $regex";
137 }
138 if ($type eq "yesexpr" && ($regex !~ m/y/ ||
139 $regex !~ m/Y/)) {
140 warning "LC_MESSAGES: $type missing 'yY' in content: $regex";
141 }
142 if ($type eq "noexpr" && ($regex !~ m/n/ ||
143 $regex !~ m/N/)) {
144 warning "LC_MESSAGES: $type missing 'nN' in content: $regex";
145 }
146 }
147 }
148 }
149
150 my $section;
151 my %sections;
152 my @blocks = ();
153 for my $filename (@ARGV) {
154 open (FILE, "<$filename") || die "Unable to read $filename";
155 $locale = $filename;
156 my $buf = undef;
157 my @lines;
158 %sections = ();
159 while (<FILE>) {
160 chomp;
161 s/%.*$//; # Remove comments
162 s/\#.*$//;
163 if (m%^(.+)/$%) {
164 $buf .= $1;
165 next;
166 }
167 if (! defined $buf) {
168 $buf = $_;
169 } else {
170 $buf .= $_;
171 }
172 if (!$section && m/^(LC_.*)\s*$/) {
173 $section = $1;
174 @lines = ();
175 #print "Found section $section\n";
176 }
177 push(@lines, $buf);
178 if (m/^END (LC_.+)\s*$/) {
179 if (exists $sections{$section}) {
180 warning "duplicate section $section";
181 }
182 push(@blocks, $section);
183 $sections{$section} = [@lines];
184 $section = "";
185 #print "Stored section $section\n";
186 }
187 undef $buf;
188 }
189 close(FILE);
190
191 check_order(@blocks);
192 check_lc_identification(@{$sections{'LC_IDENTIFICATION'}});
193 check_lc_messages(@{$sections{'LC_MESSAGES'}});
194 check_lc_numeric(@{$sections{'LC_NUMERIC'}});
195 check_lc_paper(@{$sections{'LC_PAPER'}});
196 check_lc_measurement(@{$sections{'LC_MEASUREMENT'}});
197 }
198 sub check_order {
199 my @blocks = @_;
200 my @order= qw(LC_IDENTIFICATION
201 LC_CTYPE
202 LC_COLLATE
203 LC_MONETARY
204 LC_NUMERIC
205 LC_TIME
206 LC_MESSAGES
207 LC_PAPER
208 LC_NAME
209 LC_ADDRESS
210 LC_TELEPHONE
211 LC_MEASUREMENT);
212 my $pos = 0;
213 for my $section (@blocks) {
214 if ($section eq $order[$pos]) {
215 $pos++;
216 } else {
217 $pos = 0;
218 $pos++ while ($order[$pos] ne $section);
219 warning "$section: not following section $order[$pos-1]";
220 }
221 }
222 }