]> pere.pagekite.me Git - homepage.git/blob - linux/glibc/check-locale
Generated.
[homepage.git] / linux / glibc / check-locale
1 #!/usr/bin/perl
2 #
3 # Author: Petter Reinholdtsen
4 # Date: 2004-06-01
5 #
6 # License: GPL v2 or later at your choice
7 #
8 # Test the content of GNU libc locales, detect some common errors.
9 #
10 # The latest version is available from
11 # <URL:http://www.hungry.com/~pere/linux/glibc/>.
12
13 use warnings;
14 use strict;
15
16 use vars qw($locale $warncount $errcount);
17
18 sub uxx_to_utf8 {
19 my $line = shift;
20 $line =~ s/<U([0-9A-Za-z]{4})>/pack('U',hex($1))/ge;
21 return $line;
22 }
23
24 sub error {
25 my $msg = shift;
26 $errcount++;
27 print "error: $locale: $msg\n";
28 }
29
30 sub warning {
31 my $msg = shift;
32 $warncount++;
33 print "warning: $locale: $msg\n";
34 }
35
36 sub check_lc_identification {
37 my @lines = @_;
38 for my $line (@lines) {
39 if ($line =~ m/^category\s+(\S+)$/) {
40 warning "LC_IDENTIFICATION: missing quotes around category standard ref: $1"
41 if ($1 !~ m/\".+\"/);
42 }
43 if ($line =~ m/^email\s+(\S+)\s*$/) {
44 my $email = $1;
45 warning "LC_IDENTIFICATION: obsolete email: $email"
46 if ($email =~ m/"?bug-glibc\@gnu.org"?/);
47 }
48 }
49 }
50 sub check_lc_paper {
51 my @lines = @_;
52 my $height = undef;
53 my $width = undef;
54 for (@lines) {
55 $height = $1 if (m/^height\s*(\d+)\s*$/);
56 $width = $1 if (m/^width\s*(\d+)\s*$/);
57 return if (m/^copy\s+/); # Nothing to check
58 }
59
60 if (!defined $width || defined $height) {
61 # warning "LC_PAPER: Missing height or width.";
62 return;
63 }
64
65 if (210 == $width && 297 == $height) { # ISO A4
66 } elsif (216 == $width && 279 == $height) { # US Letter
67 } else {
68 warning "LC_PAPER: unknown paper size.";
69 }
70 }
71
72 sub check_lc_measurement {
73 my @lines = @_;
74 for my $line (@lines) {
75 if ($line =~ m/^\s*(measurement)\s+(\S+)\s*$/) {
76 my $value = $2;
77 if (defined $value && $value !~ m/^\d+$/) {
78 warning "LC_MEASUREMENT: measurements should be number 1 or 2.";
79 } elsif ($value < 1 && 2 < $value) {
80 warning "LC_MEASUREMENT: measurements should be 1 or 2.";
81 }
82 }
83 }
84 }
85
86 sub check_lc_numeric {
87 my @lines = @_;
88 for my $line (@lines) {
89 next if ($line eq "LC_NUMERIC" || $line eq "END LC_NUMERIC");
90 next if ($line =~ m/^$/);
91 if ($line =~ m/^\s*(grouping)\s+(\S+)\s*$/) {
92 my $value = $2;
93 if ($value =~ m/^-?\d+$/) {
94 # Only digits (or - digits)
95 if ( $value < 1 && $value != -1) {
96 warning "LC_NUMERIC: grouping should positive or -1: $value";
97 }
98 } else {
99 if (defined $value && $value !~ m/\d+;\d+/) {
100 warning "LC_NUMERIC: grouping should use ; as separator: $value";
101 }
102 }
103 } elsif ($line =~ m/^\s*(decimal_point)\s+(\S+)\s*$/) {
104 } elsif ($line =~ m/^\s*(thousands_sep)\s+(\S+)\s*$/) {
105 my $sep = $2;
106 # print "P: '$sep'\n";
107 warning "LC_NUMERIC: Unusual thousands_sep '$sep' [".
108 uxx_to_utf8($sep)."]"
109 unless (grep { $_ eq $sep; } ('"<U0020>"',
110 '"<U0027>"',
111 '"<U002C>"',
112 '"<U002E>"',
113 '"<U00A0>"',
114 '""'));
115 } elsif ($line =~ m/^\s*(copy)\s+(\S+)\s*$/) {
116 } else {
117 warning "LC_NUMERIC: Unknown keyword '$line'";
118 }
119 }
120 }
121
122 sub check_lc_messages {
123 my @lines = @_;
124 for (@lines) {
125 if (m/^\s*(yesexpr|noexpr)\s+(.+)$/) {
126 my $type = $1;
127 my $regex = uxx_to_utf8($2);
128 unless ($regex =~ m/^"\^/) {
129 error "LC_MESSAGES: $type missing '^' prefix: $regex";
130 }
131 unless ($regex =~ m/\[.+\]|\(.+\)/) {
132 error "LC_MESSAGES: $type missing '[.+]|(.+)' content: $regex";
133 }
134 if ($regex =~ m/\.\*"$/) {
135 warning "LC_MESSAGES: $type have '.*' postfix: $regex";
136 }
137 if ($regex =~ m/[0-9]/) {
138 warning "LC_MESSAGES: $type have numbers in regex: $regex";
139 }
140 if ($type eq "yesexpr" && ($regex !~ m/y/ ||
141 $regex !~ m/Y/)) {
142 warning "LC_MESSAGES: $type missing 'yY' in content: $regex";
143 }
144 if ($type eq "noexpr" && ($regex !~ m/n/ ||
145 $regex !~ m/N/)) {
146 warning "LC_MESSAGES: $type missing 'nN' in content: $regex";
147 }
148 }
149 }
150 }
151
152 sub check_order {
153 my @blocks = @_;
154 my @order= qw(LC_IDENTIFICATION
155 LC_CTYPE
156 LC_COLLATE
157 LC_MONETARY
158 LC_NUMERIC
159 LC_TIME
160 LC_MESSAGES
161 LC_PAPER
162 LC_NAME
163 LC_ADDRESS
164 LC_TELEPHONE
165 LC_MEASUREMENT);
166 my $pos = 0;
167 for my $section (@blocks) {
168 if ($section eq $order[$pos]) {
169 $pos++;
170 } else {
171 $pos = 0;
172 $pos++ while ($order[$pos] ne $section);
173 warning "$section: not following section $order[$pos-1]";
174 }
175 }
176 }
177
178 sub check_charset {
179 my @blocks = @_;
180 for my $section (@blocks) {
181 if ($section =~ m/^%\s*[Cc]arset:\s*$/) {
182 return;
183 }
184 }
185 warning "Missing '% Charset: <charset>' info";
186 }
187
188 my $section;
189 my %sections;
190 my @blocks = ();
191 for my $filename (@ARGV) {
192 open (FILE, "<$filename") || die "Unable to read $filename";
193 $locale = $filename;
194 my $buf = undef;
195 my @lines;
196 %sections = ();
197 while (<FILE>) {
198 chomp;
199 s/%.*$//; # Remove comments
200 s/\#.*$//;
201 if (m%^(.+)/$%) {
202 $buf .= $1;
203 next;
204 }
205 if (! defined $buf) {
206 $buf = $_;
207 } else {
208 $buf .= $_;
209 }
210 if (!$section && m/^(LC_.*)\s*$/) {
211 $section = $1;
212 @lines = ();
213 #print "Found section $section\n";
214 }
215 push(@lines, $buf);
216 if (m/^END (LC_.+)\s*$/) {
217 if (exists $sections{$section}) {
218 warning "duplicate section $section";
219 }
220 push(@blocks, $section);
221 $sections{$section} = [@lines];
222 $section = "";
223 #print "Stored section $section\n";
224 }
225 undef $buf;
226 }
227 close(FILE);
228
229 # check_order(@blocks);
230 check_charset(@blocks);
231 check_lc_identification(@{$sections{'LC_IDENTIFICATION'}});
232 check_lc_messages(@{$sections{'LC_MESSAGES'}});
233 check_lc_numeric(@{$sections{'LC_NUMERIC'}});
234 check_lc_paper(@{$sections{'LC_PAPER'}});
235 check_lc_measurement(@{$sections{'LC_MEASUREMENT'}});
236 }