Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1 | pmbaty | 1 | #!/usr/bin/perl -w |
2 | |||
3 | use warnings; |
||
4 | use strict; |
||
5 | |||
# Number of buckets in each generated hash table. The hashing code further
# down masks with (count - 1), which presumably assumes every count is a
# power of two -- keep that in mind when changing these.
my (
    $HASHBUCKETS1_16,    # 16-bit codepoints that fold to one codepoint
    $HASHBUCKETS1_32,    # codepoints above 0xFFFF that fold to one codepoint
    $HASHBUCKETS2_16,    # 16-bit codepoints that fold to two codepoints
    $HASHBUCKETS3_16,    # 16-bit codepoints that fold to three codepoints
) = (256, 16, 16, 4);
||
10 | |||
# Emit the fixed preamble of the generated C header: the banner comment, the
# include guard, the internal-use check, and the struct types used by the
# tables printed later in this script.
#
# The heredoc is single-quoted so that nothing in the C text can ever be
# interpolated by Perl. The explanatory C comment names the four tables this
# script actually emits (three 16-bit tables plus one 32-bit table).
print <<'__EOF__';
/*
* This file is part of PhysicsFS (https://icculus.org/physfs/)
*
* This data generated by physfs/extras/makecasefoldhashtable.pl ...
* Do not manually edit this file!
*
* Please see the file LICENSE.txt in the source's root directory.
*/

#ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
#define _INCLUDE_PHYSFS_CASEFOLDING_H_

#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif

/* We build four simple hashmaps here, which map Unicode codepoints to
one, two, or three lowercase codepoints. To retrieve this info: look at
case_fold_hashX_16, where X is 1, 2, or 3. Most foldable codepoints fold to one,
a few dozen fold to two, and a handful fold to three. If the codepoint isn't
in any of these hashes, it doesn't fold (no separate upper and lowercase).

Almost all these codepoints fit into 16 bits, so we hash them as such to save
memory. If a codepoint is > 0xFFFF, we have a separate hash for it
(case_fold_hash1_32), since there are (currently) only about 120 of them and
(currently) all of them map to a single lowercase codepoint. */

typedef struct CaseFoldMapping1_32
{
PHYSFS_uint32 from;
PHYSFS_uint32 to0;
} CaseFoldMapping1_32;

typedef struct CaseFoldMapping1_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
} CaseFoldMapping1_16;

typedef struct CaseFoldMapping2_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
} CaseFoldMapping2_16;

typedef struct CaseFoldMapping3_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
PHYSFS_uint16 to2;
} CaseFoldMapping3_16;

typedef struct CaseFoldHashBucket1_16
{
const CaseFoldMapping1_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_16;

typedef struct CaseFoldHashBucket1_32
{
const CaseFoldMapping1_32 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_32;

typedef struct CaseFoldHashBucket2_16
{
const CaseFoldMapping2_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket2_16;

typedef struct CaseFoldHashBucket3_16
{
const CaseFoldMapping3_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket3_16;

__EOF__
||
91 | |||
92 | |||
# One accumulator string per hash bucket, for each of the four tables. Every
# slot starts out as the empty string; the parser appends C initializer lines
# to the slot selected by the hash of the source codepoint.
my @foldPairs1_16 = ('') x $HASHBUCKETS1_16;
my @foldPairs2_16 = ('') x $HASHBUCKETS2_16;
my @foldPairs3_16 = ('') x $HASHBUCKETS3_16;
my @foldPairs1_32 = ('') x $HASHBUCKETS1_32;
||
113 | |||
# Read casefolding.txt from the current directory and sort every usable
# mapping into the bucket string of the matching table.
#
# A lexical filehandle and a named line variable replace the bareword FH and
# implicit $_, and close() is checked so that a read error cannot silently
# produce a truncated table.
open(my $fh, '<', 'casefolding.txt') or die("failed to open casefolding.txt: $!\n");
while (my $line = <$fh>) {
    chomp($line);

    # strip comments from textfile...
    $line =~ s/\#.*\Z//;

    # strip whitespace...
    $line =~ s/\A\s+//;
    $line =~ s/\s+\Z//;

    # Data lines have the shape "<code>; <status>; <mapping>;" -- anything
    # else (blank lines, comment-only lines) is skipped.
    next if $line !~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);

    my $hexxed = hex($code);
    #print("// code '$code' status '$status' mapping '$mapping'\n");

    # Only entries with status C or F are used; every other status is ignored.
    next if ($status ne 'C') and ($status ne 'F');

    # Pull up to three whitespace-separated hex codepoints off the front of
    # the mapping; anything left over means a longer fold than we support.
    my @maps;
    while ((@maps < 3) and ($mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//)) {
        push(@maps, $1);
    }
    my ($map1, $map2, $map3) = @maps;
    die("mapping space too small for '$code'\n") if ($mapping ne '');
    die("problem parsing mapping for '$code'\n") if (not defined($map1));

    # Just ignore these, we'll handle the low-ASCII ones ourselves.
    next if $hexxed < 128;

    # Same mix for every table; only the mask (bucket count - 1) differs.
    my $mixed = $hexxed ^ ($hexxed >> 8);

    if ($hexxed > 0xFFFF) {
        # We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
        die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
        my $hashed = ($mixed & ($HASHBUCKETS1_32-1));
        $foldPairs1_32[$hashed] .= " { 0x$code, 0x$map1 },\n";
    } elsif (not defined($map2)) {
        my $hashed = ($mixed & ($HASHBUCKETS1_16-1));
        $foldPairs1_16[$hashed] .= " { 0x$code, 0x$map1 },\n";
    } elsif (not defined($map3)) {
        my $hashed = ($mixed & ($HASHBUCKETS2_16-1));
        $foldPairs2_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
    } else {
        my $hashed = ($mixed & ($HASHBUCKETS3_16-1));
        $foldPairs3_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
    }
}
close($fh) or die("failed to read casefolding.txt: $!\n");
||
162 | |||
# Print one static C array for every non-empty bucket of a table.
#
#   $suffix  - table suffix such as '1_16'; used in both the C type name
#              (CaseFoldMapping<suffix>) and the symbol (case_fold<suffix>_NNN)
#   $count   - number of buckets in the table
#   $buckets - reference to the bucket strings; the trailing ",\n" of each
#              string is stripped in place
#
# The bucket index is zero-padded with sprintf; unlike the earlier regex-based
# padding this cannot truncate an index of 1000 or more to its last three
# digits, and it yields identical names for every index below 1000.
sub _emit_fold_arrays {
    my ($suffix, $count, $buckets) = @_;

    for my $i (0 .. $count - 1) {
        $buckets->[$i] =~ s/,\n\Z//;
        my $str = $buckets->[$i];
        next if $str eq '';
        my $sym = sprintf('case_fold%s_%03d', $suffix, $i);
        print("static const CaseFoldMapping${suffix} ${sym}[] = {\n$str\n};\n\n");
    }

    return;
}

_emit_fold_arrays('1_16', $HASHBUCKETS1_16, \@foldPairs1_16);
_emit_fold_arrays('1_32', $HASHBUCKETS1_32, \@foldPairs1_32);
_emit_fold_arrays('2_16', $HASHBUCKETS2_16, \@foldPairs2_16);
_emit_fold_arrays('3_16', $HASHBUCKETS3_16, \@foldPairs3_16);
||
202 | |||
# Print the top-level bucket table for one hash: one { list, count } entry per
# bucket, pointing at the per-bucket array of the same index, or { NULL, 0 }
# when the bucket holds no mappings.
#
#   $suffix  - table suffix such as '1_16'; used in the C type name
#              (CaseFoldHashBucket<suffix>), the table name
#              (case_fold_hash<suffix>) and the per-bucket symbols
#   $count   - number of buckets in the table
#   $buckets - reference to the bucket strings (only tested for emptiness)
#
# The bucket index is zero-padded to three digits with sprintf, which gives
# the same symbol names as the per-bucket arrays for every index below 1000.
sub _emit_fold_hash {
    my ($suffix, $count, $buckets) = @_;

    print("static const CaseFoldHashBucket${suffix} case_fold_hash${suffix}[] = {\n");

    for my $i (0 .. $count - 1) {
        if ($buckets->[$i] eq '') {
            print(" { NULL, 0 },\n");
        } else {
            my $sym = sprintf('case_fold%s_%03d', $suffix, $i);
            print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
        }
    }

    print("};\n\n");
    return;
}

_emit_fold_hash('1_16', $HASHBUCKETS1_16, \@foldPairs1_16);
_emit_fold_hash('1_32', $HASHBUCKETS1_32, \@foldPairs1_32);
_emit_fold_hash('2_16', $HASHBUCKETS2_16, \@foldPairs2_16);
_emit_fold_hash('3_16', $HASHBUCKETS3_16, \@foldPairs3_16);
||
264 | |||
# Close the include guard of the generated header, add its end-of-file marker
# comment, and finish with a success status.
my $footer = <<'__EOF__';

#endif /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */

/* end of physfs_casefolding.h ... */

__EOF__
print $footer;

exit 0;
||
274 | |||
275 | # end of makecasefoldhashtable.pl ... |
||
276 |