Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 1 | pmbaty | 1 | #!/usr/bin/perl -w |
| 2 | |||
| 3 | use warnings; |
||
| 4 | use strict; |
||
| 5 | |||
# Bucket counts for the four generated hash tables. The parsing loop picks a
# bucket with "hash & (count - 1)", so every count is spelled as a power of two.
my $HASHBUCKETS1_16 = 1 << 8;  # 256: codepoints <= 0xFFFF folding to one codepoint
my $HASHBUCKETS1_32 = 1 << 4;  #  16: codepoints >  0xFFFF folding to one codepoint
my $HASHBUCKETS2_16 = 1 << 4;  #  16: codepoints <= 0xFFFF folding to two codepoints
my $HASHBUCKETS3_16 = 1 << 2;  #   4: codepoints <= 0xFFFF folding to three codepoints
||
| 10 | |||
# Emit the fixed top of the generated C header: the license banner, the
# include guard, the internal-use check, and the struct typedefs used by the
# tables printed later. The text contains nothing to interpolate, so a
# non-interpolating heredoc is used.
print <<'END_OF_C_PREAMBLE';
/*
* This file is part of PhysicsFS (https://icculus.org/physfs/)
*
* This data generated by physfs/extras/makecasefoldhashtable.pl ...
* Do not manually edit this file!
*
* Please see the file LICENSE.txt in the source's root directory.
*/

#ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
#define _INCLUDE_PHYSFS_CASEFOLDING_H_

#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif

/* We build three simple hashmaps here: one that maps Unicode codepoints to
a one, two, or three lowercase codepoints. To retrieve this info: look at
case_fold_hashX, where X is 1, 2, or 3. Most foldable codepoints fold to one,
a few dozen fold to two, and a handful fold to three. If the codepoint isn't
in any of these hashes, it doesn't fold (no separate upper and lowercase).

Almost all these codepoints fit into 16 bits, so we hash them as such to save
memory. If a codepoint is > 0xFFFF, we have separate hashes for them,
since there are (currently) only about 120 of them and (currently) all of them
map to a single lowercase codepoint. */

typedef struct CaseFoldMapping1_32
{
PHYSFS_uint32 from;
PHYSFS_uint32 to0;
} CaseFoldMapping1_32;

typedef struct CaseFoldMapping1_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
} CaseFoldMapping1_16;

typedef struct CaseFoldMapping2_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
} CaseFoldMapping2_16;

typedef struct CaseFoldMapping3_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
PHYSFS_uint16 to2;
} CaseFoldMapping3_16;

typedef struct CaseFoldHashBucket1_16
{
const CaseFoldMapping1_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_16;

typedef struct CaseFoldHashBucket1_32
{
const CaseFoldMapping1_32 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_32;

typedef struct CaseFoldHashBucket2_16
{
const CaseFoldMapping2_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket2_16;

typedef struct CaseFoldHashBucket3_16
{
const CaseFoldMapping3_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket3_16;

END_OF_C_PREAMBLE
||
| 91 | |||
| 92 | |||
# One string per hash bucket for each table. The parsing loop appends C
# initializer rows to these strings; a bucket that stays '' is emitted as an
# empty bucket later on.
my @foldPairs1_16 = ('') x $HASHBUCKETS1_16;
my @foldPairs2_16 = ('') x $HASHBUCKETS2_16;
my @foldPairs3_16 = ('') x $HASHBUCKETS3_16;
my @foldPairs1_32 = ('') x $HASHBUCKETS1_32;
||
| 113 | |||
# Read casefolding.txt from the current directory and sort every usable
# mapping into the bucket strings of the matching table.
#
# Lines are expected to look like "<hex code>; <status>; <hex mapping(s)>;"
# once comments and surrounding whitespace are removed; anything else is
# skipped. Only status 'C' and 'F' entries are used.
#
# Dies if the file cannot be opened, if a mapping has more than three
# codepoints or cannot be parsed, if a codepoint above 0xFFFF folds to more
# than one codepoint, or if a 16-bit codepoint folds to something that does
# not fit the 16-bit table fields.
open(my $casefold_fh, '<', 'casefolding.txt') or die("failed to open casefolding.txt: $!\n");
while (my $line = <$casefold_fh>) {
    chomp($line);

    # strip comments from textfile...
    $line =~ s/\#.*\Z//;

    # strip whitespace...
    $line =~ s/\A\s+//;
    $line =~ s/\s+\Z//;

    next if $line !~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);

    next if (($status ne 'C') and ($status ne 'F'));

    # Peel up to three hex codepoints off the front of the mapping. Once one
    # attempt fails the string is unchanged, so later attempts would fail too.
    my @folded;
    for my $slot (1 .. 3) {
        last unless $mapping =~ s/\A([a-fA-F0-9]+)\s*//;
        push(@folded, $1);
    }
    my ($map1, $map2, $map3) = @folded;

    die("mapping space too small for '$code'\n") if ($mapping ne '');
    die("problem parsing mapping for '$code'\n") if (not defined($map1));

    my $hexxed = hex($code);

    # Just ignore these, we'll handle the low-ASCII ones ourselves.
    next if $hexxed < 128;

    # Every table hashes the same way; only the bucket-count mask differs.
    my $mixed = $hexxed ^ ($hexxed >> 8);

    if ($hexxed > 0xFFFF) {
        # We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
        die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
        $foldPairs1_32[$mixed & ($HASHBUCKETS1_32-1)] .= " { 0x$code, 0x$map1 },\n";
        next;
    }

    # The remaining tables store every field as PHYSFS_uint16, so a target
    # above 0xFFFF would be truncated in the generated C. Fail loudly instead.
    for my $target (@folded) {
        die("Uhoh, 16-bit codepoint '$code' folds to '$target', which is > 0xFFFF! Fixme.\n")
            if hex($target) > 0xFFFF;
    }

    if (not defined($map2)) {
        $foldPairs1_16[$mixed & ($HASHBUCKETS1_16-1)] .= " { 0x$code, 0x$map1 },\n";
    } elsif (not defined($map3)) {
        $foldPairs2_16[$mixed & ($HASHBUCKETS2_16-1)] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
    } else {
        $foldPairs3_16[$mixed & ($HASHBUCKETS3_16-1)] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
    }
}
close($casefold_fh);
||
| 162 | |||
# Build the C symbol name for one bucket's mapping array, e.g.
# ("1_16", 5) -> "case_fold1_16_005". The index is zero-padded to at least
# three digits and never truncated, so names stay unique for any bucket count.
sub _bucket_symbol {
    my ($suffix, $index) = @_;
    return sprintf('case_fold%s_%03d', $suffix, $index);
}

# The four generated tables, in the order they are written out.
my @fold_tables = (
    { suffix => '1_16', pairs => \@foldPairs1_16, buckets => $HASHBUCKETS1_16 },
    { suffix => '1_32', pairs => \@foldPairs1_32, buckets => $HASHBUCKETS1_32 },
    { suffix => '2_16', pairs => \@foldPairs2_16, buckets => $HASHBUCKETS2_16 },
    { suffix => '3_16', pairs => \@foldPairs3_16, buckets => $HASHBUCKETS3_16 },
);

# Pass 1: one static mapping array per non-empty bucket.
for my $table (@fold_tables) {
    my $suffix = $table->{suffix};
    for my $i (0 .. $table->{buckets} - 1) {
        # Drop the trailing ",\n" left by the last appended row. The change is
        # stored back into the bucket string, as the original loops did.
        $table->{pairs}[$i] =~ s/,\n\Z//;
        my $str = $table->{pairs}[$i];
        next if $str eq '';
        my $sym = _bucket_symbol($suffix, $i);
        print("static const CaseFoldMapping${suffix} ${sym}[] = {\n$str\n};\n\n");
    }
}

# Pass 2: one bucket table per hash, pointing at the arrays from pass 1.
# Empty buckets become { NULL, 0 }.
for my $table (@fold_tables) {
    my $suffix = $table->{suffix};
    print("static const CaseFoldHashBucket${suffix} case_fold_hash${suffix}[] = {\n");
    for my $i (0 .. $table->{buckets} - 1) {
        if ($table->{pairs}[$i] eq '') {
            print(" { NULL, 0 },\n");
        } else {
            my $sym = _bucket_symbol($suffix, $i);
            print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
        }
    }
    print("};\n\n");
}
||
| 264 | |||
# Close the include guard opened at the top of the generated header, write the
# end-of-file marker, and finish successfully.
print(
    "\n",
    "#endif /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */\n",
    "\n",
    "/* end of physfs_casefolding.h ... */\n",
    "\n",
);

exit(0);
||
| 274 | |||
| 275 | # end of makecasefoldhashtable.pl ... |
||
| 276 |