Subversion Repositories Games.Descent

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
#!/usr/bin/perl -w

use warnings;
use strict;

# Number of buckets in each generated hash table.  The suffix names the
# table: <number of codepoints folded to>_<bit width of the source codepoint>.
# The bucket index is computed further down as
#     ($codepoint ^ ($codepoint >> 8)) & ($HASHBUCKETS... - 1)
# so every value here must be a power of two, otherwise some buckets can
# never be selected by that mask.
my $HASHBUCKETS1_16 = 256;  # codepoints <= 0xFFFF that fold to one codepoint
my $HASHBUCKETS1_32 = 16;   # codepoints >  0xFFFF that fold to one codepoint
my $HASHBUCKETS2_16 = 16;   # codepoints <= 0xFFFF that fold to two codepoints
my $HASHBUCKETS3_16 = 4;    # codepoints <= 0xFFFF that fold to three codepoints

# Emit the fixed top of the generated C header: license banner, include
# guard, the internal-use check, and the struct definitions for the mapping
# entries and hash buckets that the tables printed later are built from.
print <<__EOF__;
/*
 * This file is part of PhysicsFS (https://icculus.org/physfs/)
 *
 * This data generated by physfs/extras/makecasefoldhashtable.pl ...
 * Do not manually edit this file!
 *
 * Please see the file LICENSE.txt in the source's root directory.
 */

#ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
#define _INCLUDE_PHYSFS_CASEFOLDING_H_

#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif

/* We build three simple hashmaps here: one that maps Unicode codepoints to
a one, two, or three lowercase codepoints. To retrieve this info: look at
case_fold_hashX, where X is 1, 2, or 3. Most foldable codepoints fold to one,
a few dozen fold to two, and a handful fold to three. If the codepoint isn't
in any of these hashes, it doesn't fold (no separate upper and lowercase).

Almost all these codepoints fit into 16 bits, so we hash them as such to save
memory. If a codepoint is > 0xFFFF, we have separate hashes for them,
since there are (currently) only about 120 of them and (currently) all of them
map to a single lowercase codepoint. */

typedef struct CaseFoldMapping1_32
{
    PHYSFS_uint32 from;
    PHYSFS_uint32 to0;
} CaseFoldMapping1_32;

typedef struct CaseFoldMapping1_16
{
    PHYSFS_uint16 from;
    PHYSFS_uint16 to0;
} CaseFoldMapping1_16;

typedef struct CaseFoldMapping2_16
{
    PHYSFS_uint16 from;
    PHYSFS_uint16 to0;
    PHYSFS_uint16 to1;
} CaseFoldMapping2_16;

typedef struct CaseFoldMapping3_16
{
    PHYSFS_uint16 from;
    PHYSFS_uint16 to0;
    PHYSFS_uint16 to1;
    PHYSFS_uint16 to2;
} CaseFoldMapping3_16;

typedef struct CaseFoldHashBucket1_16
{
    const CaseFoldMapping1_16 *list;
    const PHYSFS_uint8 count;
} CaseFoldHashBucket1_16;

typedef struct CaseFoldHashBucket1_32
{
    const CaseFoldMapping1_32 *list;
    const PHYSFS_uint8 count;
} CaseFoldHashBucket1_32;

typedef struct CaseFoldHashBucket2_16
{
    const CaseFoldMapping2_16 *list;
    const PHYSFS_uint8 count;
} CaseFoldHashBucket2_16;

typedef struct CaseFoldHashBucket3_16
{
    const CaseFoldMapping3_16 *list;
    const PHYSFS_uint8 count;
} CaseFoldHashBucket3_16;

__EOF__

# Per-bucket accumulators, one array per generated table.  Element $i holds
# the C initializer text ("    { 0x..., 0x... },\n" lines) collected for
# bucket $i.  Every slot starts as the empty string so that buckets which
# never receive a mapping still compare equal to '' when the tables are
# printed.
my @foldPairs1_16 = ('') x $HASHBUCKETS1_16;
my @foldPairs2_16 = ('') x $HASHBUCKETS2_16;
my @foldPairs3_16 = ('') x $HASHBUCKETS3_16;
my @foldPairs1_32 = ('') x $HASHBUCKETS1_32;

# Read 'casefolding.txt' from the current directory and append every usable
# mapping to the bucket string it hashes into.
#
# After comments and surrounding whitespace are stripped, a data line must
# look like "<hex code>; <status char>; <hex mapping> [<hex> [<hex>]];".
# Lines that do not match are skipped, and only statuses 'C' and 'F' are
# kept.  Dies if the file cannot be opened, if a mapping has more than three
# codepoints (or trailing junk), or if a codepoint above 0xFFFF folds to more
# than one codepoint (no table exists for that case).
open(my $fh, '<', 'casefolding.txt') or die("failed to open casefolding.txt: $!\n");
while (<$fh>) {
    chomp;
    # strip comments from textfile...
    s/\#.*\Z//;

    # strip whitespace...
    s/\A\s+//;
    s/\s+\Z//;

    next if not /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
    my ($code, $status, $mapping) = ($1, $2, $3);

    my $hexxed = hex($code);
    #print("// code '$code'   status '$status'   mapping '$mapping'\n");

    # Any status other than 'C' or 'F' is not used by the generated tables.
    next unless ($status eq 'C') or ($status eq 'F');

    # Peel up to three hex codepoints off the front of the mapping; whatever
    # is left afterwards means the entry does not fit our structs.
    my ($map1, $map2, $map3) = (undef, undef, undef);
    $map1 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    $map2 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    $map3 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
    die("mapping space too small for '$code'\n") if ($mapping ne '');
    die("problem parsing mapping for '$code'\n") if (not defined($map1));

    # Same mixing step for every table; only the mask (bucket count - 1)
    # differs per table.
    my $mixed = $hexxed ^ ($hexxed >> 8);

    if ($hexxed < 128) {
        # Just ignore these, we'll handle the low-ASCII ones ourselves.
    } elsif ($hexxed > 0xFFFF) {
        # We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
        die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
        my $hashed = ($mixed & ($HASHBUCKETS1_32-1));
        #print("// hexxed '$hexxed'  hashed1 '$hashed'\n");
        $foldPairs1_32[$hashed] .= "    { 0x$code, 0x$map1 },\n";
    } elsif (not defined($map2)) {
        my $hashed = ($mixed & ($HASHBUCKETS1_16-1));
        #print("// hexxed '$hexxed'  hashed1 '$hashed'\n");
        $foldPairs1_16[$hashed] .= "    { 0x$code, 0x$map1 },\n";
    } elsif (not defined($map3)) {
        my $hashed = ($mixed & ($HASHBUCKETS2_16-1));
        #print("// hexxed '$hexxed'  hashed2 '$hashed'\n");
        $foldPairs2_16[$hashed] .= "    { 0x$code, 0x$map1, 0x$map2 },\n";
    } else {
        my $hashed = ($mixed & ($HASHBUCKETS3_16-1));
        #print("// hexxed '$hexxed'  hashed3 '$hashed'\n");
        $foldPairs3_16[$hashed] .= "    { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
    }
}
close($fh);

# Every generated table, in the order it must appear in the output:
#   [ name suffix, number of buckets, reference to the per-bucket strings ]
my @foldTables = (
    [ '1_16', $HASHBUCKETS1_16, \@foldPairs1_16 ],
    [ '1_32', $HASHBUCKETS1_32, \@foldPairs1_32 ],
    [ '2_16', $HASHBUCKETS2_16, \@foldPairs2_16 ],
    [ '3_16', $HASHBUCKETS3_16, \@foldPairs3_16 ],
);

# Return the C identifier of the mapping array for bucket $index of the table
# named by $suffix, e.g. ('1_16', 7) -> "case_fold1_16_007".  The index is
# zero-padded to at least three digits and never truncated, so symbols stay
# unique even if a table is ever given 1000 or more buckets.
sub bucketSymbol {
    my ($suffix, $index) = @_;
    return sprintf('case_fold%s_%03d', $suffix, $index);
}

# First pass: one static array per non-empty bucket, for all tables.  These
# must be printed before the bucket tables that point at them.
for my $table (@foldTables) {
    my ($suffix, $buckets, $pairs) = @{$table};
    for my $i (0 .. $buckets - 1) {
        # Drop the ",\n" after the last initializer of the bucket.
        $pairs->[$i] =~ s/,\n\Z//;
        my $str = $pairs->[$i];
        next if $str eq '';
        my $sym = bucketSymbol($suffix, $i);
        print("static const CaseFoldMapping${suffix} ${sym}[] = {\n$str\n};\n\n");
    }
}

# Second pass: the bucket tables themselves.  Each entry is either
# { NULL, 0 } for an empty bucket or a pointer to the bucket's array plus
# its element count.
for my $table (@foldTables) {
    my ($suffix, $buckets, $pairs) = @{$table};
    print("static const CaseFoldHashBucket${suffix} case_fold_hash${suffix}[] = {\n");

    for my $i (0 .. $buckets - 1) {
        if ($pairs->[$i] eq '') {
            print("    { NULL, 0 },\n");
        } else {
            my $sym = bucketSymbol($suffix, $i);
            print("    { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
        }
    }
    print("};\n\n");
}

# Close the include guard opened at the top of the generated header.
print <<__EOF__;

#endif  /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */

/* end of physfs_casefolding.h ... */

__EOF__

exit 0;

# end of makecasefoldhashtable.pl ...