Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
26 | pmbaty | 1 | /* ucl_mchw.ch -- matching functions using a window |
2 | |||
3 | This file is part of the UCL data compression library. |
||
4 | |||
5 | Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer |
||
6 | All Rights Reserved. |
||
7 | |||
8 | The UCL library is free software; you can redistribute it and/or |
||
9 | modify it under the terms of the GNU General Public License as |
||
10 | published by the Free Software Foundation; either version 2 of |
||
11 | the License, or (at your option) any later version. |
||
12 | |||
13 | The UCL library is distributed in the hope that it will be useful, |
||
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
16 | GNU General Public License for more details. |
||
17 | |||
18 | You should have received a copy of the GNU General Public License |
||
19 | along with the UCL library; see the file COPYING. |
||
20 | If not, write to the Free Software Foundation, Inc., |
||
21 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
22 | |||
23 | Markus F.X.J. Oberhumer |
||
24 | <markus@oberhumer.com> |
||
25 | http://www.oberhumer.com/opensource/ucl/ |
||
26 | */ |
||
27 | |||
28 | |||
29 | /*********************************************************************** |
||
30 | // |
||
31 | ************************************************************************/ |
||
32 | |||
33 | typedef struct |
||
34 | { |
||
35 | int init; |
||
36 | |||
37 | ucl_uint look; /* bytes in lookahead buffer */ |
||
38 | |||
39 | ucl_uint m_len; |
||
40 | ucl_uint m_off; |
||
41 | |||
42 | ucl_uint last_m_len; |
||
43 | ucl_uint last_m_off; |
||
44 | |||
45 | const ucl_bytep bp; |
||
46 | const ucl_bytep ip; |
||
47 | const ucl_bytep in; |
||
48 | const ucl_bytep in_end; |
||
49 | ucl_bytep out; |
||
50 | |||
51 | ucl_uint32 bb_b; |
||
52 | unsigned bb_k; |
||
53 | unsigned bb_c_endian; |
||
54 | unsigned bb_c_s; |
||
55 | unsigned bb_c_s8; |
||
56 | ucl_bytep bb_p; |
||
57 | ucl_bytep bb_op; |
||
58 | |||
59 | struct ucl_compress_config_t conf; |
||
60 | ucl_uintp result; |
||
61 | |||
62 | ucl_progress_callback_p cb; |
||
63 | |||
64 | ucl_uint textsize; /* text size counter */ |
||
65 | ucl_uint codesize; /* code size counter */ |
||
66 | ucl_uint printcount; /* counter for reporting progress every 1K bytes */ |
||
67 | |||
68 | /* some stats */ |
||
69 | unsigned long lit_bytes; |
||
70 | unsigned long match_bytes; |
||
71 | unsigned long rep_bytes; |
||
72 | unsigned long lazy; |
||
73 | } |
||
74 | UCL_COMPRESS_T; |
||
75 | |||
76 | |||
77 | |||
78 | #if (ACC_OS_TOS && (ACC_CC_PUREC || ACC_CC_TURBOC)) |
||
79 | /* the cast is needed to work around a code generation bug */ |
||
80 | #define getbyte(c) ((c).ip < (c).in_end ? (int) (unsigned) *((c).ip)++ : (-1)) |
||
81 | #else |
||
82 | #define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) |
||
83 | #endif |
||
84 | |||
85 | #include "ucl_swd.ch" |
||
86 | |||
87 | |||
88 | |||
89 | /*********************************************************************** |
||
90 | // |
||
91 | ************************************************************************/ |
||
92 | |||
93 | static int |
||
94 | init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, |
||
95 | const ucl_bytep dict, ucl_uint dict_len, |
||
96 | ucl_uint32 flags ) |
||
97 | { |
||
98 | int r; |
||
99 | |||
100 | assert(!c->init); |
||
101 | c->init = 1; |
||
102 | |||
103 | s->c = c; |
||
104 | |||
105 | c->last_m_len = c->last_m_off = 0; |
||
106 | |||
107 | c->textsize = c->codesize = c->printcount = 0; |
||
108 | c->lit_bytes = c->match_bytes = c->rep_bytes = 0; |
||
109 | c->lazy = 0; |
||
110 | |||
111 | r = swd_init(s,dict,dict_len); |
||
112 | if (r != UCL_E_OK) |
||
113 | { |
||
114 | swd_exit(s); |
||
115 | return r; |
||
116 | } |
||
117 | |||
118 | s->use_best_off = (flags & 1) ? 1 : 0; |
||
119 | return UCL_E_OK; |
||
120 | } |
||
121 | |||
122 | |||
123 | /*********************************************************************** |
||
124 | // |
||
125 | ************************************************************************/ |
||
126 | |||
127 | static int |
||
128 | find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, |
||
129 | ucl_uint this_len, ucl_uint skip ) |
||
130 | { |
||
131 | assert(c->init); |
||
132 | |||
133 | if (skip > 0) |
||
134 | { |
||
135 | assert(this_len >= skip); |
||
136 | swd_accept(s, this_len - skip); |
||
137 | c->textsize += this_len - skip + 1; |
||
138 | } |
||
139 | else |
||
140 | { |
||
141 | assert(this_len <= 1); |
||
142 | c->textsize += this_len - skip; |
||
143 | } |
||
144 | |||
145 | s->m_len = SWD_THRESHOLD; |
||
146 | #ifdef SWD_BEST_OFF |
||
147 | if (s->use_best_off) |
||
148 | memset(s->best_pos,0,sizeof(s->best_pos)); |
||
149 | #endif |
||
150 | swd_findbest(s); |
||
151 | c->m_len = s->m_len; |
||
152 | #if defined(__UCL_CHECKER) |
||
153 | /* s->m_off may be uninitialized if we didn't find a match, |
||
154 | * but then its value will never be used. |
||
155 | */ |
||
156 | c->m_off = (s->m_len == SWD_THRESHOLD) ? 0 : s->m_off; |
||
157 | #else |
||
158 | c->m_off = s->m_off; |
||
159 | #endif |
||
160 | |||
161 | swd_getbyte(s); |
||
162 | |||
163 | if (s->b_char < 0) |
||
164 | { |
||
165 | c->look = 0; |
||
166 | c->m_len = 0; |
||
167 | swd_exit(s); |
||
168 | } |
||
169 | else |
||
170 | { |
||
171 | c->look = s->look + 1; |
||
172 | } |
||
173 | c->bp = c->ip - c->look; |
||
174 | |||
175 | #if 0 |
||
176 | /* brute force match search */ |
||
177 | if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look) |
||
178 | { |
||
179 | const ucl_bytep ip = c->bp; |
||
180 | const ucl_bytep m = c->bp - c->m_off; |
||
181 | const ucl_bytep in = c->in; |
||
182 | |||
183 | if (ip - in > s->n) |
||
184 | in = ip - s->n; |
||
185 | for (;;) |
||
186 | { |
||
187 | while (*in != *ip) |
||
188 | in++; |
||
189 | if (in == ip) |
||
190 | break; |
||
191 | if (in != m) |
||
192 | if (memcmp(in,ip,c->m_len+1) == 0) |
||
193 | printf("%p %p %p %5d\n",in,ip,m,c->m_len); |
||
194 | in++; |
||
195 | } |
||
196 | } |
||
197 | #endif |
||
198 | |||
199 | if (c->cb && c->textsize > c->printcount) |
||
200 | { |
||
201 | (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user); |
||
202 | c->printcount += 1024; |
||
203 | } |
||
204 | |||
205 | return UCL_E_OK; |
||
206 | } |
||
207 | |||
208 | |||
209 | /*********************************************************************** |
||
210 | // bit buffer |
||
211 | ************************************************************************/ |
||
212 | |||
213 | static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize) |
||
214 | { |
||
215 | if (endian != -1) |
||
216 | { |
||
217 | if (endian != 0) |
||
218 | return UCL_E_ERROR; |
||
219 | c->bb_c_endian = endian; |
||
220 | } |
||
221 | if (bitsize != -1) |
||
222 | { |
||
223 | if (bitsize != 8 && bitsize != 16 && bitsize != 32) |
||
224 | return UCL_E_ERROR; |
||
225 | c->bb_c_s = bitsize; |
||
226 | c->bb_c_s8 = bitsize / 8; |
||
227 | } |
||
228 | c->bb_b = 0; c->bb_k = 0; |
||
229 | c->bb_p = NULL; |
||
230 | c->bb_op = NULL; |
||
231 | return UCL_E_OK; |
||
232 | } |
||
233 | |||
234 | |||
235 | static void bbWriteBits(UCL_COMPRESS_T *c) |
||
236 | { |
||
237 | ucl_bytep p = c->bb_p; |
||
238 | ucl_uint32 b = c->bb_b; |
||
239 | |||
240 | p[0] = UCL_BYTE(b >> 0); |
||
241 | if (c->bb_c_s >= 16) |
||
242 | { |
||
243 | p[1] = UCL_BYTE(b >> 8); |
||
244 | if (c->bb_c_s == 32) |
||
245 | { |
||
246 | p[2] = UCL_BYTE(b >> 16); |
||
247 | p[3] = UCL_BYTE(b >> 24); |
||
248 | } |
||
249 | } |
||
250 | } |
||
251 | |||
252 | |||
253 | static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit) |
||
254 | { |
||
255 | assert(bit == 0 || bit == 1); |
||
256 | assert(c->bb_k <= c->bb_c_s); |
||
257 | |||
258 | if (c->bb_k < c->bb_c_s) |
||
259 | { |
||
260 | if (c->bb_k == 0) |
||
261 | { |
||
262 | assert(c->bb_p == NULL); |
||
263 | c->bb_p = c->bb_op; |
||
264 | c->bb_op += c->bb_c_s8; |
||
265 | } |
||
266 | assert(c->bb_p != NULL); |
||
267 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
268 | |||
269 | c->bb_b = (c->bb_b << 1) + bit; |
||
270 | c->bb_k++; |
||
271 | } |
||
272 | else |
||
273 | { |
||
274 | assert(c->bb_p != NULL); |
||
275 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
276 | |||
277 | bbWriteBits(c); |
||
278 | c->bb_p = c->bb_op; |
||
279 | c->bb_op += c->bb_c_s8; |
||
280 | c->bb_b = bit; |
||
281 | c->bb_k = 1; |
||
282 | } |
||
283 | } |
||
284 | |||
285 | |||
286 | static void bbPutByte(UCL_COMPRESS_T *c, unsigned b) |
||
287 | { |
||
288 | /**printf("putbyte %p %p %x (%d)\n", op, bb_p, x, bb_k);*/ |
||
289 | assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
290 | *c->bb_op++ = UCL_BYTE(b); |
||
291 | } |
||
292 | |||
293 | |||
294 | static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit) |
||
295 | { |
||
296 | if (c->bb_k > 0) |
||
297 | { |
||
298 | assert(c->bb_k <= c->bb_c_s); |
||
299 | while (c->bb_k != c->bb_c_s) |
||
300 | bbPutBit(c, filler_bit); |
||
301 | bbWriteBits(c); |
||
302 | c->bb_k = 0; |
||
303 | } |
||
304 | c->bb_p = NULL; |
||
305 | } |
||
306 | |||
307 | |||
308 | |||
309 | /* |
||
310 | vi:ts=4:et |
||
311 | */ |
||
312 |