Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 26 | pmbaty | 1 | /* ucl_mchw.ch -- matching functions using a window |
| 2 | |||
| 3 | This file is part of the UCL data compression library. |
||
| 4 | |||
| 5 | Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer |
||
| 6 | All Rights Reserved. |
||
| 7 | |||
| 8 | The UCL library is free software; you can redistribute it and/or |
||
| 9 | modify it under the terms of the GNU General Public License as |
||
| 10 | published by the Free Software Foundation; either version 2 of |
||
| 11 | the License, or (at your option) any later version. |
||
| 12 | |||
| 13 | The UCL library is distributed in the hope that it will be useful, |
||
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 16 | GNU General Public License for more details. |
||
| 17 | |||
| 18 | You should have received a copy of the GNU General Public License |
||
| 19 | along with the UCL library; see the file COPYING. |
||
| 20 | If not, write to the Free Software Foundation, Inc., |
||
| 21 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
| 22 | |||
| 23 | Markus F.X.J. Oberhumer |
||
| 24 | <markus@oberhumer.com> |
||
| 25 | http://www.oberhumer.com/opensource/ucl/ |
||
| 26 | */ |
||
| 27 | |||
| 28 | |||
| 29 | /*********************************************************************** |
||
| 30 | // |
||
| 31 | ************************************************************************/ |
||
| 32 | |||
| 33 | typedef struct |
||
| 34 | { |
||
| 35 | int init; |
||
| 36 | |||
| 37 | ucl_uint look; /* bytes in lookahead buffer */ |
||
| 38 | |||
| 39 | ucl_uint m_len; |
||
| 40 | ucl_uint m_off; |
||
| 41 | |||
| 42 | ucl_uint last_m_len; |
||
| 43 | ucl_uint last_m_off; |
||
| 44 | |||
| 45 | const ucl_bytep bp; |
||
| 46 | const ucl_bytep ip; |
||
| 47 | const ucl_bytep in; |
||
| 48 | const ucl_bytep in_end; |
||
| 49 | ucl_bytep out; |
||
| 50 | |||
| 51 | ucl_uint32 bb_b; |
||
| 52 | unsigned bb_k; |
||
| 53 | unsigned bb_c_endian; |
||
| 54 | unsigned bb_c_s; |
||
| 55 | unsigned bb_c_s8; |
||
| 56 | ucl_bytep bb_p; |
||
| 57 | ucl_bytep bb_op; |
||
| 58 | |||
| 59 | struct ucl_compress_config_t conf; |
||
| 60 | ucl_uintp result; |
||
| 61 | |||
| 62 | ucl_progress_callback_p cb; |
||
| 63 | |||
| 64 | ucl_uint textsize; /* text size counter */ |
||
| 65 | ucl_uint codesize; /* code size counter */ |
||
| 66 | ucl_uint printcount; /* counter for reporting progress every 1K bytes */ |
||
| 67 | |||
| 68 | /* some stats */ |
||
| 69 | unsigned long lit_bytes; |
||
| 70 | unsigned long match_bytes; |
||
| 71 | unsigned long rep_bytes; |
||
| 72 | unsigned long lazy; |
||
| 73 | } |
||
| 74 | UCL_COMPRESS_T; |
||
| 75 | |||
| 76 | |||
| 77 | |||
| 78 | #if (ACC_OS_TOS && (ACC_CC_PUREC || ACC_CC_TURBOC)) |
||
| 79 | /* the cast is needed to work around a code generation bug */ |
||
| 80 | #define getbyte(c) ((c).ip < (c).in_end ? (int) (unsigned) *((c).ip)++ : (-1)) |
||
| 81 | #else |
||
| 82 | #define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) |
||
| 83 | #endif |
||
| 84 | |||
| 85 | #include "ucl_swd.ch" |
||
| 86 | |||
| 87 | |||
| 88 | |||
| 89 | /*********************************************************************** |
||
| 90 | // |
||
| 91 | ************************************************************************/ |
||
| 92 | |||
| 93 | static int |
||
| 94 | init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, |
||
| 95 | const ucl_bytep dict, ucl_uint dict_len, |
||
| 96 | ucl_uint32 flags ) |
||
| 97 | { |
||
| 98 | int r; |
||
| 99 | |||
| 100 | assert(!c->init); |
||
| 101 | c->init = 1; |
||
| 102 | |||
| 103 | s->c = c; |
||
| 104 | |||
| 105 | c->last_m_len = c->last_m_off = 0; |
||
| 106 | |||
| 107 | c->textsize = c->codesize = c->printcount = 0; |
||
| 108 | c->lit_bytes = c->match_bytes = c->rep_bytes = 0; |
||
| 109 | c->lazy = 0; |
||
| 110 | |||
| 111 | r = swd_init(s,dict,dict_len); |
||
| 112 | if (r != UCL_E_OK) |
||
| 113 | { |
||
| 114 | swd_exit(s); |
||
| 115 | return r; |
||
| 116 | } |
||
| 117 | |||
| 118 | s->use_best_off = (flags & 1) ? 1 : 0; |
||
| 119 | return UCL_E_OK; |
||
| 120 | } |
||
| 121 | |||
| 122 | |||
| 123 | /*********************************************************************** |
||
| 124 | // |
||
| 125 | ************************************************************************/ |
||
| 126 | |||
| 127 | static int |
||
| 128 | find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, |
||
| 129 | ucl_uint this_len, ucl_uint skip ) |
||
| 130 | { |
||
| 131 | assert(c->init); |
||
| 132 | |||
| 133 | if (skip > 0) |
||
| 134 | { |
||
| 135 | assert(this_len >= skip); |
||
| 136 | swd_accept(s, this_len - skip); |
||
| 137 | c->textsize += this_len - skip + 1; |
||
| 138 | } |
||
| 139 | else |
||
| 140 | { |
||
| 141 | assert(this_len <= 1); |
||
| 142 | c->textsize += this_len - skip; |
||
| 143 | } |
||
| 144 | |||
| 145 | s->m_len = SWD_THRESHOLD; |
||
| 146 | #ifdef SWD_BEST_OFF |
||
| 147 | if (s->use_best_off) |
||
| 148 | memset(s->best_pos,0,sizeof(s->best_pos)); |
||
| 149 | #endif |
||
| 150 | swd_findbest(s); |
||
| 151 | c->m_len = s->m_len; |
||
| 152 | #if defined(__UCL_CHECKER) |
||
| 153 | /* s->m_off may be uninitialized if we didn't find a match, |
||
| 154 | * but then its value will never be used. |
||
| 155 | */ |
||
| 156 | c->m_off = (s->m_len == SWD_THRESHOLD) ? 0 : s->m_off; |
||
| 157 | #else |
||
| 158 | c->m_off = s->m_off; |
||
| 159 | #endif |
||
| 160 | |||
| 161 | swd_getbyte(s); |
||
| 162 | |||
| 163 | if (s->b_char < 0) |
||
| 164 | { |
||
| 165 | c->look = 0; |
||
| 166 | c->m_len = 0; |
||
| 167 | swd_exit(s); |
||
| 168 | } |
||
| 169 | else |
||
| 170 | { |
||
| 171 | c->look = s->look + 1; |
||
| 172 | } |
||
| 173 | c->bp = c->ip - c->look; |
||
| 174 | |||
| 175 | #if 0 |
||
| 176 | /* brute force match search */ |
||
| 177 | if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look) |
||
| 178 | { |
||
| 179 | const ucl_bytep ip = c->bp; |
||
| 180 | const ucl_bytep m = c->bp - c->m_off; |
||
| 181 | const ucl_bytep in = c->in; |
||
| 182 | |||
| 183 | if (ip - in > s->n) |
||
| 184 | in = ip - s->n; |
||
| 185 | for (;;) |
||
| 186 | { |
||
| 187 | while (*in != *ip) |
||
| 188 | in++; |
||
| 189 | if (in == ip) |
||
| 190 | break; |
||
| 191 | if (in != m) |
||
| 192 | if (memcmp(in,ip,c->m_len+1) == 0) |
||
| 193 | printf("%p %p %p %5d\n",in,ip,m,c->m_len); |
||
| 194 | in++; |
||
| 195 | } |
||
| 196 | } |
||
| 197 | #endif |
||
| 198 | |||
| 199 | if (c->cb && c->textsize > c->printcount) |
||
| 200 | { |
||
| 201 | (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user); |
||
| 202 | c->printcount += 1024; |
||
| 203 | } |
||
| 204 | |||
| 205 | return UCL_E_OK; |
||
| 206 | } |
||
| 207 | |||
| 208 | |||
| 209 | /*********************************************************************** |
||
| 210 | // bit buffer |
||
| 211 | ************************************************************************/ |
||
| 212 | |||
| 213 | static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize) |
||
| 214 | { |
||
| 215 | if (endian != -1) |
||
| 216 | { |
||
| 217 | if (endian != 0) |
||
| 218 | return UCL_E_ERROR; |
||
| 219 | c->bb_c_endian = endian; |
||
| 220 | } |
||
| 221 | if (bitsize != -1) |
||
| 222 | { |
||
| 223 | if (bitsize != 8 && bitsize != 16 && bitsize != 32) |
||
| 224 | return UCL_E_ERROR; |
||
| 225 | c->bb_c_s = bitsize; |
||
| 226 | c->bb_c_s8 = bitsize / 8; |
||
| 227 | } |
||
| 228 | c->bb_b = 0; c->bb_k = 0; |
||
| 229 | c->bb_p = NULL; |
||
| 230 | c->bb_op = NULL; |
||
| 231 | return UCL_E_OK; |
||
| 232 | } |
||
| 233 | |||
| 234 | |||
| 235 | static void bbWriteBits(UCL_COMPRESS_T *c) |
||
| 236 | { |
||
| 237 | ucl_bytep p = c->bb_p; |
||
| 238 | ucl_uint32 b = c->bb_b; |
||
| 239 | |||
| 240 | p[0] = UCL_BYTE(b >> 0); |
||
| 241 | if (c->bb_c_s >= 16) |
||
| 242 | { |
||
| 243 | p[1] = UCL_BYTE(b >> 8); |
||
| 244 | if (c->bb_c_s == 32) |
||
| 245 | { |
||
| 246 | p[2] = UCL_BYTE(b >> 16); |
||
| 247 | p[3] = UCL_BYTE(b >> 24); |
||
| 248 | } |
||
| 249 | } |
||
| 250 | } |
||
| 251 | |||
| 252 | |||
| 253 | static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit) |
||
| 254 | { |
||
| 255 | assert(bit == 0 || bit == 1); |
||
| 256 | assert(c->bb_k <= c->bb_c_s); |
||
| 257 | |||
| 258 | if (c->bb_k < c->bb_c_s) |
||
| 259 | { |
||
| 260 | if (c->bb_k == 0) |
||
| 261 | { |
||
| 262 | assert(c->bb_p == NULL); |
||
| 263 | c->bb_p = c->bb_op; |
||
| 264 | c->bb_op += c->bb_c_s8; |
||
| 265 | } |
||
| 266 | assert(c->bb_p != NULL); |
||
| 267 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
| 268 | |||
| 269 | c->bb_b = (c->bb_b << 1) + bit; |
||
| 270 | c->bb_k++; |
||
| 271 | } |
||
| 272 | else |
||
| 273 | { |
||
| 274 | assert(c->bb_p != NULL); |
||
| 275 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
| 276 | |||
| 277 | bbWriteBits(c); |
||
| 278 | c->bb_p = c->bb_op; |
||
| 279 | c->bb_op += c->bb_c_s8; |
||
| 280 | c->bb_b = bit; |
||
| 281 | c->bb_k = 1; |
||
| 282 | } |
||
| 283 | } |
||
| 284 | |||
| 285 | |||
| 286 | static void bbPutByte(UCL_COMPRESS_T *c, unsigned b) |
||
| 287 | { |
||
| 288 | /**printf("putbyte %p %p %x (%d)\n", op, bb_p, x, bb_k);*/ |
||
| 289 | assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op); |
||
| 290 | *c->bb_op++ = UCL_BYTE(b); |
||
| 291 | } |
||
| 292 | |||
| 293 | |||
| 294 | static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit) |
||
| 295 | { |
||
| 296 | if (c->bb_k > 0) |
||
| 297 | { |
||
| 298 | assert(c->bb_k <= c->bb_c_s); |
||
| 299 | while (c->bb_k != c->bb_c_s) |
||
| 300 | bbPutBit(c, filler_bit); |
||
| 301 | bbWriteBits(c); |
||
| 302 | c->bb_k = 0; |
||
| 303 | } |
||
| 304 | c->bb_p = NULL; |
||
| 305 | } |
||
| 306 | |||
| 307 | |||
| 308 | |||
| 309 | /* |
||
| 310 | vi:ts=4:et |
||
| 311 | */ |
||
| 312 |