Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 26 | pmbaty | 1 | /* ucl_mchw.ch -- matching functions using a window | 
| 2 | |||
| 3 | This file is part of the UCL data compression library. | ||
| 4 | |||
| 5 | Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer | ||
| 6 | All Rights Reserved. | ||
| 7 | |||
| 8 | The UCL library is free software; you can redistribute it and/or | ||
| 9 | modify it under the terms of the GNU General Public License as | ||
| 10 | published by the Free Software Foundation; either version 2 of | ||
| 11 | the License, or (at your option) any later version. | ||
| 12 | |||
| 13 | The UCL library is distributed in the hope that it will be useful, | ||
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | GNU General Public License for more details. | ||
| 17 | |||
| 18 | You should have received a copy of the GNU General Public License | ||
| 19 | along with the UCL library; see the file COPYING. | ||
| 20 | If not, write to the Free Software Foundation, Inc., | ||
| 21 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 22 | |||
| 23 | Markus F.X.J. Oberhumer | ||
| 24 | <markus@oberhumer.com> | ||
| 25 | http://www.oberhumer.com/opensource/ucl/ | ||
| 26 | */ | ||
| 27 | |||
| 28 | |||
| 29 | /*********************************************************************** | ||
| 30 | // | ||
| 31 | ************************************************************************/ | ||
| 32 | |||
| 33 | typedef struct | ||
| 34 | { | ||
| 35 | int init; | ||
| 36 | |||
| 37 | ucl_uint look; /* bytes in lookahead buffer */ | ||
| 38 | |||
| 39 | ucl_uint m_len; | ||
| 40 | ucl_uint m_off; | ||
| 41 | |||
| 42 | ucl_uint last_m_len; | ||
| 43 | ucl_uint last_m_off; | ||
| 44 | |||
| 45 | const ucl_bytep bp; | ||
| 46 | const ucl_bytep ip; | ||
| 47 | const ucl_bytep in; | ||
| 48 | const ucl_bytep in_end; | ||
| 49 | ucl_bytep out; | ||
| 50 | |||
| 51 | ucl_uint32 bb_b; | ||
| 52 | unsigned bb_k; | ||
| 53 | unsigned bb_c_endian; | ||
| 54 | unsigned bb_c_s; | ||
| 55 | unsigned bb_c_s8; | ||
| 56 | ucl_bytep bb_p; | ||
| 57 | ucl_bytep bb_op; | ||
| 58 | |||
| 59 | struct ucl_compress_config_t conf; | ||
| 60 | ucl_uintp result; | ||
| 61 | |||
| 62 | ucl_progress_callback_p cb; | ||
| 63 | |||
| 64 | ucl_uint textsize; /* text size counter */ | ||
| 65 | ucl_uint codesize; /* code size counter */ | ||
| 66 | ucl_uint printcount; /* counter for reporting progress every 1K bytes */ | ||
| 67 | |||
| 68 | /* some stats */ | ||
| 69 | unsigned long lit_bytes; | ||
| 70 | unsigned long match_bytes; | ||
| 71 | unsigned long rep_bytes; | ||
| 72 | unsigned long lazy; | ||
| 73 | } | ||
| 74 | UCL_COMPRESS_T; | ||
| 75 | |||
| 76 | |||
| 77 | |||
| 78 | #if (ACC_OS_TOS && (ACC_CC_PUREC || ACC_CC_TURBOC)) | ||
| 79 | /* the cast is needed to work around a code generation bug */ | ||
| 80 | #define getbyte(c) ((c).ip < (c).in_end ? (int) (unsigned) *((c).ip)++ : (-1)) | ||
| 81 | #else | ||
| 82 | #define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) | ||
| 83 | #endif | ||
| 84 | |||
| 85 | #include "ucl_swd.ch" | ||
| 86 | |||
| 87 | |||
| 88 | |||
| 89 | /*********************************************************************** | ||
| 90 | // | ||
| 91 | ************************************************************************/ | ||
| 92 | |||
| 93 | static int | ||
| 94 | init_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, | ||
| 95 | const ucl_bytep dict, ucl_uint dict_len, | ||
| 96 | ucl_uint32 flags ) | ||
| 97 | { | ||
| 98 | int r; | ||
| 99 | |||
| 100 | assert(!c->init); | ||
| 101 | c->init = 1; | ||
| 102 | |||
| 103 | s->c = c; | ||
| 104 | |||
| 105 | c->last_m_len = c->last_m_off = 0; | ||
| 106 | |||
| 107 | c->textsize = c->codesize = c->printcount = 0; | ||
| 108 | c->lit_bytes = c->match_bytes = c->rep_bytes = 0; | ||
| 109 | c->lazy = 0; | ||
| 110 | |||
| 111 | r = swd_init(s,dict,dict_len); | ||
| 112 | if (r != UCL_E_OK) | ||
| 113 |     { | ||
| 114 | swd_exit(s); | ||
| 115 | return r; | ||
| 116 | } | ||
| 117 | |||
| 118 | s->use_best_off = (flags & 1) ? 1 : 0; | ||
| 119 | return UCL_E_OK; | ||
| 120 | } | ||
| 121 | |||
| 122 | |||
| 123 | /*********************************************************************** | ||
| 124 | // | ||
| 125 | ************************************************************************/ | ||
| 126 | |||
| 127 | static int | ||
| 128 | find_match ( UCL_COMPRESS_T *c, ucl_swd_t *s, | ||
| 129 | ucl_uint this_len, ucl_uint skip ) | ||
| 130 | { | ||
| 131 | assert(c->init); | ||
| 132 | |||
| 133 | if (skip > 0) | ||
| 134 |     { | ||
| 135 | assert(this_len >= skip); | ||
| 136 | swd_accept(s, this_len - skip); | ||
| 137 | c->textsize += this_len - skip + 1; | ||
| 138 | } | ||
| 139 | else | ||
| 140 |     { | ||
| 141 | assert(this_len <= 1); | ||
| 142 | c->textsize += this_len - skip; | ||
| 143 | } | ||
| 144 | |||
| 145 | s->m_len = SWD_THRESHOLD; | ||
| 146 | #ifdef SWD_BEST_OFF | ||
| 147 | if (s->use_best_off) | ||
| 148 | memset(s->best_pos,0,sizeof(s->best_pos)); | ||
| 149 | #endif | ||
| 150 | swd_findbest(s); | ||
| 151 | c->m_len = s->m_len; | ||
| 152 | #if defined(__UCL_CHECKER) | ||
| 153 | /* s->m_off may be uninitialized if we didn't find a match, | ||
| 154 | * but then its value will never be used. | ||
| 155 | */ | ||
| 156 | c->m_off = (s->m_len == SWD_THRESHOLD) ? 0 : s->m_off; | ||
| 157 | #else | ||
| 158 | c->m_off = s->m_off; | ||
| 159 | #endif | ||
| 160 | |||
| 161 | swd_getbyte(s); | ||
| 162 | |||
| 163 | if (s->b_char < 0) | ||
| 164 |     { | ||
| 165 | c->look = 0; | ||
| 166 | c->m_len = 0; | ||
| 167 | swd_exit(s); | ||
| 168 | } | ||
| 169 | else | ||
| 170 |     { | ||
| 171 | c->look = s->look + 1; | ||
| 172 | } | ||
| 173 | c->bp = c->ip - c->look; | ||
| 174 | |||
| 175 | #if 0 | ||
| 176 | /* brute force match search */ | ||
| 177 | if (c->m_len > SWD_THRESHOLD && c->m_len + 1 <= c->look) | ||
| 178 |     { | ||
| 179 | const ucl_bytep ip = c->bp; | ||
| 180 | const ucl_bytep m = c->bp - c->m_off; | ||
| 181 | const ucl_bytep in = c->in; | ||
| 182 | |||
| 183 | if (ip - in > s->n) | ||
| 184 | in = ip - s->n; | ||
| 185 | for (;;) | ||
| 186 |         { | ||
| 187 | while (*in != *ip) | ||
| 188 | in++; | ||
| 189 | if (in == ip) | ||
| 190 | break; | ||
| 191 | if (in != m) | ||
| 192 | if (memcmp(in,ip,c->m_len+1) == 0) | ||
| 193 |                     printf("%p %p %p %5d\n",in,ip,m,c->m_len); | ||
| 194 | in++; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | #endif | ||
| 198 | |||
| 199 | if (c->cb && c->textsize > c->printcount) | ||
| 200 |     { | ||
| 201 | (*c->cb->callback)(c->textsize,c->codesize,3,c->cb->user); | ||
| 202 | c->printcount += 1024; | ||
| 203 | } | ||
| 204 | |||
| 205 | return UCL_E_OK; | ||
| 206 | } | ||
| 207 | |||
| 208 | |||
| 209 | /*********************************************************************** | ||
| 210 | // bit buffer | ||
| 211 | ************************************************************************/ | ||
| 212 | |||
| 213 | static int bbConfig(UCL_COMPRESS_T *c, int endian, int bitsize) | ||
| 214 | { | ||
| 215 | if (endian != -1) | ||
| 216 |     { | ||
| 217 | if (endian != 0) | ||
| 218 | return UCL_E_ERROR; | ||
| 219 | c->bb_c_endian = endian; | ||
| 220 | } | ||
| 221 | if (bitsize != -1) | ||
| 222 |     { | ||
| 223 | if (bitsize != 8 && bitsize != 16 && bitsize != 32) | ||
| 224 | return UCL_E_ERROR; | ||
| 225 | c->bb_c_s = bitsize; | ||
| 226 | c->bb_c_s8 = bitsize / 8; | ||
| 227 | } | ||
| 228 | c->bb_b = 0; c->bb_k = 0; | ||
| 229 | c->bb_p = NULL; | ||
| 230 | c->bb_op = NULL; | ||
| 231 | return UCL_E_OK; | ||
| 232 | } | ||
| 233 | |||
| 234 | |||
| 235 | static void bbWriteBits(UCL_COMPRESS_T *c) | ||
| 236 | { | ||
| 237 | ucl_bytep p = c->bb_p; | ||
| 238 | ucl_uint32 b = c->bb_b; | ||
| 239 | |||
| 240 | p[0] = UCL_BYTE(b >> 0); | ||
| 241 | if (c->bb_c_s >= 16) | ||
| 242 |     { | ||
| 243 | p[1] = UCL_BYTE(b >> 8); | ||
| 244 | if (c->bb_c_s == 32) | ||
| 245 |         { | ||
| 246 | p[2] = UCL_BYTE(b >> 16); | ||
| 247 | p[3] = UCL_BYTE(b >> 24); | ||
| 248 | } | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | |||
| 253 | static void bbPutBit(UCL_COMPRESS_T *c, unsigned bit) | ||
| 254 | { | ||
| 255 | assert(bit == 0 || bit == 1); | ||
| 256 | assert(c->bb_k <= c->bb_c_s); | ||
| 257 | |||
| 258 | if (c->bb_k < c->bb_c_s) | ||
| 259 |     { | ||
| 260 | if (c->bb_k == 0) | ||
| 261 |         { | ||
| 262 | assert(c->bb_p == NULL); | ||
| 263 | c->bb_p = c->bb_op; | ||
| 264 | c->bb_op += c->bb_c_s8; | ||
| 265 | } | ||
| 266 | assert(c->bb_p != NULL); | ||
| 267 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); | ||
| 268 | |||
| 269 | c->bb_b = (c->bb_b << 1) + bit; | ||
| 270 | c->bb_k++; | ||
| 271 | } | ||
| 272 | else | ||
| 273 |     { | ||
| 274 | assert(c->bb_p != NULL); | ||
| 275 | assert(c->bb_p + c->bb_c_s8 <= c->bb_op); | ||
| 276 | |||
| 277 | bbWriteBits(c); | ||
| 278 | c->bb_p = c->bb_op; | ||
| 279 | c->bb_op += c->bb_c_s8; | ||
| 280 | c->bb_b = bit; | ||
| 281 | c->bb_k = 1; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | |||
| 286 | static void bbPutByte(UCL_COMPRESS_T *c, unsigned b) | ||
| 287 | { | ||
| 288 |     /**printf("putbyte %p %p %x  (%d)\n", op, bb_p, x, bb_k);*/ | ||
| 289 | assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op); | ||
| 290 | *c->bb_op++ = UCL_BYTE(b); | ||
| 291 | } | ||
| 292 | |||
| 293 | |||
| 294 | static void bbFlushBits(UCL_COMPRESS_T *c, unsigned filler_bit) | ||
| 295 | { | ||
| 296 | if (c->bb_k > 0) | ||
| 297 |     { | ||
| 298 | assert(c->bb_k <= c->bb_c_s); | ||
| 299 | while (c->bb_k != c->bb_c_s) | ||
| 300 | bbPutBit(c, filler_bit); | ||
| 301 | bbWriteBits(c); | ||
| 302 | c->bb_k = 0; | ||
| 303 | } | ||
| 304 | c->bb_p = NULL; | ||
| 305 | } | ||
| 306 | |||
| 307 | |||
| 308 | |||
| 309 | /* | ||
| 310 | vi:ts=4:et | ||
| 311 | */ | ||
| 312 |