Subversion Repositories freemyipod

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
160 theseven 1
/* ucl_swd.c -- sliding window dictionary
2
 
3
   This file is part of the UCL data compression library.
4
 
5
   Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer
6
   All Rights Reserved.
7
 
8
   The UCL library is free software; you can redistribute it and/or
9
   modify it under the terms of the GNU General Public License as
10
   published by the Free Software Foundation; either version 2 of
11
   the License, or (at your option) any later version.
12
 
13
   The UCL library is distributed in the hope that it will be useful,
14
   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
   GNU General Public License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with the UCL library; see the file COPYING.
20
   If not, write to the Free Software Foundation, Inc.,
21
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22
 
23
   Markus F.X.J. Oberhumer
24
   <markus@oberhumer.com>
25
 */
26
 
27
 
28
#if (UCL_UINT_MAX < UCL_0xffffffffL)
29
#  error "UCL_UINT_MAX"
30
#endif
31
 
32
 
33
/***********************************************************************
34
//
35
************************************************************************/
36
 
37
#ifndef SWD_N
38
#  define SWD_N             N
39
#endif
40
#ifndef SWD_F
41
#  define SWD_F             F
42
#endif
43
#ifndef SWD_THRESHOLD
44
#  define SWD_THRESHOLD     THRESHOLD
45
#endif
46
 
47
/* unsigned type for dictionary access - don't waste memory here */
48
#if (SWD_N + SWD_F + SWD_F < USHRT_MAX)
49
   typedef unsigned short   swd_uint;
50
#  define SWD_UINT_MAX      USHRT_MAX
51
#else
52
   typedef ucl_uint         swd_uint;
53
#  define SWD_UINT_MAX      UCL_UINT_MAX
54
#endif
55
#define SWD_UINT(x)         ((swd_uint)(x))
56
 
57
 
58
#ifndef SWD_HSIZE
59
#  define SWD_HSIZE         16384
60
#endif
61
#ifndef SWD_MAX_CHAIN
62
#  define SWD_MAX_CHAIN     2048
63
#endif
64
 
65
#if !defined(HEAD3)
66
#if 1
67
#  define HEAD3(b,p) \
68
    (((0x9f5f*(((((ucl_uint32)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & (SWD_HSIZE-1))
69
#else
70
#  define HEAD3(b,p) \
71
    (((0x9f5f*(((((ucl_uint32)b[p+2]<<5)^b[p+1])<<5)^b[p]))>>5) & (SWD_HSIZE-1))
72
#endif
73
#endif
74
 
75
#if (SWD_THRESHOLD == 1) && !defined(HEAD2)
76
#  if 1 && defined(UCL_UNALIGNED_OK_2)
77
#    define HEAD2(b,p)      (* (const ucl_ushortp) &(b[p]))
78
#  else
79
#    define HEAD2(b,p)      (b[p] ^ ((unsigned)b[p+1]<<8))
80
#  endif
81
#  define NIL2              SWD_UINT_MAX
82
#endif
83
 
84
 
85
#if defined(__UCL_CHECKER)
86
   /* malloc arrays of the exact size to detect any overrun */
87
#  ifndef SWD_USE_MALLOC
88
#    define SWD_USE_MALLOC
89
#  endif
90
#endif
91
 
92
 
93
typedef struct
94
{
95
/* public - "built-in" */
96
    ucl_uint n;
97
    ucl_uint f;
98
    ucl_uint threshold;
99
 
100
/* public - configuration */
101
    ucl_uint max_chain;
102
    ucl_uint nice_length;
103
    ucl_bool use_best_off;
104
    ucl_uint lazy_insert;
105
 
106
/* public - output */
107
    ucl_uint m_len;
108
    ucl_uint m_off;
109
    ucl_uint look;
110
    int b_char;
111
#if defined(SWD_BEST_OFF)
112
    ucl_uint best_off[ SWD_BEST_OFF ];
113
#endif
114
 
115
/* semi public */
116
    UCL_COMPRESS_T *c;
117
    ucl_uint m_pos;
118
#if defined(SWD_BEST_OFF)
119
    ucl_uint best_pos[ SWD_BEST_OFF ];
120
#endif
121
 
122
/* private */
123
    const ucl_byte *dict;
124
    const ucl_byte *dict_end;
125
    ucl_uint dict_len;
126
 
127
/* private */
128
    ucl_uint ip;                /* input pointer (lookahead) */
129
    ucl_uint bp;                /* buffer pointer */
130
    ucl_uint rp;                /* remove pointer */
131
    ucl_uint b_size;
132
 
133
    unsigned char *b_wrap;
134
 
135
    ucl_uint node_count;
136
    ucl_uint first_rp;
137
 
138
#if defined(SWD_USE_MALLOC)
139
    unsigned char *b;
140
    swd_uint *head3;
141
    swd_uint *succ3;
142
    swd_uint *best3;
143
    swd_uint *llen3;
144
#ifdef HEAD2
145
    swd_uint *head2;
146
#endif
147
#else
148
    unsigned char b [ SWD_N + SWD_F + SWD_F ];
149
    swd_uint head3 [ SWD_HSIZE ];
150
    swd_uint succ3 [ SWD_N + SWD_F ];
151
    swd_uint best3 [ SWD_N + SWD_F ];
152
    swd_uint llen3 [ SWD_HSIZE ];
153
#ifdef HEAD2
154
    swd_uint head2 [ UCL_UINT32_C(65536) ];
155
#endif
156
#endif
157
}
158
ucl_swd_t;
159
 
160
 
161
/* Access macro for head3.
162
 * head3[key] may be uninitialized if the list is emtpy,
163
 * but then its value will never be used.
164
 */
165
#if defined(__UCL_CHECKER)
166
#  define s_head3(s,key) \
167
        ((s->llen3[key] == 0) ? SWD_UINT_MAX : s->head3[key])
168
#else
169
#  define s_head3(s,key)        s->head3[key]
170
#endif
171
 
172
 
173
/***********************************************************************
174
//
175
************************************************************************/
176
 
177
static
178
void swd_initdict(ucl_swd_t *s, const ucl_byte *dict, ucl_uint dict_len)
179
{
180
    s->dict = s->dict_end = NULL;
181
    s->dict_len = 0;
182
 
183
    if (!dict || dict_len <= 0)
184
        return;
185
    if (dict_len > s->n)
186
    {
187
        dict += dict_len - s->n;
188
        dict_len = s->n;
189
    }
190
 
191
    s->dict = dict;
192
    s->dict_len = dict_len;
193
    s->dict_end = dict + dict_len;
194
    ucl_memcpy(s->b,dict,dict_len);
195
    s->ip = dict_len;
196
}
197
 
198
 
199
static
200
void swd_insertdict(ucl_swd_t *s, ucl_uint node, ucl_uint len)
201
{
202
    ucl_uint key;
203
 
204
    s->node_count = s->n - len;
205
    s->first_rp = node;
206
 
207
    while (len-- > 0)
208
    {
209
        key = HEAD3(s->b,node);
210
        s->succ3[node] = s_head3(s,key);
211
        s->head3[key] = SWD_UINT(node);
212
        s->best3[node] = SWD_UINT(s->f + 1);
213
        s->llen3[key]++;
214
        assert(s->llen3[key] <= s->n);
215
 
216
#ifdef HEAD2
217
        key = HEAD2(s->b,node);
218
        s->head2[key] = SWD_UINT(node);
219
#endif
220
 
221
        node++;
222
    }
223
}
224
 
225
 
226
/***********************************************************************
227
//
228
************************************************************************/
229
 
230
static
231
int swd_init(ucl_swd_t *s, const ucl_byte *dict, ucl_uint dict_len)
232
{
233
    ucl_uint i = 0;
234
    int c = 0;
235
 
236
    if (s->n == 0)
237
        s->n = SWD_N;
238
    if (s->f == 0)
239
        s->f = SWD_F;
240
    s->threshold = SWD_THRESHOLD;
241
    if (s->n > SWD_N || s->f > SWD_F)
242
        return UCL_E_INVALID_ARGUMENT;
243
 
244
#if defined(SWD_USE_MALLOC)
245
    s->b = (unsigned char *) ucl_alloc(s->n + s->f + s->f, 1);
246
    s->head3 = (swd_uint *) ucl_alloc(SWD_HSIZE, sizeof(*s->head3));
247
    s->succ3 = (swd_uint *) ucl_alloc(s->n + s->f, sizeof(*s->succ3));
248
    s->best3 = (swd_uint *) ucl_alloc(s->n + s->f, sizeof(*s->best3));
249
    s->llen3 = (swd_uint *) ucl_alloc(SWD_HSIZE, sizeof(*s->llen3));
250
    if (!s->b || !s->head3  || !s->succ3 || !s->best3 || !s->llen3)
251
        return UCL_E_OUT_OF_MEMORY;
252
#ifdef HEAD2
253
    s->head2 = (swd_uint *) ucl_alloc(UCL_UINT32_C(65536), sizeof(*s->head2));
254
    if (!s->head2)
255
        return UCL_E_OUT_OF_MEMORY;
256
#endif
257
#endif
258
 
259
    /* defaults */
260
    s->max_chain = SWD_MAX_CHAIN;
261
    s->nice_length = s->f;
262
    s->use_best_off = 0;
263
    s->lazy_insert = 0;
264
 
265
    s->b_size = s->n + s->f;
266
    if (s->b_size + s->f >= SWD_UINT_MAX)
267
        return UCL_E_ERROR;
268
    s->b_wrap = s->b + s->b_size;
269
    s->node_count = s->n;
270
 
271
    ucl_memset(s->llen3, 0, sizeof(s->llen3[0]) * SWD_HSIZE);
272
#ifdef HEAD2
273
#if 1
274
    ucl_memset(s->head2, 0xff, sizeof(s->head2[0]) * UCL_UINT32_C(65536));
275
    assert(s->head2[0] == NIL2);
276
#else
277
    for (i = 0; i < UCL_UINT32_C(65536); i++)
278
        s->head2[i] = NIL2;
279
#endif
280
#endif
281
 
282
    s->ip = 0;
283
    swd_initdict(s,dict,dict_len);
284
    s->bp = s->ip;
285
    s->first_rp = s->ip;
286
 
287
    assert(s->ip + s->f <= s->b_size);
288
#if 1
289
    s->look = (ucl_uint) (s->c->in_end - s->c->ip);
290
    if (s->look > 0)
291
    {
292
        if (s->look > s->f)
293
            s->look = s->f;
294
        ucl_memcpy(&s->b[s->ip],s->c->ip,s->look);
295
        s->c->ip += s->look;
296
        s->ip += s->look;
297
    }
298
#else
299
    s->look = 0;
300
    while (s->look < s->f)
301
    {
302
        if ((c = getbyte(*(s->c))) < 0)
303
            break;
304
        s->b[s->ip] = UCL_BYTE(c);
305
        s->ip++;
306
        s->look++;
307
    }
308
#endif
309
    if (s->ip == s->b_size)
310
        s->ip = 0;
311
 
312
    if (s->look >= 2 && s->dict_len > 0)
313
        swd_insertdict(s,0,s->dict_len);
314
 
315
    s->rp = s->first_rp;
316
    if (s->rp >= s->node_count)
317
        s->rp -= s->node_count;
318
    else
319
        s->rp += s->b_size - s->node_count;
320
 
321
#if defined(__UCL_CHECKER)
322
    /* initialize memory for the first few HEAD3 (if s->ip is not far
323
     * enough ahead to do this job for us). The value doesn't matter. */
324
    if (s->look < 3)
325
        ucl_memset(&s->b[s->bp+s->look],0,3);
326
#endif
327
 
328
    UCL_UNUSED(i);
329
    UCL_UNUSED(c);
330
    return UCL_E_OK;
331
}
332
 
333
 
334
static
335
void swd_exit(ucl_swd_t *s)
336
{
337
#if defined(SWD_USE_MALLOC)
338
    /* free in reverse order of allocations */
339
#ifdef HEAD2
340
    ucl_free(s->head2); s->head2 = NULL;
341
#endif
342
    ucl_free(s->llen3); s->llen3 = NULL;
343
    ucl_free(s->best3); s->best3 = NULL;
344
    ucl_free(s->succ3); s->succ3 = NULL;
345
    ucl_free(s->head3); s->head3 = NULL;
346
    ucl_free(s->b); s->b = NULL;
347
#else
348
    UCL_UNUSED(s);
349
#endif
350
}
351
 
352
 
353
#define swd_pos2off(s,pos) \
354
    (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp))
355
 
356
 
357
/***********************************************************************
358
//
359
************************************************************************/
360
 
361
static __inline__
362
void swd_getbyte(ucl_swd_t *s)
363
{
364
    int c;
365
 
366
    if ((c = getbyte(*(s->c))) < 0)
367
    {
368
        if (s->look > 0)
369
            --s->look;
370
#if defined(__UCL_CHECKER)
371
        /* initialize memory - value doesn't matter */
372
        s->b[s->ip] = 0;
373
        if (s->ip < s->f)
374
            s->b_wrap[s->ip] = 0;
375
#endif
376
    }
377
    else
378
    {
379
        s->b[s->ip] = UCL_BYTE(c);
380
        if (s->ip < s->f)
381
            s->b_wrap[s->ip] = UCL_BYTE(c);
382
    }
383
    if (++s->ip == s->b_size)
384
        s->ip = 0;
385
    if (++s->bp == s->b_size)
386
        s->bp = 0;
387
    if (++s->rp == s->b_size)
388
        s->rp = 0;
389
}
390
 
391
 
392
/***********************************************************************
393
// remove node from lists
394
************************************************************************/
395
 
396
static __inline__
397
void swd_remove_node(ucl_swd_t *s, ucl_uint node)
398
{
399
    if (s->node_count == 0)
400
    {
401
        ucl_uint key;
402
 
403
#ifdef UCL_DEBUG
404
        if (s->first_rp != UCL_UINT_MAX)
405
        {
406
            if (node != s->first_rp)
407
                printf("Remove %5d: %5d %5d %5d %5d  %6d %6d\n",
408
                        node, s->rp, s->ip, s->bp, s->first_rp,
409
                        s->ip - node, s->ip - s->bp);
410
            assert(node == s->first_rp);
411
            s->first_rp = UCL_UINT_MAX;
412
        }
413
#endif
414
 
415
        key = HEAD3(s->b,node);
416
        assert(s->llen3[key] > 0);
417
        --s->llen3[key];
418
 
419
#ifdef HEAD2
420
        key = HEAD2(s->b,node);
421
        assert(s->head2[key] != NIL2);
422
        if ((ucl_uint) s->head2[key] == node)
423
            s->head2[key] = NIL2;
424
#endif
425
    }
426
    else
427
        --s->node_count;
428
}
429
 
430
 
431
/***********************************************************************
432
//
433
************************************************************************/
434
 
435
static
436
void swd_accept(ucl_swd_t *s, ucl_uint n)
437
{
438
    assert(n <= s->look);
439
 
440
    if (n > 0) do
441
    {
442
        ucl_uint key;
443
 
444
        swd_remove_node(s,s->rp);
445
 
446
        /* add bp into HEAD3 */
447
        key = HEAD3(s->b,s->bp);
448
        s->succ3[s->bp] = s_head3(s,key);
449
        s->head3[key] = SWD_UINT(s->bp);
450
        s->best3[s->bp] = SWD_UINT(s->f + 1);
451
        s->llen3[key]++;
452
        assert(s->llen3[key] <= s->n);
453
 
454
#ifdef HEAD2
455
        /* add bp into HEAD2 */
456
        key = HEAD2(s->b,s->bp);
457
        s->head2[key] = SWD_UINT(s->bp);
458
#endif
459
 
460
        swd_getbyte(s);
461
    } while (--n > 0);
462
}
463
 
464
 
465
/***********************************************************************
466
//
467
************************************************************************/
468
 
469
static
470
void swd_search(ucl_swd_t *s, ucl_uint node, ucl_uint cnt)
471
{
472
#if 0 && defined(__GNUC__) && defined(__i386__)
473
    register const unsigned char *p1 __asm__("%edi");
474
    register const unsigned char *p2 __asm__("%esi");
475
    register const unsigned char *px __asm__("%edx");
476
#else
477
    const unsigned char *p1;
478
    const unsigned char *p2;
479
    const unsigned char *px;
480
#endif
481
    ucl_uint m_len = s->m_len;
482
    const unsigned char * b  = s->b;
483
    const unsigned char * bp = s->b + s->bp;
484
    const unsigned char * bx = s->b + s->bp + s->look;
485
    unsigned char scan_end1;
486
 
487
    assert(s->m_len > 0);
488
 
489
    scan_end1 = bp[m_len - 1];
490
    for ( ; cnt-- > 0; node = s->succ3[node])
491
    {
492
        p1 = bp;
493
        p2 = b + node;
494
        px = bx;
495
 
496
        assert(m_len < s->look);
497
 
498
        if (
499
#if 1
500
            p2[m_len - 1] == scan_end1 &&
501
            p2[m_len] == p1[m_len] &&
502
#endif
503
            p2[0] == p1[0] &&
504
            p2[1] == p1[1])
505
        {
506
            ucl_uint i;
507
            assert(ucl_memcmp(bp,&b[node],3) == 0);
508
 
509
#if 0 && defined(UCL_UNALIGNED_OK_4)
510
            p1 += 3; p2 += 3;
511
            while (p1 < px && * (const ucl_uint32p) p1 == * (const ucl_uint32p) p2)
512
                p1 += 4, p2 += 4;
513
            while (p1 < px && *p1 == *p2)
514
                p1 += 1, p2 += 1;
515
#else
516
            p1 += 2; p2 += 2;
517
            do {} while (++p1 < px && *p1 == *++p2);
518
#endif
519
            i = p1 - bp;
520
 
521
#ifdef UCL_DEBUG
522
            if (ucl_memcmp(bp,&b[node],i) != 0)
523
                printf("%5ld %5ld %02x%02x %02x%02x\n",
524
                        (long)s->bp, (long) node,
525
                        bp[0], bp[1], b[node], b[node+1]);
526
#endif
527
            assert(ucl_memcmp(bp,&b[node],i) == 0);
528
 
529
#if defined(SWD_BEST_OFF)
530
            if (i < SWD_BEST_OFF)
531
            {
532
                if (s->best_pos[i] == 0)
533
                    s->best_pos[i] = node + 1;
534
            }
535
#endif
536
            if (i > m_len)
537
            {
538
                s->m_len = m_len = i;
539
                s->m_pos = node;
540
                if (m_len == s->look)
541
                    return;
542
                if (m_len >= s->nice_length)
543
                    return;
544
                if (m_len > (ucl_uint) s->best3[node])
545
                    return;
546
                scan_end1 = bp[m_len - 1];
547
            }
548
        }
549
    }
550
}
551
 
552
 
553
/***********************************************************************
554
//
555
************************************************************************/
556
 
557
#ifdef HEAD2
558
 
559
static
560
ucl_bool swd_search2(ucl_swd_t *s)
561
{
562
    ucl_uint key;
563
 
564
    assert(s->look >= 2);
565
    assert(s->m_len > 0);
566
 
567
    key = s->head2[ HEAD2(s->b,s->bp) ];
568
    if (key == NIL2)
569
        return 0;
570
#ifdef UCL_DEBUG
571
    if (ucl_memcmp(&s->b[s->bp],&s->b[key],2) != 0)
572
        printf("%5ld %5ld %02x%02x %02x%02x\n", (long)s->bp, (long)key,
573
                s->b[s->bp], s->b[s->bp+1], s->b[key], s->b[key+1]);
574
#endif
575
    assert(ucl_memcmp(&s->b[s->bp],&s->b[key],2) == 0);
576
#if defined(SWD_BEST_OFF)
577
    if (s->best_pos[2] == 0)
578
        s->best_pos[2] = key + 1;
579
#endif
580
 
581
    if (s->m_len < 2)
582
    {
583
        s->m_len = 2;
584
        s->m_pos = key;
585
    }
586
    return 1;
587
}
588
 
589
#endif
590
 
591
 
592
/***********************************************************************
593
//
594
************************************************************************/
595
 
596
static
597
void swd_findbest(ucl_swd_t *s)
598
{
599
    ucl_uint key;
600
    ucl_uint cnt, node;
601
    ucl_uint len;
602
 
603
    assert(s->m_len > 0);
604
 
605
    /* get current head, add bp into HEAD3 */
606
    key = HEAD3(s->b,s->bp);
607
    node = s->succ3[s->bp] = s_head3(s,key);
608
    cnt = s->llen3[key]++;
609
    assert(s->llen3[key] <= s->n + s->f);
610
    if (cnt > s->max_chain && s->max_chain > 0)
611
        cnt = s->max_chain;
612
    s->head3[key] = SWD_UINT(s->bp);
613
 
614
    s->b_char = s->b[s->bp];
615
    len = s->m_len;
616
    if (s->m_len >= s->look)
617
    {
618
        if (s->look == 0)
619
            s->b_char = -1;
620
        s->m_off = 0;
621
        s->best3[s->bp] = SWD_UINT(s->f + 1);
622
    }
623
    else
624
    {
625
#ifdef HEAD2
626
        if (swd_search2(s))
627
#endif
628
            if (s->look >= 3)
629
                swd_search(s,node,cnt);
630
        if (s->m_len > len)
631
            s->m_off = swd_pos2off(s,s->m_pos);
632
        s->best3[s->bp] = SWD_UINT(s->m_len);
633
 
634
#if defined(SWD_BEST_OFF)
635
        if (s->use_best_off)
636
        {
637
            int i;
638
            for (i = 2; i < SWD_BEST_OFF; i++)
639
                if (s->best_pos[i] > 0)
640
                    s->best_off[i] = swd_pos2off(s,s->best_pos[i]-1);
641
                else
642
                    s->best_off[i] = 0;
643
        }
644
#endif
645
    }
646
 
647
    swd_remove_node(s,s->rp);
648
 
649
#ifdef HEAD2
650
    /* add bp into HEAD2 */
651
    key = HEAD2(s->b,s->bp);
652
    s->head2[key] = SWD_UINT(s->bp);
653
#endif
654
}
655
 
656
 
657
#undef HEAD3
658
#undef HEAD2
659
#undef s_head3
660
 
661
 
662
/*
663
vi:ts=4:et
664
*/
665