Statistics
| Revision:

gvsig-raster / libjni-potrace / trunk / libjni-potrace / resources / potrace-1.8 / src / lzw.c @ 1780

History | View | Annotate | Download (10.8 KB)

1
/* Copyright (C) 2001-2007 Peter Selinger.
2
   This file is part of Potrace. It is free software and it is covered
3
   by the GNU General Public License. See the file COPYING for details. */
4

    
5
/* $Id: lzw.c 147 2007-04-09 00:44:09Z selinger $ */
6

    
7
/* code for adaptive LZW compression, as used in PostScript. */
8

    
9
#include <stdlib.h>
10
#include <stdio.h>
11
#include <errno.h>
12
#include <string.h>
13

    
14
#include "lists.h"
15
#include "bitops.h"
16
#include "lzw.h"
17

    
18
/* ---------------------------------------------------------------------- */
19
/* compression state specification */
20

    
21
/* The compression algorithm follows the following specification,
22
   expressed as a state machine. A state is a triple {s,d,n}, where s
23
   is a string of input symbols, d is a dictionary, which is a
24
   function from strings to output symbols, and n is the dictionary
25
   size, or equivalently, the next unused output symbol. There are
26
   also special states init and stop. (emit.c[b]ode) is a function
27
   which emits the code code as a b-bit value into the output
28
   bitstream. hibit(n) returns the least number of binary digits
29
   required to represent n.
30

31
   init ---> {[], newdict, 258}
32

33
     where [] is the empty string, and newdict maps each of the 256
34
     singleton strings to itself. (Note that there are two special
35
     output symbols 256 and 257, so that the next available one is
36
     258). Note: hibit(258)=9.
37

38
   {[], d, n} (input c) ---> (emit[hibit(n)] 256) {c, d, n}
39

40
   {s,d,n} (input c) ---> {s*c,d,n}
41

42
     if s!=[], s*c is in the domain of d. Here s*c is the strings s
43
     extended by the character c.
44

45
   {s,d,n} (input c) ---> (emit.c[hibit(n)]ode) {c,d',n+1}
46

47
     if s!=[], s*c is not in the domain of d, and hibit(n+2) <= 12.
48
     Here, d(s)=code, and d'=d+{s*c->n}.
49

50
   {s,d,n} (input c) ---> 
51
           (emit.c[hibit(n)]ode) (emit[hibit(n+1)] 256) {c, newdict, 258}
52

53
     if s!=[], s*c is not in the domain of d, and hibit(n+2) > 12.
54
     Here, d(s)=code, and d'=d+{s*c->n}.
55

56
   {s,d,n} (input EOD) ---> (emit.c[hibit(n)]ode) (emit[hibit(n+1)] 257) stop
57

58
     where s != [], code = d(s). Here, EOD stands for end-of-data.
59

60
   {[],d,n} (input EOD) ---> (emit[hibit(n)] 256) (emit[hibit(258)] 257) stop
61

62
   Notes: 
63

64
   * Each reachable state {s,d,n} satisfies hibit(n+1) <= 12.
65
   * Only codes of 12 or fewer bits are emitted.
66
   * Each reachable state {s,d,n} satisfies s=[] or s is in the domain of d.
67
   * The domain of d is always prefix closed (except for the empty prefix)
68
   * The state machine is deterministic and non-blocking.
69

70
*/
71
   
72

    
73
/* ---------------------------------------------------------------------- */
74
/* private state */
75

    
76
#define BITBUF_TYPE unsigned int
77

    
78
/* the dictionary is implemented as a tree of strings under the prefix
79
   order. The tree is in turns represented as a linked list of
80
   lzw_dict_t structures, with "children" pointing to a node's first
81
   child, and "next" pointing to a node's next sibling. As an
82
   optimization, the top-level nodes (singleton strings) are
83
   implemented lazily, i.e., the corresponding entry is not actually
84
   created until it is accessed. */
85

    
86
struct lzw_dict_s {
87
  char c;      /* last character of string represented by this entry */
88
  int code;    /* code for the string represented by this entry */
89
  int freq;    /* how often searched? For optimization only */
90
  struct lzw_dict_s *children;  /* list of sub-entries */
91
  struct lzw_dict_s *next;      /* for making a linked list */
92
};
93
typedef struct lzw_dict_s lzw_dict_t;
94

    
95
/* A state {s,d,n} is represented by the "dictionary state" part of
96
   the lzw_state_t structure. Here, s is a pointer directly to the node
97
   of the dictionary structure corresponding to the string s, or NULL
98
   if s=[]. Further, the lzw_state_t structure contains a buffer of
99
   pending output bits, and a flag indicating whether the EOD (end of
100
   data) has been reached in the input. */
101

    
102
struct lzw_state_s {
103
  /* dictionary state */
104
  int n;           /* current size of the dictionary */
105
  lzw_dict_t *d;     /* pointer to dictionary */
106
  lzw_dict_t *s;     /* pointer to current string, or NULL at beginning */
107

    
108
  /* buffers for pending output */
109
  BITBUF_TYPE buf; /* bits scheduled for output - left aligned, 0 padded */
110
  int bufsize;     /* number of bits scheduled for output. */
111
  int eod;         /* flush buffer? */
112
};
113
typedef struct lzw_state_s lzw_state_t;
114

    
115
/* ---------------------------------------------------------------------- */
116
/* auxiliary functions which operate on dictionary states */
117

    
118
/* recursively free an lzw_dict_t object */
119
static void lzw_free_dict(lzw_dict_t *s) {
120
  lzw_dict_t *e;
121

    
122
  list_forall_unlink(e, s) {
123
    lzw_free_dict(e->children);
124
    free(e);
125
  }
126
}
127

    
128
/* re-initialize the lzw state's dictionary state to "newdict",
129
   freeing any old dictionary. */
130
static void lzw_clear_table(lzw_state_t *st) {
131

    
132
  lzw_free_dict(st->d);
133
  st->d = NULL;
134
  st->n = 258;
135
  st->s = NULL;
136
}
137

    
138
/* ---------------------------------------------------------------------- */
139
/* auxiliary functions for reading/writing the bit buffer */
140

    
141
/* write the code to the bit buffer. Precondition st->bufsize <= 7.
142
   Note: this does not change the dictionary state; in particular,
143
   n must be updated between consecutive calls. */
144
static inline void lzw_emit(int code, lzw_state_t *st) {
145
  BITBUF_TYPE mask;
146
  int bits = hibit(st->n);
147

    
148
  /* fill bit buffer */
149
  mask = (1 << bits) - 1;
150
  code &= mask;
151

    
152
  st->buf |= code << (8*sizeof(BITBUF_TYPE) - st->bufsize - bits);
153
  st->bufsize += bits;
154
}
155

    
156
/* transfer one byte from bit buffer to output. Precondition:
157
   s->avail_out > 0. */
158
static inline void lzw_read_bitbuf(lzw_stream_t *s) {
159
  int ch;
160
  lzw_state_t *st = (lzw_state_t *)s->internal;
161

    
162
  ch = st->buf >> (8*sizeof(BITBUF_TYPE)-8);
163
  st->buf <<= 8;
164
  st->bufsize -= 8;
165

    
166
  s->next_out[0] = ch;
167
  s->next_out++;
168
  s->avail_out--;
169
}
170

    
171
/* ---------------------------------------------------------------------- */
172
/* The following functions implement the state machine. */
173

    
174
/* perform state transition of the state st on input character
175
   ch. This updates the dictionary state and/or writes to the bit
176
   buffer. Precondition: st->bufsize <= 7. Return 0 on success, or 1
177
   on error with errno set. */
178
static int lzw_encode_char(lzw_state_t *st, char c) {
179
  lzw_dict_t *e;
180

    
181
  /* st = {s,d,n}. hibit(n+1)<=12. */
182

    
183
  /* {[], d, n} (input c) ---> (emit[hibit(n)] 256) {c, d, n} */
184
  if (st->s == NULL) {
185
    lzw_emit(256, st);
186
    goto singleton;  /* enter singleton state c */
187
  } 
188

    
189
  /* {s,d,n} (input c) ---> {s*c,d,n} */
190
  list_find(e, st->s->children, e->c == c);
191
  if (e) {
192
    e->freq++;
193
    st->s = e;
194
    return 0;
195
  }
196

    
197
  /* {s,d,n} (input c) ---> (emit.c[hibit(n)]ode) {c,d',n+1} */
198
  /* {s,d,n} (input c) --->
199
             (emit.c[hibit(n)]ode) (emit[hibit(n+1)] 256) {c, newdict, 258} */
200

    
201
  lzw_emit(st->s->code, st); /* 9-12 bits */
202
  if (st->n >= 4094) {   /* hibit(n+2) > 12 */  /* ### was: 4093 */
203
    st->n++;
204
    lzw_emit(256, st);
205
    goto dictfull;    /* reset dictionary and enter singleton state c */
206
  }
207

    
208
  /* insert new code in dictionary, if possible */
209
  e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t));
210
  if (!e) {
211
    return 1;
212
  }
213
  e->c = c;
214
  e->code = st->n;
215
  e->freq = 1;
216
  e->children = NULL;
217
  list_prepend(st->s->children, e);
218
  st->n++;
219
  goto singleton;  /* enter singleton state c */
220

    
221
 dictfull:    /* reset dictionary and enter singleton state c */
222
  lzw_clear_table(st);
223
  /* fall through */
224
  
225
 singleton:   /* enter singleton state c */
226
  list_find(e, st->d, e->c == c);
227
  if (!e) {  /* not found: lazily add it */
228
    e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t));
229
    if (!e) {
230
      return 1;
231
    }
232
    e->c = c;
233
    e->code = (int)(unsigned char)c;
234
    e->freq = 0;
235
    e->children = NULL;
236
    list_prepend(st->d, e);
237
  }
238
  e->freq++;
239
  st->s = e;
240
  return 0;
241
}
242

    
243
/* perform state transition of the state st on input EOD. The leaves
244
   the dictionary state undefined and writes to the bit buffer.
245
   Precondition: st->bufsize <= 7. This function must be called
246
   exactly once, at the end of the stream. */
247
static void lzw_encode_eod(lzw_state_t *st) {
248

    
249
  /* {[],d,n} (input EOD) ---> 
250
              (emit[hibit(n)] 256) (emit[hibit(n+1)] 257) stop */
251
  if (st->s == NULL) {
252
    lzw_emit(256, st);  /* 9 bits */
253
    st->n=258;
254
    lzw_emit(257, st);  /* 9 bits */
255
    return;
256
  } 
257

    
258
  /* {s,d,n} (input EOD) ---> 
259
             (emit.c[hibit(n)]ode) (emit[hibit(n+1)] 257) stop */
260

    
261
  lzw_emit(st->s->code, st); /* 9-12 bits */
262
  st->n++;
263
  lzw_emit(257, st);  /* 9-12 bits */
264
  return;
265
}
266

    
267
/* ---------------------------------------------------------------------- */
268
/* User visible functions. These implement a buffer interface. See
269
   lzw.h for the API description. */
270

    
271
lzw_stream_t *lzw_init(void) {
272
  lzw_stream_t *s = NULL;
273
  lzw_state_t *st = NULL;
274

    
275
  s = (lzw_stream_t *)malloc(sizeof(lzw_stream_t));
276
  if (s==NULL) {
277
    goto fail;
278
  }
279
  st = (lzw_state_t *)malloc(sizeof(lzw_state_t));
280
  if (st==NULL) {
281
    goto fail;
282
  }
283
  st->buf = 0;
284
  st->bufsize = 0;
285
  st->eod = 0;
286
  st->d = NULL;
287
  lzw_clear_table(st);
288
  s->internal = (void *) st;
289
  return s;
290

    
291
 fail:
292
  free(s);
293
  free(st);
294
  return NULL;
295
}
296

    
297
int lzw_compress(lzw_stream_t *s, int mode) {
298
  int r;
299
  lzw_state_t *st = (lzw_state_t *)s->internal;
300

    
301
  while (st->eod == 0) {
302
    /* empty bit buffer */
303
    while (st->bufsize > 7) {
304
      if (s->avail_out == 0) {
305
        return 0;
306
      } else {
307
        lzw_read_bitbuf(s);
308
      }
309
    }
310
    /* fill bit buffer */
311
    if (s->avail_in == 0) {
312
      break;
313
    } else {
314
      r = lzw_encode_char(st, s->next_in[0]);
315
      if (r) {
316
        if (r==2) {
317
          errno = EINVAL;
318
        }
319
        return 1;
320
      }
321
      s->next_in++;
322
      s->avail_in--;
323
    }
324
  }
325

    
326
  if (mode==LZW_EOD && st->eod == 0) {
327
    st->eod = 1;
328
    lzw_encode_eod(st);
329
  }
330

    
331
  /* flush bit buffer */
332
  if (st->eod) {
333
    while (st->bufsize > 0) {
334
      if (s->avail_out == 0) {
335
        return 0;
336
      } else {
337
        lzw_read_bitbuf(s);
338
      }
339
    }
340
  }
341

    
342
  return 0;
343
}
344

    
345
void lzw_free(lzw_stream_t *s) {
346
  lzw_state_t *st = (lzw_state_t *)s->internal;
347

    
348
  lzw_free_dict(st->d);
349
  free(st);
350
  free(s);
351
}
352

    
353
/* ---------------------------------------------------------------------- */
354
/* main function for testing and illustration purposes */
355

    
356
#ifdef LZW_MAIN
357

    
358
int main() {
359
  lzw_stream_t *s;
360
  int ch;
361
  char inbuf[100];
362
  char outbuf[100];
363
  int i, r;
364
  int mode;
365

    
366
  s = lzw_init();
367
  if (!s) {
368
    goto error;
369
  }
370
  mode = LZW_NORMAL;
371

    
372
  while (1) {
373
    /* fill inbuf */
374
    for (i=0; i<100; i++) {
375
      ch = fgetc(stdin);
376
      if (ch==EOF) {
377
        break;
378
      }
379
      inbuf[i] = ch;
380
    }
381
    if (i<100) {   /* end of input */
382
      mode = LZW_EOD;
383
    }
384

    
385
    /* compress */
386
    s->next_in = inbuf;
387
    s->avail_in = i;
388
    do {
389
      s->next_out = outbuf;
390
      s->avail_out = 100;
391
      r = lzw_compress(s, mode);
392
      if (r) {
393
        goto error;
394
      }
395
      fwrite(outbuf, 1, 100-s->avail_out, stdout);
396
    } while (s->avail_out==0);
397
    if (mode == LZW_EOD) {
398
      break;
399
    }
400
  }
401
  fflush(stdout);
402
  lzw_free(s);
403

    
404
  return 0;
405

    
406
 error:
407
  fprintf(stderr, "lzw: %s\n", strerror(errno));
408
  lzw_free(s);
409
  return 1;
410

    
411
}
412
#endif
413