gvsig-raster / libjni-potrace / trunk / libjni-potrace / resources / potrace-1.8 / src / lzw.c @ 1780
History | View | Annotate | Download (10.8 KB)
1 |
/* Copyright (C) 2001-2007 Peter Selinger.
|
---|---|
2 |
This file is part of Potrace. It is free software and it is covered
|
3 |
by the GNU General Public License. See the file COPYING for details. */
|
4 |
|
5 |
/* $Id: lzw.c 147 2007-04-09 00:44:09Z selinger $ */
|
6 |
|
7 |
/* code for adaptive LZW compression, as used in PostScript. */
|
8 |
|
9 |
#include <stdlib.h> |
10 |
#include <stdio.h> |
11 |
#include <errno.h> |
12 |
#include <string.h> |
13 |
|
14 |
#include "lists.h" |
15 |
#include "bitops.h" |
16 |
#include "lzw.h" |
17 |
|
18 |
/* ---------------------------------------------------------------------- */
|
19 |
/* compression state specification */
|
20 |
|
21 |
/* The compression algorithm follows the following specification,
|
22 |
expressed as a state machine. A state is a triple {s,d,n}, where s
|
23 |
is a string of input symbols, d is a dictionary, which is a
|
24 |
function from strings to output symbols, and n is the dictionary
|
25 |
size, or equivalently, the next unused output symbol. There are
|
26 |
also special states init and stop. (emit.c[b]ode) is a function
|
27 |
which emits the code code as a b-bit value into the output
|
28 |
bitstream. hibit(n) returns the least number of binary digits
|
29 |
required to represent n.
|
30 |
|
31 |
init ---> {[], newdict, 258}
|
32 |
|
33 |
where [] is the empty string, and newdict maps each of the 256
|
34 |
singleton strings to itself. (Note that there are two special
|
35 |
output symbols 256 and 257, so that the next available one is
|
36 |
258). Note: hibit(258)=9.
|
37 |
|
38 |
{[], d, n} (input c) ---> (emit[hibit(n)] 256) {c, d, n}
|
39 |
|
40 |
{s,d,n} (input c) ---> {s*c,d,n}
|
41 |
|
42 |
if s!=[], s*c is in the domain of d. Here s*c is the strings s
|
43 |
extended by the character c.
|
44 |
|
45 |
{s,d,n} (input c) ---> (emit.c[hibit(n)]ode) {c,d',n+1}
|
46 |
|
47 |
if s!=[], s*c is not in the domain of d, and hibit(n+2) <= 12.
|
48 |
Here, d(s)=code, and d'=d+{s*c->n}.
|
49 |
|
50 |
{s,d,n} (input c) --->
|
51 |
(emit.c[hibit(n)]ode) (emit[hibit(n+1)] 256) {c, newdict, 258}
|
52 |
|
53 |
if s!=[], s*c is not in the domain of d, and hibit(n+2) > 12.
|
54 |
Here, d(s)=code, and d'=d+{s*c->n}.
|
55 |
|
56 |
{s,d,n} (input EOD) ---> (emit.c[hibit(n)]ode) (emit[hibit(n+1)] 257) stop
|
57 |
|
58 |
where s != [], code = d(s). Here, EOD stands for end-of-data.
|
59 |
|
60 |
{[],d,n} (input EOD) ---> (emit[hibit(n)] 256) (emit[hibit(258)] 257) stop
|
61 |
|
62 |
Notes:
|
63 |
|
64 |
* Each reachable state {s,d,n} satisfies hibit(n+1) <= 12.
|
65 |
* Only codes of 12 or fewer bits are emitted.
|
66 |
* Each reachable state {s,d,n} satisfies s=[] or s is in the domain of d.
|
67 |
* The domain of d is always prefix closed (except for the empty prefix)
|
68 |
* The state machine is deterministic and non-blocking.
|
69 |
|
70 |
*/
|
71 |
|
72 |
|
73 |
/* ---------------------------------------------------------------------- */
|
74 |
/* private state */
|
75 |
|
76 |
#define BITBUF_TYPE unsigned int |
77 |
|
78 |
/* the dictionary is implemented as a tree of strings under the prefix
|
79 |
order. The tree is in turns represented as a linked list of
|
80 |
lzw_dict_t structures, with "children" pointing to a node's first
|
81 |
child, and "next" pointing to a node's next sibling. As an
|
82 |
optimization, the top-level nodes (singleton strings) are
|
83 |
implemented lazily, i.e., the corresponding entry is not actually
|
84 |
created until it is accessed. */
|
85 |
|
86 |
struct lzw_dict_s {
|
87 |
char c; /* last character of string represented by this entry */ |
88 |
int code; /* code for the string represented by this entry */ |
89 |
int freq; /* how often searched? For optimization only */ |
90 |
struct lzw_dict_s *children; /* list of sub-entries */ |
91 |
struct lzw_dict_s *next; /* for making a linked list */ |
92 |
}; |
93 |
typedef struct lzw_dict_s lzw_dict_t; |
94 |
|
95 |
/* A state {s,d,n} is represented by the "dictionary state" part of
|
96 |
the lzw_state_t structure. Here, s is a pointer directly to the node
|
97 |
of the dictionary structure corresponding to the string s, or NULL
|
98 |
if s=[]. Further, the lzw_state_t structure contains a buffer of
|
99 |
pending output bits, and a flag indicating whether the EOD (end of
|
100 |
data) has been reached in the input. */
|
101 |
|
102 |
struct lzw_state_s {
|
103 |
/* dictionary state */
|
104 |
int n; /* current size of the dictionary */ |
105 |
lzw_dict_t *d; /* pointer to dictionary */
|
106 |
lzw_dict_t *s; /* pointer to current string, or NULL at beginning */
|
107 |
|
108 |
/* buffers for pending output */
|
109 |
BITBUF_TYPE buf; /* bits scheduled for output - left aligned, 0 padded */
|
110 |
int bufsize; /* number of bits scheduled for output. */ |
111 |
int eod; /* flush buffer? */ |
112 |
}; |
113 |
typedef struct lzw_state_s lzw_state_t; |
114 |
|
115 |
/* ---------------------------------------------------------------------- */
|
116 |
/* auxiliary functions which operate on dictionary states */
|
117 |
|
118 |
/* recursively free an lzw_dict_t object */
|
119 |
static void lzw_free_dict(lzw_dict_t *s) { |
120 |
lzw_dict_t *e; |
121 |
|
122 |
list_forall_unlink(e, s) { |
123 |
lzw_free_dict(e->children); |
124 |
free(e); |
125 |
} |
126 |
} |
127 |
|
128 |
/* re-initialize the lzw state's dictionary state to "newdict",
|
129 |
freeing any old dictionary. */
|
130 |
static void lzw_clear_table(lzw_state_t *st) { |
131 |
|
132 |
lzw_free_dict(st->d); |
133 |
st->d = NULL;
|
134 |
st->n = 258;
|
135 |
st->s = NULL;
|
136 |
} |
137 |
|
138 |
/* ---------------------------------------------------------------------- */
|
139 |
/* auxiliary functions for reading/writing the bit buffer */
|
140 |
|
141 |
/* write the code to the bit buffer. Precondition st->bufsize <= 7.
|
142 |
Note: this does not change the dictionary state; in particular,
|
143 |
n must be updated between consecutive calls. */
|
144 |
static inline void lzw_emit(int code, lzw_state_t *st) { |
145 |
BITBUF_TYPE mask; |
146 |
int bits = hibit(st->n);
|
147 |
|
148 |
/* fill bit buffer */
|
149 |
mask = (1 << bits) - 1; |
150 |
code &= mask; |
151 |
|
152 |
st->buf |= code << (8*sizeof(BITBUF_TYPE) - st->bufsize - bits); |
153 |
st->bufsize += bits; |
154 |
} |
155 |
|
156 |
/* transfer one byte from bit buffer to output. Precondition:
|
157 |
s->avail_out > 0. */
|
158 |
static inline void lzw_read_bitbuf(lzw_stream_t *s) { |
159 |
int ch;
|
160 |
lzw_state_t *st = (lzw_state_t *)s->internal; |
161 |
|
162 |
ch = st->buf >> (8*sizeof(BITBUF_TYPE)-8); |
163 |
st->buf <<= 8;
|
164 |
st->bufsize -= 8;
|
165 |
|
166 |
s->next_out[0] = ch;
|
167 |
s->next_out++; |
168 |
s->avail_out--; |
169 |
} |
170 |
|
171 |
/* ---------------------------------------------------------------------- */
|
172 |
/* The following functions implement the state machine. */
|
173 |
|
174 |
/* perform state transition of the state st on input character
|
175 |
ch. This updates the dictionary state and/or writes to the bit
|
176 |
buffer. Precondition: st->bufsize <= 7. Return 0 on success, or 1
|
177 |
on error with errno set. */
|
178 |
static int lzw_encode_char(lzw_state_t *st, char c) { |
179 |
lzw_dict_t *e; |
180 |
|
181 |
/* st = {s,d,n}. hibit(n+1)<=12. */
|
182 |
|
183 |
/* {[], d, n} (input c) ---> (emit[hibit(n)] 256) {c, d, n} */
|
184 |
if (st->s == NULL) { |
185 |
lzw_emit(256, st);
|
186 |
goto singleton; /* enter singleton state c */ |
187 |
} |
188 |
|
189 |
/* {s,d,n} (input c) ---> {s*c,d,n} */
|
190 |
list_find(e, st->s->children, e->c == c); |
191 |
if (e) {
|
192 |
e->freq++; |
193 |
st->s = e; |
194 |
return 0; |
195 |
} |
196 |
|
197 |
/* {s,d,n} (input c) ---> (emit.c[hibit(n)]ode) {c,d',n+1} */
|
198 |
/* {s,d,n} (input c) --->
|
199 |
(emit.c[hibit(n)]ode) (emit[hibit(n+1)] 256) {c, newdict, 258} */
|
200 |
|
201 |
lzw_emit(st->s->code, st); /* 9-12 bits */
|
202 |
if (st->n >= 4094) { /* hibit(n+2) > 12 */ /* ### was: 4093 */ |
203 |
st->n++; |
204 |
lzw_emit(256, st);
|
205 |
goto dictfull; /* reset dictionary and enter singleton state c */ |
206 |
} |
207 |
|
208 |
/* insert new code in dictionary, if possible */
|
209 |
e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t));
|
210 |
if (!e) {
|
211 |
return 1; |
212 |
} |
213 |
e->c = c; |
214 |
e->code = st->n; |
215 |
e->freq = 1;
|
216 |
e->children = NULL;
|
217 |
list_prepend(st->s->children, e); |
218 |
st->n++; |
219 |
goto singleton; /* enter singleton state c */ |
220 |
|
221 |
dictfull: /* reset dictionary and enter singleton state c */ |
222 |
lzw_clear_table(st); |
223 |
/* fall through */
|
224 |
|
225 |
singleton: /* enter singleton state c */ |
226 |
list_find(e, st->d, e->c == c); |
227 |
if (!e) { /* not found: lazily add it */ |
228 |
e = (lzw_dict_t *)malloc(sizeof(lzw_dict_t));
|
229 |
if (!e) {
|
230 |
return 1; |
231 |
} |
232 |
e->c = c; |
233 |
e->code = (int)(unsigned char)c; |
234 |
e->freq = 0;
|
235 |
e->children = NULL;
|
236 |
list_prepend(st->d, e); |
237 |
} |
238 |
e->freq++; |
239 |
st->s = e; |
240 |
return 0; |
241 |
} |
242 |
|
243 |
/* perform state transition of the state st on input EOD. The leaves
|
244 |
the dictionary state undefined and writes to the bit buffer.
|
245 |
Precondition: st->bufsize <= 7. This function must be called
|
246 |
exactly once, at the end of the stream. */
|
247 |
static void lzw_encode_eod(lzw_state_t *st) { |
248 |
|
249 |
/* {[],d,n} (input EOD) --->
|
250 |
(emit[hibit(n)] 256) (emit[hibit(n+1)] 257) stop */
|
251 |
if (st->s == NULL) { |
252 |
lzw_emit(256, st); /* 9 bits */ |
253 |
st->n=258;
|
254 |
lzw_emit(257, st); /* 9 bits */ |
255 |
return;
|
256 |
} |
257 |
|
258 |
/* {s,d,n} (input EOD) --->
|
259 |
(emit.c[hibit(n)]ode) (emit[hibit(n+1)] 257) stop */
|
260 |
|
261 |
lzw_emit(st->s->code, st); /* 9-12 bits */
|
262 |
st->n++; |
263 |
lzw_emit(257, st); /* 9-12 bits */ |
264 |
return;
|
265 |
} |
266 |
|
267 |
/* ---------------------------------------------------------------------- */
|
268 |
/* User visible functions. These implement a buffer interface. See
|
269 |
lzw.h for the API description. */
|
270 |
|
271 |
lzw_stream_t *lzw_init(void) {
|
272 |
lzw_stream_t *s = NULL;
|
273 |
lzw_state_t *st = NULL;
|
274 |
|
275 |
s = (lzw_stream_t *)malloc(sizeof(lzw_stream_t));
|
276 |
if (s==NULL) { |
277 |
goto fail;
|
278 |
} |
279 |
st = (lzw_state_t *)malloc(sizeof(lzw_state_t));
|
280 |
if (st==NULL) { |
281 |
goto fail;
|
282 |
} |
283 |
st->buf = 0;
|
284 |
st->bufsize = 0;
|
285 |
st->eod = 0;
|
286 |
st->d = NULL;
|
287 |
lzw_clear_table(st); |
288 |
s->internal = (void *) st;
|
289 |
return s;
|
290 |
|
291 |
fail:
|
292 |
free(s); |
293 |
free(st); |
294 |
return NULL; |
295 |
} |
296 |
|
297 |
int lzw_compress(lzw_stream_t *s, int mode) { |
298 |
int r;
|
299 |
lzw_state_t *st = (lzw_state_t *)s->internal; |
300 |
|
301 |
while (st->eod == 0) { |
302 |
/* empty bit buffer */
|
303 |
while (st->bufsize > 7) { |
304 |
if (s->avail_out == 0) { |
305 |
return 0; |
306 |
} else {
|
307 |
lzw_read_bitbuf(s); |
308 |
} |
309 |
} |
310 |
/* fill bit buffer */
|
311 |
if (s->avail_in == 0) { |
312 |
break;
|
313 |
} else {
|
314 |
r = lzw_encode_char(st, s->next_in[0]);
|
315 |
if (r) {
|
316 |
if (r==2) { |
317 |
errno = EINVAL; |
318 |
} |
319 |
return 1; |
320 |
} |
321 |
s->next_in++; |
322 |
s->avail_in--; |
323 |
} |
324 |
} |
325 |
|
326 |
if (mode==LZW_EOD && st->eod == 0) { |
327 |
st->eod = 1;
|
328 |
lzw_encode_eod(st); |
329 |
} |
330 |
|
331 |
/* flush bit buffer */
|
332 |
if (st->eod) {
|
333 |
while (st->bufsize > 0) { |
334 |
if (s->avail_out == 0) { |
335 |
return 0; |
336 |
} else {
|
337 |
lzw_read_bitbuf(s); |
338 |
} |
339 |
} |
340 |
} |
341 |
|
342 |
return 0; |
343 |
} |
344 |
|
345 |
void lzw_free(lzw_stream_t *s) {
|
346 |
lzw_state_t *st = (lzw_state_t *)s->internal; |
347 |
|
348 |
lzw_free_dict(st->d); |
349 |
free(st); |
350 |
free(s); |
351 |
} |
352 |
|
353 |
/* ---------------------------------------------------------------------- */
|
354 |
/* main function for testing and illustration purposes */
|
355 |
|
356 |
#ifdef LZW_MAIN
|
357 |
|
358 |
int main() {
|
359 |
lzw_stream_t *s; |
360 |
int ch;
|
361 |
char inbuf[100]; |
362 |
char outbuf[100]; |
363 |
int i, r;
|
364 |
int mode;
|
365 |
|
366 |
s = lzw_init(); |
367 |
if (!s) {
|
368 |
goto error;
|
369 |
} |
370 |
mode = LZW_NORMAL; |
371 |
|
372 |
while (1) { |
373 |
/* fill inbuf */
|
374 |
for (i=0; i<100; i++) { |
375 |
ch = fgetc(stdin); |
376 |
if (ch==EOF) { |
377 |
break;
|
378 |
} |
379 |
inbuf[i] = ch; |
380 |
} |
381 |
if (i<100) { /* end of input */ |
382 |
mode = LZW_EOD; |
383 |
} |
384 |
|
385 |
/* compress */
|
386 |
s->next_in = inbuf; |
387 |
s->avail_in = i; |
388 |
do {
|
389 |
s->next_out = outbuf; |
390 |
s->avail_out = 100;
|
391 |
r = lzw_compress(s, mode); |
392 |
if (r) {
|
393 |
goto error;
|
394 |
} |
395 |
fwrite(outbuf, 1, 100-s->avail_out, stdout); |
396 |
} while (s->avail_out==0); |
397 |
if (mode == LZW_EOD) {
|
398 |
break;
|
399 |
} |
400 |
} |
401 |
fflush(stdout); |
402 |
lzw_free(s); |
403 |
|
404 |
return 0; |
405 |
|
406 |
error:
|
407 |
fprintf(stderr, "lzw: %s\n", strerror(errno));
|
408 |
lzw_free(s); |
409 |
return 1; |
410 |
|
411 |
} |
412 |
#endif
|
413 |
|