# Source: root / tags / v1_0_2_Build_915 / extensions / extScripting /
#         scripts / jython / Lib / gzip.py @ revision 12217 (11.7 KB)
# (Captured from a repository web viewer; page navigation text removed.)

"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""
5

    
6
# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7

    
8
import struct, sys, time
9
import zlib
10
import __builtin__
11

    
12
# Public names exported by a star-import of this module.
__all__ = ["GzipFile","open"]
13

    
14
# Bit masks tested against the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record how the stream was opened.
READ = 1
WRITE = 2
17

    
18
def write32(output, value):
    """Write value to output as a signed 32-bit little-endian integer.

    output -- object with a write() method accepting the packed bytes.
    value  -- integer that fits in a signed 32-bit field.
    """
    packed = struct.pack("<l", value)
    output.write(packed)
20

    
21
def write32u(output, value):
    """Write value to output as an unsigned 32-bit little-endian integer.

    A negative value is first shifted up by 2**32, so a signed 32-bit
    quantity is emitted with the same bit pattern in unsigned form.

    output -- object with a write() method accepting the packed bytes.
    value  -- integer to write.
    """
    unsigned = value
    if unsigned < 0:
        # 0x100000000 == 2**32: map the negative value into unsigned range.
        unsigned = unsigned + 0x100000000
    output.write(struct.pack("<L", unsigned))
25

    
26
def read32(input):
    """Read four bytes from input and return them as a signed 32-bit
    little-endian integer.

    input -- object with a read() method.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<l", raw)
    return value
28

    
29
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    filename      -- path of the gzip file to open.
    mode          -- file mode, passed through to GzipFile.
    compresslevel -- compression level, passed through to GzipFile.

    Returns the new GzipFile instance.
    """
    gzip_file = GzipFile(filename, mode, compresslevel)
    return gzip_file
31

    
32
class GzipFile:
    """File-like object that reads or writes data in gzip format.

    In read mode the compressed stream is decompressed on demand into an
    internal buffer (extrabuf / extrasize); in write mode, data passed to
    write() is compressed and forwarded to the underlying file object.
    Random access is not supported.
    """

    # File object opened (and therefore owned and closed) by __init__;
    # stays None when the caller supplied its own fileobj.
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Open a gzip stream for reading or writing.

        filename      -- path to open when fileobj is None.  Defaults to
                         fileobj.name, or '' if fileobj has no name.
        mode          -- mode string whose first letter is 'r', 'w' or 'a'.
                         Defaults to fileobj.mode, or 'rb' if it has none.
        compresslevel -- level passed to zlib.compressobj (write mode only).
        fileobj       -- existing file-like object to wrap instead of
                         opening filename.

        Raises ValueError if the mode starts with any other letter.
        """
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'):
                filename = fileobj.name
            else:
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        kind = mode[0:1]
        if kind == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = 1
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename

        elif kind == 'w' or kind == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: raw deflate data, the gzip framing is
            # written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            # Do not leak a file that was opened above for a mode that is
            # rejected here.
            if self.myfileobj is not None:
                self.myfileobj.close()
                self.myfileobj = None
            raise ValueError("Mode " + mode + " not supported")

        self.fileobj = fileobj

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        """Return '<gzip ...>' built from the wrapped file object's repr."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the write-side state and record the '.gz' file name."""
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        # Original name = stored file name without its '.gz' suffix.
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, int(time.time()))   # modification time
        self.fileobj.write('\002')                 # extra flags (RFC 1952 XFL)
        self.fileobj.write('\377')                 # OS byte (RFC 1952: unknown)
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from the underlying file object.

        Raises IOError if the magic number or compression method is wrong.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        # Skip modtime (4 bytes), extraflag (1 byte) and os (1 byte).
        self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256 * ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while 1:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while 1:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Raises ValueError if the GzipFile has already been closed.
        """
        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write(self.compress.compress(data))

    def read(self, size=-1):
        """Return up to size decompressed bytes; everything if size < 0.

        Returns '' once the buffer is empty and the stream is exhausted.
        """
        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while 1:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = readsize * 2
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        return chunk

    def _unread(self, buf):
        """Push buf back onto the front of the decompressed-data buffer."""
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize

    def _read(self, size=1024):
        """Read and decompress up to size compressed bytes into the buffer.

        Raises EOFError when there is no more data; self.fileobj is set to
        None once the end of the underlying file has been reached.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                self.fileobj = None
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = 0

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.
        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self.fileobj = None
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = 1

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size."""
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the 8-byte member trailer (CRC-32, then size).

        Raises ValueError if the stored CRC or size does not match the
        values computed while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        self.fileobj.seek(-8, 1)
        crc32, isize = struct.unpack("<LL", self.fileobj.read(8))
        # Both trailer fields are unsigned 32-bit values; the size field
        # holds the uncompressed length modulo 2**32 (RFC 1952), so the
        # computed values are reduced the same way before comparing.
        if crc32 != self.crc % 0x100000000:
            raise ValueError("CRC check failed")
        elif isize != self.size % 0x100000000:
            raise ValueError("Incorrect length of data produced")

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the remaining compressed data and the trailer are
        written first.  A file opened by __init__ is closed; a fileobj
        supplied by the caller is left open.  Calling close() more than
        once is harmless.
        """
        fileobj = self.fileobj
        if fileobj is not None:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                # Trailer fields are unsigned 32-bit, taken modulo 2**32.
                write32u(fileobj, self.crc % 0x100000000)
                write32u(fileobj, self.size % 0x100000000)
            self.fileobj = None
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        """Close the stream on garbage collection if still open."""
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ did not get far enough to set the attributes.
            return
        self.close()

    def flush(self):
        """Flush the underlying file object."""
        self.fileobj.flush()

    def isatty(self):
        """Return 0: a GzipFile is never a terminal."""
        return 0

    def readline(self, size=-1):
        """Return one line, including its newline, of at most size bytes.

        A negative size means no limit.  Returns '' at end of data.
        """
        if size < 0:
            size = sys.maxint
        bufs = []
        readsize = min(100, size)    # Read from the file in small chunks
        while 1:
            if size == 0:
                return "".join(bufs) # Return resulting line

            c = self.read(readsize)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i == -1 and len(c) > size:
                i = size - 1
            elif size <= i:
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i+1])    # Add portion of last chunk
                self._unread(c[i+1:])   # Push back rest of chunk
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping once about sizehint bytes
        have been collected; sizehint <= 0 reads all lines.
        """
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        """Write each string in L in order; no separators are added."""
        for line in L:
            self.write(line)
317

    
318

    
319
def _test():
    """Command-line driver: act like gzip; with -d, act like gunzip.

    Each remaining argument is a file name, or "-" for stdin/stdout
    (the default when no names are given).  The input file is not
    deleted, nor are any other gzip options or features supported.
    """
    import sys
    names = sys.argv[1:]
    decompress = names and names[0] == "-d"
    if decompress:
        names = names[1:]
    if not names:
        names = ["-"]
    for name in names:
        use_std_streams = (name == "-")
        if decompress:
            if use_std_streams:
                source = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                sink = sys.stdout
            else:
                if name[-3:] != ".gz":
                    sys.stdout.write("filename doesn't end in .gz:" + " " +
                                     repr(name) + "\n")
                    continue
                # open() here is this module's gzip-aware open().
                source = open(name, "rb")
                sink = __builtin__.open(name[:-3], "wb")
        else:
            if use_std_streams:
                source = sys.stdin
                sink = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                source = __builtin__.open(name, "rb")
                sink = open(name + ".gz", "wb")
        # Copy the stream across in 1 KB pieces until the source runs dry.
        chunk = source.read(1024)
        while chunk:
            sink.write(chunk)
            chunk = source.read(1024)
        # Never close the process-wide standard streams themselves.
        if sink is not sys.stdout:
            sink.close()
        if source is not sys.stdin:
            source.close()
357

    
358
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    _test()