Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / protocol.py @ 959

History | View | Annotate | Download (16 KB)

1
# protocol.py -- Shared parts of the git protocols
2
# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3
# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@samba.org>
4
#
5
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6
# General Public License as public by the Free Software Foundation; version 2.0
7
# or (at your option) any later version. You can redistribute it and/or
8
# modify it under the terms of either of these two licenses.
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
#
16
# You should have received a copy of the licenses; if not, see
17
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19
# License, Version 2.0.
20
#
21

    
22
"""Generic functions for talking the git smart server protocol."""
23

    
24
from io import BytesIO
25
from os import (
26
    SEEK_END,
27
    )
28
import socket
29

    
30
import dulwich
31
from dulwich.errors import (
32
    HangupException,
33
    GitProtocolError,
34
    )
35

    
36
TCP_GIT_PORT = 9418
37

    
38
ZERO_SHA = b"0" * 40
39

    
40
SINGLE_ACK = 0
41
MULTI_ACK = 1
42
MULTI_ACK_DETAILED = 2
43

    
44
# pack data
45
SIDE_BAND_CHANNEL_DATA = 1
46
# progress messages
47
SIDE_BAND_CHANNEL_PROGRESS = 2
48
# fatal error message just before stream aborts
49
SIDE_BAND_CHANNEL_FATAL = 3
50

    
51
CAPABILITY_DELETE_REFS = b'delete-refs'
52
CAPABILITY_INCLUDE_TAG = b'include-tag'
53
CAPABILITY_MULTI_ACK = b'multi_ack'
54
CAPABILITY_MULTI_ACK_DETAILED = b'multi_ack_detailed'
55
CAPABILITY_NO_DONE = b'no-done'
56
CAPABILITY_NO_PROGRESS = b'no-progress'
57
CAPABILITY_OFS_DELTA = b'ofs-delta'
58
CAPABILITY_QUIET = b'quiet'
59
CAPABILITY_REPORT_STATUS = b'report-status'
60
CAPABILITY_SHALLOW = b'shallow'
61
CAPABILITY_SIDE_BAND_64K = b'side-band-64k'
62
CAPABILITY_THIN_PACK = b'thin-pack'
63
CAPABILITY_AGENT = b'agent'
64

    
65
# Magic ref that is used to attach capabilities to when
66
# there are no refs. Should always be ste to ZERO_SHA.
67
CAPABILITIES_REF = b'capabilities^{}'
68

    
69

    
70
def agent_string():
71
    return ('dulwich/%d.%d.%d' % dulwich.__version__).encode('ascii')
72

    
73

    
74
def capability_agent():
75
    return CAPABILITY_AGENT + b'=' + agent_string()
76

    
77

    
78
COMMAND_DEEPEN = b'deepen'
79
COMMAND_SHALLOW = b'shallow'
80
COMMAND_UNSHALLOW = b'unshallow'
81
COMMAND_DONE = b'done'
82
COMMAND_WANT = b'want'
83
COMMAND_HAVE = b'have'
84

    
85

    
86
class ProtocolFile(object):
87
    """A dummy file for network ops that expect file-like objects."""
88

    
89
    def __init__(self, read, write):
90
        self.read = read
91
        self.write = write
92

    
93
    def tell(self):
94
        pass
95

    
96
    def close(self):
97
        pass
98

    
99

    
100
def pkt_line(data):
101
    """Wrap data in a pkt-line.
102

103
    :param data: The data to wrap, as a str or None.
104
    :return: The data prefixed with its length in pkt-line format; if data was
105
        None, returns the flush-pkt ('0000').
106
    """
107
    if data is None:
108
        return b'0000'
109
    return ('%04x' % (len(data) + 4)).encode('ascii') + data
110

    
111

    
112
class Protocol(object):
113
    """Class for interacting with a remote git process over the wire.
114

115
    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
116
    consists of the length of the line as a 4-byte hex string, followed by the
117
    payload data. The length includes the 4-byte header. The special line '0000'
118
    indicates the end of a section of input and is called a 'flush-pkt'.
119

120
    For details on the pkt-line format, see the cgit distribution:
121
        Documentation/technical/protocol-common.txt
122
    """
123

    
124
    def __init__(self, read, write, close=None, report_activity=None):
125
        self.read = read
126
        self.write = write
127
        self._close = close
128
        self.report_activity = report_activity
129
        self._readahead = None
130

    
131
    def close(self):
132
        if self._close:
133
            self._close()
134

    
135
    def __enter__(self):
136
        return self
137

    
138
    def __exit__(self, exc_type, exc_val, exc_tb):
139
        self.close()
140

    
141
    def read_pkt_line(self):
142
        """Reads a pkt-line from the remote git process.
143

144
        This method may read from the readahead buffer; see unread_pkt_line.
145

146
        :return: The next string from the stream, without the length prefix, or
147
            None for a flush-pkt ('0000').
148
        """
149
        if self._readahead is None:
150
            read = self.read
151
        else:
152
            read = self._readahead.read
153
            self._readahead = None
154

    
155
        try:
156
            sizestr = read(4)
157
            if not sizestr:
158
                raise HangupException()
159
            size = int(sizestr, 16)
160
            if size == 0:
161
                if self.report_activity:
162
                    self.report_activity(4, 'read')
163
                return None
164
            if self.report_activity:
165
                self.report_activity(size, 'read')
166
            pkt_contents = read(size-4)
167
        except socket.error as e:
168
            raise GitProtocolError(e)
169
        else:
170
            if len(pkt_contents) + 4 != size:
171
                raise GitProtocolError(
172
                    'Length of pkt read %04x does not match length prefix %04x' % (len(pkt_contents) + 4, size))
173
            return pkt_contents
174

    
175
    def eof(self):
176
        """Test whether the protocol stream has reached EOF.
177

178
        Note that this refers to the actual stream EOF and not just a flush-pkt.
179

180
        :return: True if the stream is at EOF, False otherwise.
181
        """
182
        try:
183
            next_line = self.read_pkt_line()
184
        except HangupException:
185
            return True
186
        self.unread_pkt_line(next_line)
187
        return False
188

    
189
    def unread_pkt_line(self, data):
190
        """Unread a single line of data into the readahead buffer.
191

192
        This method can be used to unread a single pkt-line into a fixed
193
        readahead buffer.
194

195
        :param data: The data to unread, without the length prefix.
196
        :raise ValueError: If more than one pkt-line is unread.
197
        """
198
        if self._readahead is not None:
199
            raise ValueError('Attempted to unread multiple pkt-lines.')
200
        self._readahead = BytesIO(pkt_line(data))
201

    
202
    def read_pkt_seq(self):
203
        """Read a sequence of pkt-lines from the remote git process.
204

205
        :return: Yields each line of data up to but not including the next flush-pkt.
206
        """
207
        pkt = self.read_pkt_line()
208
        while pkt:
209
            yield pkt
210
            pkt = self.read_pkt_line()
211

    
212
    def write_pkt_line(self, line):
213
        """Sends a pkt-line to the remote git process.
214

215
        :param line: A string containing the data to send, without the length
216
            prefix.
217
        """
218
        try:
219
            line = pkt_line(line)
220
            self.write(line)
221
            if self.report_activity:
222
                self.report_activity(len(line), 'write')
223
        except socket.error as e:
224
            raise GitProtocolError(e)
225

    
226
    def write_file(self):
227
        """Return a writable file-like object for this protocol."""
228

    
229
        class ProtocolFile(object):
230

    
231
            def __init__(self, proto):
232
                self._proto = proto
233
                self._offset = 0
234

    
235
            def write(self, data):
236
                self._proto.write(data)
237
                self._offset += len(data)
238

    
239
            def tell(self):
240
                return self._offset
241

    
242
            def close(self):
243
                pass
244

    
245
        return ProtocolFile(self)
246

    
247
    def write_sideband(self, channel, blob):
248
        """Write multiplexed data to the sideband.
249

250
        :param channel: An int specifying the channel to write to.
251
        :param blob: A blob of data (as a string) to send on this channel.
252
        """
253
        # a pktline can be a max of 65520. a sideband line can therefore be
254
        # 65520-5 = 65515
255
        # WTF: Why have the len in ASCII, but the channel in binary.
256
        while blob:
257
            self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
258
            blob = blob[65515:]
259

    
260
    def send_cmd(self, cmd, *args):
261
        """Send a command and some arguments to a git server.
262

263
        Only used for the TCP git protocol (git://).
264

265
        :param cmd: The remote service to access.
266
        :param args: List of arguments to send to remove service.
267
        """
268
        self.write_pkt_line(cmd + b" " + b"".join([(a + b"\0") for a in args]))
269

    
270
    def read_cmd(self):
271
        """Read a command and some arguments from the git client
272

273
        Only used for the TCP git protocol (git://).
274

275
        :return: A tuple of (command, [list of arguments]).
276
        """
277
        line = self.read_pkt_line()
278
        splice_at = line.find(b" ")
279
        cmd, args = line[:splice_at], line[splice_at+1:]
280
        assert args[-1:] == b"\x00"
281
        return cmd, args[:-1].split(b"\0")
282

    
283

    
284
_RBUFSIZE = 8192  # Default read buffer size.
285

    
286

    
287
class ReceivableProtocol(Protocol):
288
    """Variant of Protocol that allows reading up to a size without blocking.
289

290
    This class has a recv() method that behaves like socket.recv() in addition
291
    to a read() method.
292

293
    If you want to read n bytes from the wire and block until exactly n bytes
294
    (or EOF) are read, use read(n). If you want to read at most n bytes from the
295
    wire but don't care if you get less, use recv(n). Note that recv(n) will
296
    still block until at least one byte is read.
297
    """
298

    
299
    def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
300
        super(ReceivableProtocol, self).__init__(self.read, write,
301
                                                 report_activity)
302
        self._recv = recv
303
        self._rbuf = BytesIO()
304
        self._rbufsize = rbufsize
305

    
306
    def read(self, size):
307
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
308
        # with the following modifications:
309
        #  - omit the size <= 0 branch
310
        #  - seek back to start rather than 0 in case some buffer has been
311
        #    consumed.
312
        #  - use SEEK_END instead of the magic number.
313
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
314
        # Licensed under the Python Software Foundation License.
315
        # TODO: see if buffer is more efficient than cBytesIO.
316
        assert size > 0
317

    
318
        # Our use of BytesIO rather than lists of string objects returned by
319
        # recv() minimizes memory usage and fragmentation that occurs when
320
        # rbufsize is large compared to the typical return value of recv().
321
        buf = self._rbuf
322
        start = buf.tell()
323
        buf.seek(0, SEEK_END)
324
        # buffer may have been partially consumed by recv()
325
        buf_len = buf.tell() - start
326
        if buf_len >= size:
327
            # Already have size bytes in our buffer?  Extract and return.
328
            buf.seek(start)
329
            rv = buf.read(size)
330
            self._rbuf = BytesIO()
331
            self._rbuf.write(buf.read())
332
            self._rbuf.seek(0)
333
            return rv
334

    
335
        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
336
        while True:
337
            left = size - buf_len
338
            # recv() will malloc the amount of memory given as its
339
            # parameter even though it often returns much less data
340
            # than that.  The returned data string is short lived
341
            # as we copy it into a BytesIO and free it.  This avoids
342
            # fragmentation issues on many platforms.
343
            data = self._recv(left)
344
            if not data:
345
                break
346
            n = len(data)
347
            if n == size and not buf_len:
348
                # Shortcut.  Avoid buffer data copies when:
349
                # - We have no data in our buffer.
350
                # AND
351
                # - Our call to recv returned exactly the
352
                #   number of bytes we were asked to read.
353
                return data
354
            if n == left:
355
                buf.write(data)
356
                del data  # explicit free
357
                break
358
            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
359
            buf.write(data)
360
            buf_len += n
361
            del data  # explicit free
362
            #assert buf_len == buf.tell()
363
        buf.seek(start)
364
        return buf.read()
365

    
366
    def recv(self, size):
367
        assert size > 0
368

    
369
        buf = self._rbuf
370
        start = buf.tell()
371
        buf.seek(0, SEEK_END)
372
        buf_len = buf.tell()
373
        buf.seek(start)
374

    
375
        left = buf_len - start
376
        if not left:
377
            # only read from the wire if our read buffer is exhausted
378
            data = self._recv(self._rbufsize)
379
            if len(data) == size:
380
                # shortcut: skip the buffer if we read exactly size bytes
381
                return data
382
            buf = BytesIO()
383
            buf.write(data)
384
            buf.seek(0)
385
            del data  # explicit free
386
            self._rbuf = buf
387
        return buf.read(size)
388

    
389

    
390
def extract_capabilities(text):
391
    """Extract a capabilities list from a string, if present.
392

393
    :param text: String to extract from
394
    :return: Tuple with text with capabilities removed and list of capabilities
395
    """
396
    if not b"\0" in text:
397
        return text, []
398
    text, capabilities = text.rstrip().split(b"\0")
399
    return (text, capabilities.strip().split(b" "))
400

    
401

    
402
def extract_want_line_capabilities(text):
403
    """Extract a capabilities list from a want line, if present.
404

405
    Note that want lines have capabilities separated from the rest of the line
406
    by a space instead of a null byte. Thus want lines have the form:
407

408
        want obj-id cap1 cap2 ...
409

410
    :param text: Want line to extract from
411
    :return: Tuple with text with capabilities removed and list of capabilities
412
    """
413
    split_text = text.rstrip().split(b" ")
414
    if len(split_text) < 3:
415
        return text, []
416
    return (b" ".join(split_text[:2]), split_text[2:])
417

    
418

    
419
def ack_type(capabilities):
420
    """Extract the ack type from a capabilities list."""
421
    if b'multi_ack_detailed' in capabilities:
422
        return MULTI_ACK_DETAILED
423
    elif b'multi_ack' in capabilities:
424
        return MULTI_ACK
425
    return SINGLE_ACK
426

    
427

    
428
class BufferedPktLineWriter(object):
429
    """Writer that wraps its data in pkt-lines and has an independent buffer.
430

431
    Consecutive calls to write() wrap the data in a pkt-line and then buffers it
432
    until enough lines have been written such that their total length (including
433
    length prefix) reach the buffer size.
434
    """
435

    
436
    def __init__(self, write, bufsize=65515):
437
        """Initialize the BufferedPktLineWriter.
438

439
        :param write: A write callback for the underlying writer.
440
        :param bufsize: The internal buffer size, including length prefixes.
441
        """
442
        self._write = write
443
        self._bufsize = bufsize
444
        self._wbuf = BytesIO()
445
        self._buflen = 0
446

    
447
    def write(self, data):
448
        """Write data, wrapping it in a pkt-line."""
449
        line = pkt_line(data)
450
        line_len = len(line)
451
        over = self._buflen + line_len - self._bufsize
452
        if over >= 0:
453
            start = line_len - over
454
            self._wbuf.write(line[:start])
455
            self.flush()
456
        else:
457
            start = 0
458
        saved = line[start:]
459
        self._wbuf.write(saved)
460
        self._buflen += len(saved)
461

    
462
    def flush(self):
463
        """Flush all data from the buffer."""
464
        data = self._wbuf.getvalue()
465
        if data:
466
            self._write(data)
467
        self._len = 0
468
        self._wbuf = BytesIO()
469

    
470

    
471
class PktLineParser(object):
472
    """Packet line parser that hands completed packets off to a callback.
473
    """
474

    
475
    def __init__(self, handle_pkt):
476
        self.handle_pkt = handle_pkt
477
        self._readahead = BytesIO()
478

    
479
    def parse(self, data):
480
        """Parse a fragment of data and call back for any completed packets.
481
        """
482
        self._readahead.write(data)
483
        buf = self._readahead.getvalue()
484
        if len(buf) < 4:
485
            return
486
        while len(buf) >= 4:
487
            size = int(buf[:4], 16)
488
            if size == 0:
489
                self.handle_pkt(None)
490
                buf = buf[4:]
491
            elif size <= len(buf):
492
                self.handle_pkt(buf[4:size])
493
                buf = buf[size:]
494
            else:
495
                break
496
        self._readahead = BytesIO()
497
        self._readahead.write(buf)
498

    
499
    def get_tail(self):
500
        """Read back any unused data."""
501
        return self._readahead.getvalue()