Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / web.py @ 959

History | View | Annotate | Download (17.1 KB)

1
# web.py -- WSGI smart-http server
2
# Copyright (C) 2010 Google, Inc.
3
# Copyright (C) 2012 Jelmer Vernooij <jelmer@samba.org>
4
#
5
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6
# General Public License as published by the Free Software Foundation; version 2.0
7
# or (at your option) any later version. You can redistribute it and/or
8
# modify it under the terms of either of these two licenses.
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
#
16
# You should have received a copy of the licenses; if not, see
17
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19
# License, Version 2.0.
20
#
21

    
22
"""HTTP server for dulwich that implements the git smart HTTP protocol."""
23

    
24
from io import BytesIO
25
import shutil
26
import tempfile
27
import gzip
28
import os
29
import re
30
import sys
31
import time
32
from wsgiref.simple_server import (
33
    WSGIRequestHandler,
34
    ServerHandler,
35
    WSGIServer,
36
    make_server,
37
    )
38

    
39
try:
40
    from urlparse import parse_qs
41
except ImportError:
42
    from urllib.parse import parse_qs
43

    
44

    
45
from dulwich import log_utils
46
from dulwich.protocol import (
47
    ReceivableProtocol,
48
    )
49
from dulwich.repo import (
50
    Repo,
51
    )
52
from dulwich.server import (
53
    DictBackend,
54
    DEFAULT_HANDLERS,
55
    generate_info_refs,
56
    generate_objects_info_packs,
57
    )
58

    
59

    
60
# Module-level logger for this module, obtained through dulwich's log_utils.
logger = log_utils.getLogger(__name__)
61

    
62

    
63
# HTTP status lines handed to the WSGI start_response callable by
# HTTPGitRequest.respond(). Not all of them are errors: HTTP_OK is the
# success status.
64
HTTP_OK = '200 OK'
65
HTTP_NOT_FOUND = '404 Not Found'
66
HTTP_FORBIDDEN = '403 Forbidden'
67
HTTP_ERROR = '500 Internal Server Error'
68

    
69

    
70
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    :param timestamp: Seconds since the epoch; defaults to the current time.
    :return: The date rendered in UTC, e.g. 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # Index 0 is a placeholder because struct_time months are 1-based.
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    year, month, day, hh, mm, ss, wd, _yday, _isdst = time.gmtime(timestamp)
    # HTTP dates must carry the literal zone name "GMT".
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
86

    
87

    
88
def url_prefix(mat):
    """Return the part of the matched URL that precedes the match.

    :param mat: A regex match object.
    :returns: The text of the original string before the start of the match,
        with all leading and trailing slashes removed and a single leading
        slash put back (so an empty prefix becomes '/').
    """
    text_before_match = mat.string[:mat.start()]
    return '/' + text_before_match.strip('/')
97

    
98

    
99
def get_repo(backend, mat):
    """Open the repository addressed by a matched URL.

    :param backend: Object providing open_repository(path).
    :param mat: A regex match object for the request path.
    :return: Whatever backend.open_repository returns for the URL prefix.
    """
    repo_path = url_prefix(mat)
    return backend.open_repository(repo_path)
102

    
103

    
104
def send_file(req, f, content_type):
    """Send a file-like object to the request output.

    :param req: The HTTPGitRequest object to send output to.
    :param f: An open file-like object to send; will be closed. If None, a
        not-found response is produced instead.
    :param content_type: The MIME type for the file.
    :return: Iterator over the contents of the file, as chunks. If an IOError
        occurs, the iterator yields the body of an error response instead.
    """
    if f is None:
        yield req.not_found('File not found')
        return
    read_failed = False
    try:
        req.respond(HTTP_OK, content_type)
        while True:
            data = f.read(10240)
            if not data:
                break
            yield data
    except IOError:
        # Defer the error response until the file has been closed below.
        read_failed = True
    finally:
        # Runs on normal completion, on any exception, and when the consumer
        # abandons the generator early, so the file is always closed once.
        f.close()
    if read_failed:
        yield req.error('Error reading file')
129

    
130

    
131
def _url_to_path(url):
    """Convert a '/'-separated URL fragment to use the OS path separator."""
    return os.path.sep.join(url.split('/'))
133

    
134

    
135
def get_text_file(req, backend, mat):
    """Serve the matched repository file as uncacheable plain text.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: The iterator produced by send_file.
    """
    req.nocache()
    path = _url_to_path(mat.group())
    logger.info('Sending plain text file %s', path)
    named_file = get_repo(backend, mat).get_named_file(path)
    return send_file(req, named_file, 'text/plain')
141

    
142

    
143
def get_loose_object(req, backend, mat):
    """Serve a single loose object in its legacy on-disk form.

    The object id is assembled from the two hex groups captured by the URL
    regex.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: Iterator yielding the object data, or the body of a not-found
        or error response.
    """
    sha = ''.join(mat.group(1, 2)).encode('ascii')
    logger.info('Sending loose object %s', sha)
    store = get_repo(backend, mat).object_store
    if store.contains_loose(sha):
        try:
            payload = store[sha].as_legacy_object()
        except IOError:
            yield req.error('Error reading object')
        else:
            req.cache_forever()
            req.respond(HTTP_OK, 'application/x-git-loose-object')
            yield payload
    else:
        yield req.not_found('Object not found')
158

    
159

    
160
def get_pack_file(req, backend, mat):
    """Serve the matched pack file with cache-forever headers.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: The iterator produced by send_file.
    """
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack file %s', path)
    pack = get_repo(backend, mat).get_named_file(path)
    return send_file(req, pack, 'application/x-git-packed-objects')
166

    
167

    
168
def get_idx_file(req, backend, mat):
    """Serve the matched pack index (.idx) file with cache-forever headers.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: The iterator produced by send_file.
    """
    req.cache_forever()
    path = _url_to_path(mat.group())
    # NOTE(review): the log text says "pack file" although this handler
    # serves the index; the wording is shared with get_pack_file.
    logger.info('Sending pack file %s', path)
    index = get_repo(backend, mat).get_named_file(path)
    return send_file(req, index, 'application/x-git-packed-objects-toc')
174

    
175

    
176
def get_info_refs(req, backend, mat):
    """Answer a GET for info/refs, in smart or dumb mode.

    When the query string names a service and the request is not in dumb
    mode, the matching handler is run with advertise_refs=True. Otherwise the
    refs are emitted as plain text via generate_info_refs.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: Iterator over response body chunks (empty for the smart path,
        where output goes through the write callable from req.respond).
    """
    query = parse_qs(req.environ['QUERY_STRING'])
    service = query.get('service', [None])[0]

    if not service or req.dumb:
        # non-smart fallback
        # TODO: select_getanyfile() (see http-backend.c)
        req.nocache()
        req.respond(HTTP_OK, 'text/plain')
        logger.info('Emulating dumb info/refs')
        repo = get_repo(backend, mat)
        for line in generate_info_refs(repo):
            yield line
        return

    service_name = service.encode('ascii')
    handler_cls = req.handlers.get(service_name, None)
    if handler_cls is None:
        yield req.forbidden('Unsupported service')
        return

    req.nocache()
    write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
    # The handler gets an empty input stream for the advertisement.
    proto = ReceivableProtocol(BytesIO().read, write)
    handler = handler_cls(backend, [url_prefix(mat)], proto,
                          http_req=req, advertise_refs=True)
    handler.proto.write_pkt_line(b'# service=' + service_name + b'\n')
    handler.proto.write_pkt_line(None)
    handler.handle()
201

    
202

    
203
def get_info_packs(req, backend, mat):
    """Serve objects/info/packs as uncacheable plain text.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend used to open the repository.
    :param mat: The regex match for the request path.
    :return: The iterable returned by generate_objects_info_packs.
    """
    req.nocache()
    req.respond(HTTP_OK, 'text/plain')
    logger.info('Emulating dumb info/packs')
    repo = get_repo(backend, mat)
    return generate_objects_info_packs(repo)
208

    
209

    
210
class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        """Wrap a file-like object.

        :param input: The file-like object to read from.
        :param max_bytes: Total number of bytes that may be read through
            this wrapper.
        """
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        """Read at most size bytes without exceeding the overall limit.

        :param size: Maximum number of bytes to read. None or any negative
            value means "everything that is still allowed".
        :return: The bytes read; b'' once the limit has been reached.
        """
        if self._bytes_avail <= 0:
            return b''
        # Clamp "read everything" requests and oversized requests to what is
        # left. A negative size must never reach the arithmetic below, or it
        # would enlarge the remaining budget.
        if size is None or size < 0 or size > self._bytes_avail:
            size = self._bytes_avail
        data = self._input.read(size)
        # Charge only what was actually returned, so a short read does not
        # discard part of the budget.
        self._bytes_avail -= len(data)
        return data

    # TODO: support more methods as necessary
231

    
232

    
233
def handle_service_request(req, backend, mat):
    """Run the service handler named by the matched POST URL.

    The service name is the matched text with leading slashes removed. The
    handler reads from the request's wsgi.input and writes through the
    callable returned by req.respond.

    :param req: The HTTPGitRequest for this request.
    :param backend: The backend passed to the handler.
    :param mat: The regex match for the request path.
    :return: Iterator that yields a forbidden body for an unknown service
        and nothing otherwise.
    """
    service = mat.group().lstrip('/')
    logger.info('Handling service request for %s', service)
    handler_cls = req.handlers.get(service.encode('ascii'), None)
    if handler_cls is not None:
        req.nocache()
        write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
        reader = req.environ['wsgi.input'].read
        proto = ReceivableProtocol(reader, write)
        handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
        handler.handle()
    else:
        yield req.forbidden('Unsupported service')
245

    
246

    
247
class HTTPGitRequest(object):
    """State holder for one git HTTP request.

    :ivar environ: the WSGI environment for the request.
    :ivar dumb: flag stored from the constructor; handlers consult it.
    :ivar handlers: mapping of service name to handler class.
    """

    def __init__(self, environ, start_response, dumb=False, handlers=None):
        self._start_response = start_response
        self.environ = environ
        self.handlers = handlers
        self.dumb = dumb
        # Response headers accumulated so far, and the cache policy headers
        # appended when the response is started.
        self._headers = []
        self._cache_headers = []

    def add_header(self, name, value):
        """Queue a (name, value) header for the response."""
        self._headers.append((name, value))

    def respond(self, status=HTTP_OK, content_type=None, headers=None):
        """Start the response and return what start_response returns.

        Extra headers, the content type and the current cache headers are
        appended, in that order, to the headers already queued.
        """
        pending = self._headers
        if headers:
            pending.extend(headers)
        if content_type:
            pending.append(('Content-Type', content_type))
        pending.extend(self._cache_headers)
        return self._start_response(status, pending)

    def _fail(self, status, log, fmt, message):
        """Drop cache headers, log, start a text/plain response, return body."""
        self._cache_headers = []
        log(fmt, message)
        self.respond(status, 'text/plain')
        return message.encode('ascii')

    def not_found(self, message):
        """Begin a HTTP 404 response and return the text of a message."""
        return self._fail(HTTP_NOT_FOUND, logger.info,
                          'Not found: %s', message)

    def forbidden(self, message):
        """Begin a HTTP 403 response and return the text of a message."""
        return self._fail(HTTP_FORBIDDEN, logger.info,
                          'Forbidden: %s', message)

    def error(self, message):
        """Begin a HTTP 500 response and return the text of a message."""
        return self._fail(HTTP_ERROR, logger.error,
                          'Error: %s', message)

    def nocache(self):
        """Select cache headers telling the client never to cache."""
        self._cache_headers = [
            ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
            ('Pragma', 'no-cache'),
            ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
        ]

    def cache_forever(self):
        """Select cache headers telling the client to cache for a year."""
        issued_at = time.time()
        expires_at = issued_at + 31536000  # 365 days, in seconds
        self._cache_headers = [
            ('Date', date_time_string(issued_at)),
            ('Expires', date_time_string(expires_at)),
            ('Cache-Control', 'public, max-age=31536000'),
        ]
312

    
313

    
314
class HTTPGitApplication(object):
    """WSGI application that routes git HTTP requests to handler functions.

    :ivar backend: the Backend object backing this application
    """

    # Routing table: (HTTP method, compiled path regex) -> handler function.
    # Patterns are applied with search(), so they match the tail of the path.
    services = {
      ('GET', re.compile('/HEAD$')): get_text_file,
      ('GET', re.compile('/info/refs$')): get_info_refs,
      ('GET', re.compile('/objects/info/alternates$')): get_text_file,
      ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
      ('GET', re.compile('/objects/info/packs$')): get_info_packs,
      ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,

      ('POST', re.compile('/git-upload-pack$')): handle_service_request,
      ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False, handlers=None, fallback_app=None):
        """Create the application.

        :param backend: Backend passed to every handler function.
        :param dumb: Flag forwarded to each HTTPGitRequest.
        :param handlers: Optional mapping merged over DEFAULT_HANDLERS.
        :param fallback_app: Optional WSGI app used when no route matches.
        """
        self.backend = backend
        self.dumb = dumb
        self.fallback_app = fallback_app
        merged = dict(DEFAULT_HANDLERS)
        if handlers is not None:
            merged.update(handlers)
        self.handlers = merged

    def __call__(self, environ, start_response):
        """Dispatch one WSGI request to the first matching service."""
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
                             handlers=self.handlers)
        # environ['QUERY_STRING'] has qs args
        for (smethod, spath), service in self.services.items():
            mat = spath.search(path) if smethod == method else None
            if mat:
                return service(req, self.backend, mat)

        # Nothing matched: delegate if possible, otherwise answer 404.
        if self.fallback_app is not None:
            return self.fallback_app(environ, start_response)
        return [req.not_found('Sorry, that method is not supported')]
364

    
365

    
366
class GunzipFilter(object):
    """WSGI middleware that transparently decompresses gzip request bodies
    before handing the request to the wrapped application.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        if environ.get('HTTP_CONTENT_ENCODING', '') != 'gzip':
            return self.app(environ, start_response)

        body = environ['wsgi.input']
        try:
            body.tell()
        except (AttributeError, IOError, NotImplementedError):
            # The gzip implementation in the standard library of Python 2.x
            # requires working '.seek()' and '.tell()' methods on the input
            # stream. Copy the data into a spooled temporary file to work
            # around this limitation.
            spooled = tempfile.SpooledTemporaryFile(16 * 1024 * 1024)
            shutil.copyfileobj(body, spooled)
            spooled.seek(0)
            body = spooled

        environ['wsgi.input'] = gzip.GzipFile(filename=None, fileobj=body, mode='r')
        # The body is no longer encoded, and the advertised length described
        # the compressed data, so both entries are removed.
        del environ['HTTP_CONTENT_ENCODING']
        environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)
394

    
395

    
396
class LimitedInputFilter(object):
    """WSGI middleware that caps reads from wsgi.input at the number of
    bytes announced in Content-Length.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        # A conforming WSGI server already enforces this, but there is no way
        # to detect that from here, so the input is wrapped regardless.
        # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
        # content-length
        declared_length = environ.get('CONTENT_LENGTH', '')
        if declared_length:
            environ['wsgi.input'] = _LengthLimitedFile(
                environ['wsgi.input'], int(declared_length))
        return self.app(environ, start_response)
414

    
415

    
416
def make_wsgi_chain(*args, **kwargs):
    """Build an HTTPGitApplication wrapped in its required middleware.

    All arguments are forwarded to HTTPGitApplication. Requests pass through
    LimitedInputFilter first, then GunzipFilter, then the application.
    """
    return LimitedInputFilter(GunzipFilter(HTTPGitApplication(*args, **kwargs)))
423

    
424

    
425
class ServerHandlerLogger(ServerHandler):
    """ServerHandler that routes its log output to dulwich's logger."""

    def log_exception(self, exc_info):
        # exc_info is only forwarded to the logger on Python 2.7 and newer.
        extra = {}
        if not sys.version_info < (2, 7):
            extra['exc_info'] = exc_info
        logger.exception('Exception happened during processing of request',
                         **extra)

    def log_message(self, format, *args):
        logger.info(format, *args)

    def log_error(self, *args):
        logger.error(*args)
437

    
438
    def log_error(self, *args):
439
        logger.error(*args)
440

    
441

    
442
class WSGIRequestHandlerLogger(WSGIRequestHandler):
    """WSGIRequestHandler that routes its log output to dulwich's logger."""

    def log_exception(self, exc_info):
        logger.exception('Exception happened during processing of request',
                         exc_info=exc_info)

    def log_message(self, format, *args):
        logger.info(format, *args)

    def log_error(self, *args):
        logger.error(*args)

    def handle(self):
        """Handle a single HTTP request"""
        self.raw_requestline = self.rfile.readline()
        if self.parse_request():
            server_handler = ServerHandlerLogger(
                self.rfile, self.wfile, self.get_stderr(), self.get_environ()
            )
            server_handler.request_handler = self  # backpointer for logging
            server_handler.run(self.server.get_app())
        # Otherwise an error code has already been sent; nothing more to do.
467

    
468

    
469
class WSGIServerLogger(WSGIServer):
    """WSGIServer that reports request errors through dulwich's logger."""

    def handle_error(self, request, client_address):
        """Log the exception currently being handled, with the client address."""
        peer = str(client_address)
        logger.exception(
            'Exception happened during processing of request from %s' % peer)
474

    
475

    
476
def main(argv=sys.argv):
    """Entry point for starting an HTTP git server.

    :param argv: Full argument vector including the program name. The first
        positional argument after it, if any, is the repository directory;
        otherwise the current working directory is used.
    """
    import optparse
    parser = optparse.OptionParser()
    parser.add_option("-l", "--listen_address", dest="listen_address",
                      default="localhost",
                      help="Binding IP address.")
    parser.add_option("-p", "--port", dest="port", type=int,
                      default=8000,
                      help="Port to listen on.")
    options, args = parser.parse_args(argv)

    # args[0] is the program name because the whole argv was parsed.
    gitdir = args[1] if len(args) > 1 else os.getcwd()

    log_utils.default_logging_config()
    backend = DictBackend({'/': Repo(gitdir)})
    app = make_wsgi_chain(backend)
    host, port = options.listen_address, options.port
    server = make_server(host, port, app,
                         handler_class=WSGIRequestHandlerLogger,
                         server_class=WSGIServerLogger)
    logger.info('Listening for HTTP connections on %s:%d', host, port)
    server.serve_forever()
502

    
503

    
504
# Start the HTTP git server when this module is executed as a script.
if __name__ == '__main__':
505
    main()