gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / web.py @ 959
History | View | Annotate | Download (17.1 KB)
1 | 959 | jjdelcerro | # web.py -- WSGI smart-http server
|
---|---|---|---|
2 | # Copyright (C) 2010 Google, Inc.
|
||
3 | # Copyright (C) 2012 Jelmer Vernooij <jelmer@samba.org>
|
||
4 | #
|
||
5 | # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
||
6 | # General Public License as published by the Free Software Foundation; version 2.0
|
||
7 | # or (at your option) any later version. You can redistribute it and/or
|
||
8 | # modify it under the terms of either of these two licenses.
|
||
9 | #
|
||
10 | # Unless required by applicable law or agreed to in writing, software
|
||
11 | # distributed under the License is distributed on an "AS IS" BASIS,
|
||
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
13 | # See the License for the specific language governing permissions and
|
||
14 | # limitations under the License.
|
||
15 | #
|
||
16 | # You should have received a copy of the licenses; if not, see
|
||
17 | # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
||
18 | # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
||
19 | # License, Version 2.0.
|
||
20 | #
|
||
21 | |||
22 | """HTTP server for dulwich that implements the git smart HTTP protocol."""
|
||
23 | |||
24 | from io import BytesIO |
||
25 | import shutil |
||
26 | import tempfile |
||
27 | import gzip |
||
28 | import os |
||
29 | import re |
||
30 | import sys |
||
31 | import time |
||
32 | from wsgiref.simple_server import ( |
||
33 | WSGIRequestHandler, |
||
34 | ServerHandler, |
||
35 | WSGIServer, |
||
36 | make_server, |
||
37 | ) |
||
38 | |||
39 | try:
|
||
40 | from urlparse import parse_qs |
||
41 | except ImportError: |
||
42 | from urllib.parse import parse_qs |
||
43 | |||
44 | |||
45 | from dulwich import log_utils |
||
46 | from dulwich.protocol import ( |
||
47 | ReceivableProtocol, |
||
48 | ) |
||
49 | from dulwich.repo import ( |
||
50 | Repo, |
||
51 | ) |
||
52 | from dulwich.server import ( |
||
53 | DictBackend, |
||
54 | DEFAULT_HANDLERS, |
||
55 | generate_info_refs, |
||
56 | generate_objects_info_packs, |
||
57 | ) |
||
58 | |||
59 | |||
# Module-level logger obtained through dulwich's logging helpers.
logger = log_utils.getLogger(__name__)


# HTTP status lines handed to the WSGI ``start_response`` callable.
HTTP_OK = '200 OK'
HTTP_FORBIDDEN = '403 Forbidden'
HTTP_NOT_FOUND = '404 Not Found'
HTTP_ERROR = '500 Internal Server Error'
|
||
68 | |||
69 | |||
def date_time_string(timestamp=None):
    """Format a timestamp as an HTTP date string.

    :param timestamp: Seconds since the epoch; the current time is used when
        this is None.
    :return: A string of the form ``'Thu, 01 Jan 1970 00:00:00 GMT'``.
    """
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # Index 0 is a placeholder because struct_time months are 1-based.
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    year, month, day, hh, mm, ss, wd, _yday, _isdst = time.gmtime(timestamp)
    # time.gmtime() yields UTC, which HTTP dates spell as the literal "GMT".
    return '%s, %02d %3s %4d %02d:%02d:%02d GMT' % (
        weekdays[wd], day, months[month], year, hh, mm, ss)
||
86 | |||
87 | |||
def url_prefix(mat):
    """Return the part of the matched URL that precedes the match.

    :param mat: A regex match object.
    :returns: The text of the original string before the match, with all
        leading and trailing slashes removed and a single leading slash
        prepended.
    """
    before_match = mat.string[:mat.start()]
    return '/' + before_match.strip('/')
||
97 | |||
98 | |||
def get_repo(backend, mat):
    """Open the repository addressed by the URL prefix of a regex match.

    :param backend: Object providing ``open_repository(path)``.
    :param mat: A regex match object over the request path.
    :return: Whatever ``backend.open_repository`` returns for the prefix.
    """
    repo_path = url_prefix(mat)
    return backend.open_repository(repo_path)
|
||
102 | |||
103 | |||
def send_file(req, f, content_type):
    """Stream a file-like object to the client in chunks.

    :param req: The HTTPGitRequest object to send output to.
    :param f: An open file-like object to send, or None; it is closed on
        every path that reaches the end of this generator.
    :param content_type: The MIME type announced for the file.
    :return: Iterator over the file contents in chunks of up to 10240 bytes,
        or over a single error message.
    """
    if f is None:
        yield req.not_found('File not found')
        return
    try:
        req.respond(HTTP_OK, content_type)
        chunk = f.read(10240)
        while chunk:
            yield chunk
            chunk = f.read(10240)
        f.close()
    except IOError:
        f.close()
        yield req.error('Error reading file')
    except:
        # Deliberately catches everything (including generator shutdown) so
        # the file is closed before the exception continues to propagate.
        f.close()
        raise
|
||
129 | |||
130 | |||
def _url_to_path(url):
    """Replace every '/' in *url* with the platform's path separator."""
    return os.path.sep.join(url.split('/'))
||
133 | |||
134 | |||
def get_text_file(req, backend, mat):
    """Serve the repository file named by the matched URL as plain text.

    The response is marked as non-cacheable before the file is streamed.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match whose full text is the file's URL path.
    :return: Iterator over the response body chunks.
    """
    req.nocache()
    path = _url_to_path(mat.group())
    logger.info('Sending plain text file %s', path)
    named_file = get_repo(backend, mat).get_named_file(path)
    return send_file(req, named_file, 'text/plain')
|
||
141 | |||
142 | |||
def get_loose_object(req, backend, mat):
    """Serve a single loose object in its legacy on-disk encoding.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match whose groups 1 and 2 are the two parts of the
        object's hex SHA.
    :return: Iterator yielding the object data, or an error message.
    """
    hex_sha = ''.join(mat.group(1, 2)).encode('ascii')
    logger.info('Sending loose object %s', hex_sha)
    store = get_repo(backend, mat).object_store
    if not store.contains_loose(hex_sha):
        yield req.not_found('Object not found')
        return
    try:
        payload = store[hex_sha].as_legacy_object()
    except IOError:
        yield req.error('Error reading object')
        return
    # Cache headers are only set once the object has been read successfully.
    req.cache_forever()
    req.respond(HTTP_OK, 'application/x-git-loose-object')
    yield payload
|
||
158 | |||
159 | |||
def get_pack_file(req, backend, mat):
    """Serve the pack file named by the matched URL, cacheable forever.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match whose full text is the pack file's URL path.
    :return: Iterator over the response body chunks.
    """
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack file %s', path)
    pack_file = get_repo(backend, mat).get_named_file(path)
    return send_file(req, pack_file, 'application/x-git-packed-objects')
|
||
166 | |||
167 | |||
def get_idx_file(req, backend, mat):
    """Serve the pack index file named by the matched URL, cacheable forever.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match whose full text is the index file's URL path.
    :return: Iterator over the response body chunks.
    """
    req.cache_forever()
    path = _url_to_path(mat.group())
    # Say "index" so this entry can be told apart from get_pack_file's log
    # line when reading the server log.
    logger.info('Sending pack index file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'application/x-git-packed-objects-toc')
|
||
174 | |||
175 | |||
def get_info_refs(req, backend, mat):
    """Answer a GET for ``info/refs``.

    When the query string names a service and the request is not in dumb
    mode, the matching handler is run with ``advertise_refs=True`` and writes
    its output through the response's write callable. Otherwise the refs are
    generated as plain text.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match over the request path.
    :return: Iterator over response body chunks (empty on the smart path,
        where output goes through the write callable instead).
    """
    params = parse_qs(req.environ['QUERY_STRING'])
    service = params.get('service', [None])[0]
    if service and not req.dumb:
        try:
            service_name = service.encode('ascii')
        except UnicodeError:
            # The service name comes straight from the client. A non-ASCII
            # value cannot name a registered handler, so reject it like any
            # other unknown service instead of letting the encode error
            # escape as an unhandled exception.
            handler_cls = None
        else:
            handler_cls = req.handlers.get(service_name, None)
        if handler_cls is None:
            yield req.forbidden('Unsupported service')
            return
        req.nocache()
        write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
        # Advertisement needs no request body, so reads come from an empty
        # buffer.
        proto = ReceivableProtocol(BytesIO().read, write)
        handler = handler_cls(backend, [url_prefix(mat)], proto,
                              http_req=req, advertise_refs=True)
        handler.proto.write_pkt_line(b'# service=' + service_name + b'\n')
        handler.proto.write_pkt_line(None)
        handler.handle()
    else:
        # non-smart fallback
        # TODO: select_getanyfile() (see http-backend.c)
        req.nocache()
        req.respond(HTTP_OK, 'text/plain')
        logger.info('Emulating dumb info/refs')
        repo = get_repo(backend, mat)
        for text in generate_info_refs(repo):
            yield text
|
||
201 | |||
202 | |||
def get_info_packs(req, backend, mat):
    """Answer a GET for ``objects/info/packs`` with a generated listing.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend used to open the repository.
    :param mat: Regex match over the request path.
    :return: The iterable produced by ``generate_objects_info_packs``.
    """
    req.nocache()
    req.respond(HTTP_OK, 'text/plain')
    logger.info('Emulating dumb info/packs')
    repo = get_repo(backend, mat)
    return generate_objects_info_packs(repo)
|
||
208 | |||
209 | |||
class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        """Wrap *input* so that at most *max_bytes* bytes can be read.

        :param input: File-like object providing ``read(size)``.
        :param max_bytes: Total number of bytes that may be read.
        """
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        """Read up to *size* bytes without exceeding the remaining budget.

        :param size: Maximum number of bytes to read. None or any negative
            value means "everything that is still allowed".
        :return: The bytes read, or ``b''`` once the budget is used up.
        """
        if self._bytes_avail <= 0:
            return b''
        # Treat None and every negative size as "read the rest"; a negative
        # size must never reach the arithmetic below, where it would grow the
        # budget and let the read run past the limit.
        if size is None or size < 0 or size > self._bytes_avail:
            size = self._bytes_avail
        data = self._input.read(size)
        # Charge only what was actually delivered so a short read does not
        # truncate the remainder of the body.
        self._bytes_avail -= len(data)
        return data

    # TODO: support more methods as necessary
|
||
231 | |||
232 | |||
def handle_service_request(req, backend, mat):
    """Run the handler for a smart-HTTP service POST.

    The service name is the matched URL text without its leading slashes.
    Handler output goes through the response's write callable.

    :param req: The HTTPGitRequest being answered.
    :param backend: Backend handed to the service handler.
    :param mat: Regex match over the request path.
    :return: Iterator that yields an error message for an unknown service and
        nothing otherwise.
    """
    service_name = mat.group().lstrip('/')
    logger.info('Handling service request for %s', service_name)
    service_cls = req.handlers.get(service_name.encode('ascii'))
    if service_cls is None:
        yield req.forbidden('Unsupported service')
        return
    req.nocache()
    content_type = 'application/x-%s-result' % service_name
    write = req.respond(HTTP_OK, content_type)
    reader = req.environ['wsgi.input'].read
    proto = ReceivableProtocol(reader, write)
    service = service_cls(backend, [url_prefix(mat)], proto, http_req=req)
    service.handle()
||
245 | |||
246 | |||
class HTTPGitRequest(object):
    """State and response helpers for one git HTTP request.

    :ivar environ: the WSGI environment for the request.
    """

    def __init__(self, environ, start_response, dumb=False, handlers=None):
        self.environ = environ
        self.dumb = dumb
        self.handlers = handlers
        self._start_response = start_response
        # Cache headers are kept apart from the other headers so the error
        # helpers can discard them before responding.
        self._cache_headers = []
        self._headers = []

    def add_header(self, name, value):
        """Queue a ``(name, value)`` header for the response."""
        self._headers.append((name, value))

    def respond(self, status=HTTP_OK, content_type=None, headers=None):
        """Start the response and return what ``start_response`` returns.

        :param status: HTTP status line.
        :param content_type: Optional value for the Content-Type header.
        :param headers: Optional extra ``(name, value)`` pairs.
        """
        if headers:
            self._headers += headers
        if content_type:
            self._headers.append(('Content-Type', content_type))
        self._headers += self._cache_headers

        return self._start_response(status, self._headers)

    def _abort(self, status, log, log_format, message):
        """Drop cache headers, log, respond with *status*, return the body."""
        self._cache_headers = []
        log(log_format, message)
        self.respond(status, 'text/plain')
        return message.encode('ascii')

    def not_found(self, message):
        """Begin a HTTP 404 response and return the text of a message."""
        return self._abort(HTTP_NOT_FOUND, logger.info, 'Not found: %s',
                           message)

    def forbidden(self, message):
        """Begin a HTTP 403 response and return the text of a message."""
        return self._abort(HTTP_FORBIDDEN, logger.info, 'Forbidden: %s',
                           message)

    def error(self, message):
        """Begin a HTTP 500 response and return the text of a message."""
        return self._abort(HTTP_ERROR, logger.error, 'Error: %s', message)

    def nocache(self):
        """Set the response to never be cached by the client."""
        self._cache_headers = [
            ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
            ('Pragma', 'no-cache'),
            ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
        ]

    def cache_forever(self):
        """Set the response to be cached forever by the client."""
        issued_at = time.time()
        # 31536000 seconds is 365 days, matching the max-age value below.
        expires_at = issued_at + 31536000
        self._cache_headers = [
            ('Date', date_time_string(issued_at)),
            ('Expires', date_time_string(expires_at)),
            ('Cache-Control', 'public, max-age=31536000'),
        ]
||
312 | |||
313 | |||
class HTTPGitApplication(object):
    """WSGI application that dispatches git HTTP requests to handlers.

    :ivar backend: the Backend object backing this application
    """

    # Maps (HTTP method, compiled path pattern) to the function serving it.
    services = {
        ('GET', re.compile('/HEAD$')): get_text_file,
        ('GET', re.compile('/info/refs$')): get_info_refs,
        ('GET', re.compile('/objects/info/alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
        ('GET', re.compile('/objects/info/packs$')): get_info_packs,
        ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
        ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,

        ('POST', re.compile('/git-upload-pack$')): handle_service_request,
        ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False, handlers=None, fallback_app=None):
        """Create the application.

        :param backend: Backend passed on to every service function.
        :param dumb: Stored on each request; when true, info/refs is always
            answered with the plain-text fallback.
        :param handlers: Optional mapping merged over DEFAULT_HANDLERS.
        :param fallback_app: Optional WSGI app called when no service matches.
        """
        self.backend = backend
        self.dumb = dumb
        self.fallback_app = fallback_app
        # Copy the defaults so per-application overrides never touch them.
        self.handlers = dict(DEFAULT_HANDLERS)
        if handlers is not None:
            self.handlers.update(handlers)

    def __call__(self, environ, start_response):
        """Route one WSGI request to the first matching service function."""
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD']
        req = HTTPGitRequest(environ, start_response, dumb=self.dumb,
                             handlers=self.handlers)
        # environ['QUERY_STRING'] has qs args
        handler = None
        for (smethod, spath), candidate in self.services.items():
            if smethod != method:
                continue
            mat = spath.search(path)
            if mat:
                handler = candidate
                break

        if handler is not None:
            return handler(req, self.backend, mat)
        if self.fallback_app is not None:
            return self.fallback_app(environ, start_response)
        return [req.not_found('Sorry, that method is not supported')]
||
364 | |||
365 | |||
class GunzipFilter(object):
    """WSGI middleware that transparently decompresses gzip request bodies
    before handing the request to the wrapped application.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        """Replace a gzip-encoded ``wsgi.input`` with a decompressing reader.

        Requests without ``Content-Encoding: gzip`` are passed through
        untouched.
        """
        if environ.get('HTTP_CONTENT_ENCODING', '') != 'gzip':
            return self.app(environ, start_response)

        body = environ['wsgi.input']
        try:
            body.tell()
        except (AttributeError, IOError, NotImplementedError):
            # The gzip implementation in the standard library of Python 2.x
            # requires working '.seek()' and '.tell()' methods on the input
            # stream.  Read the data into a temporary file to work around
            # this limitation.
            spooled = tempfile.SpooledTemporaryFile(16 * 1024 * 1024)
            shutil.copyfileobj(body, spooled)
            spooled.seek(0)
            body = spooled

        environ['wsgi.input'] = gzip.GzipFile(filename=None, fileobj=body, mode='r')
        del environ['HTTP_CONTENT_ENCODING']
        # The compressed length no longer describes what the app will read.
        environ.pop('CONTENT_LENGTH', None)

        return self.app(environ, start_response)
||
394 | |||
395 | |||
class LimitedInputFilter(object):
    """WSGI middleware that caps reads from the request body at the number
    of bytes announced in Content-Length.
    """

    def __init__(self, application):
        self.app = application

    def __call__(self, environ, start_response):
        """Wrap ``wsgi.input`` when a non-empty CONTENT_LENGTH is present."""
        # This is not necessary if this app is run from a conforming WSGI
        # server. Unfortunately, there's no way to tell that at this point.
        # TODO: git may use HTTP/1.1 chunked encoding instead of specifying
        # content-length
        declared_length = environ.get('CONTENT_LENGTH', '')
        if declared_length:
            limited = _LengthLimitedFile(
                environ['wsgi.input'], int(declared_length))
            environ['wsgi.input'] = limited
        return self.app(environ, start_response)
||
414 | |||
415 | |||
def make_wsgi_chain(*args, **kwargs):
    """Build an HTTPGitApplication wrapped in the required middleware.

    All arguments are forwarded to HTTPGitApplication. The result is the
    application wrapped in GunzipFilter, which is in turn wrapped in
    LimitedInputFilter.
    """
    return LimitedInputFilter(GunzipFilter(HTTPGitApplication(*args, **kwargs)))
|
||
423 | |||
424 | |||
class ServerHandlerLogger(ServerHandler):
    """ServerHandler whose log output goes through dulwich's logger."""

    def log_exception(self, exc_info):
        """Log the exception described by *exc_info* at error level."""
        message = 'Exception happened during processing of request'
        if sys.version_info >= (2, 7):
            logger.exception(message, exc_info=exc_info)
        else:
            # On older interpreters exc_info is not passed explicitly.
            logger.exception(message)

    def log_message(self, format, *args):
        """Forward an informational message to the logger."""
        logger.info(format, *args)

    def log_error(self, *args):
        """Forward an error message to the logger."""
        logger.error(*args)
||
440 | |||
441 | |||
class WSGIRequestHandlerLogger(WSGIRequestHandler):
    """WSGIRequestHandler that uses dulwich's logger for logging exceptions."""

    def log_exception(self, exc_info):
        """Log the exception described by *exc_info* at error level."""
        logger.exception('Exception happened during processing of request',
                         exc_info=exc_info)

    def log_message(self, format, *args):
        """Forward an informational message to the logger."""
        logger.info(format, *args)

    def log_error(self, *args):
        """Forward an error message to the logger."""
        logger.error(*args)

    def handle(self):
        """Handle a single HTTP request"""

        # Cap the request line the way the standard library's
        # WSGIRequestHandler.handle() does, so a client cannot make the
        # server buffer an arbitrarily long line; over-long lines are
        # answered with 414 (Request-URI Too Long).
        self.raw_requestline = self.rfile.readline(65537)
        if len(self.raw_requestline) > 65536:
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return

        if not self.parse_request():  # An error code has been sent, just exit
            return

        handler = ServerHandlerLogger(
            self.rfile, self.wfile, self.get_stderr(), self.get_environ()
        )
        handler.request_handler = self      # backpointer for logging
        handler.run(self.server.get_app())
|
||
467 | |||
468 | |||
class WSGIServerLogger(WSGIServer):
    """WSGIServer that reports request errors through dulwich's logger."""

    def handle_error(self, request, client_address):
        """Handle an error. """
        message = ('Exception happened during processing of request from %s'
                   % str(client_address))
        logger.exception(message)
||
474 | |||
475 | |||
def main(argv=sys.argv):
    """Start an HTTP git server for a single repository.

    :param argv: Command-line arguments including the program name. The
        first positional argument, if present, is the repository directory;
        otherwise the current working directory is served.
    """
    import optparse
    parser = optparse.OptionParser()
    parser.add_option("-l", "--listen_address", dest="listen_address",
                      default="localhost",
                      help="Binding IP address.")
    parser.add_option("-p", "--port", dest="port", type=int,
                      default=8000,
                      help="Port to listen on.")
    options, args = parser.parse_args(argv)

    # args[0] is the program name, so the repository path is args[1].
    gitdir = args[1] if len(args) > 1 else os.getcwd()

    log_utils.default_logging_config()
    repositories = {'/': Repo(gitdir)}
    app = make_wsgi_chain(DictBackend(repositories))
    server = make_server(options.listen_address, options.port, app,
                         handler_class=WSGIRequestHandlerLogger,
                         server_class=WSGIServerLogger)
    logger.info('Listening for HTTP connections on %s:%d',
                options.listen_address, options.port)
    server.serve_forever()
||
502 | |||
503 | |||
if __name__ == '__main__':
    # Start the server only when this module is executed as a script.
    main()