gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / object_store.py @ 959
History | View | Annotate | Download (37.2 KB)
1 |
# object_store.py -- Object store for git objects
|
---|---|
2 |
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
|
3 |
# and others
|
4 |
#
|
5 |
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
6 |
# General Public License as public by the Free Software Foundation; version 2.0
|
7 |
# or (at your option) any later version. You can redistribute it and/or
|
8 |
# modify it under the terms of either of these two licenses.
|
9 |
#
|
10 |
# Unless required by applicable law or agreed to in writing, software
|
11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
# You should have received a copy of the licenses; if not, see
|
17 |
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
18 |
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
19 |
# License, Version 2.0.
|
20 |
#
|
21 |
|
22 |
|
23 |
"""Git object store interfaces and implementation."""
|
24 |
|
25 |
|
26 |
from io import BytesIO |
27 |
import errno |
28 |
from itertools import chain |
29 |
import os |
30 |
import stat |
31 |
import sys |
32 |
import tempfile |
33 |
|
34 |
from dulwich.diff_tree import ( |
35 |
tree_changes, |
36 |
walk_trees, |
37 |
) |
38 |
from dulwich.errors import ( |
39 |
NotTreeError, |
40 |
) |
41 |
from dulwich.file import GitFile |
42 |
from dulwich.objects import ( |
43 |
Commit, |
44 |
ShaFile, |
45 |
Tag, |
46 |
Tree, |
47 |
ZERO_SHA, |
48 |
hex_to_sha, |
49 |
sha_to_hex, |
50 |
hex_to_filename, |
51 |
S_ISGITLINK, |
52 |
object_class, |
53 |
) |
54 |
from dulwich.pack import ( |
55 |
Pack, |
56 |
PackData, |
57 |
PackInflater, |
58 |
iter_sha1, |
59 |
write_pack_header, |
60 |
write_pack_index_v2, |
61 |
write_pack_object, |
62 |
write_pack_objects, |
63 |
compute_file_sha, |
64 |
PackIndexer, |
65 |
PackStreamCopier, |
66 |
) |
67 |
|
68 |
# Subdirectory of the object store that holds metadata such as the
# "alternates" file (see DiskObjectStore._read_alternate_paths).
INFODIR = 'info'
# Subdirectory of the object store that holds pack files.
PACKDIR = 'pack'
|
70 |
|
71 |
|
72 |
class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        """Return the SHAs from a refs dict that are worth fetching.

        :param refs: Dictionary mapping ref names to SHA1s.
        :return: List of SHAs not yet present in this store, excluding
            peeled refs (names ending in ``^{}``) and the all-zero SHA.
        """
        return [sha for (ref, sha) in refs.items()
                if not sha in self and not ref.endswith(b"^{}") and
                not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add.
        """
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Delegates to diff_tree.tree_changes and flattens each TreeChange
        # into the tuple-of-pairs shape documented above.
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            # Directory (tree) entries are yielded only on request;
            # everything else is always yielded.
            if not stat.S_ISDIR(entry.mode) or include_trees:
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        :param get_parents: Optional function for getting the parents of a commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged, get_parents=get_parents)
        # Two-argument iter(): keep calling finder.next until it returns
        # None, which marks exhaustion.
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        # The walker yields a falsy value (e.g. None) when it has no more
        # candidates to offer.
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        :return: Iterator over (object, path) pairs.
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag, this
            will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            # Tag.object is a (type class, sha) pair naming the tagged
            # object; follow the chain until a non-tag is reached.
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=set(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        # NOTE(review): the mutable default for `common` is safe here only
        # because it is read, never mutated, in this method.
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        # Breadth-first walk from the heads, stopping at commits in
        # `common` (recorded in `bases`) and at already-seen commits.
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP
|
253 |
|
254 |
class PackBasedObjectStore(BaseObjectStore):
    """Abstract store that keeps objects in pack files and loose files.

    Subclasses provide the storage primitives (loose-object access,
    pack-cache staleness checks, add_pack); this class implements lookup
    across packs, loose objects and alternates on top of them.
    """

    def __init__(self):
        # Maps pack base name -> open Pack object.
        self._pack_cache = {}

    @property
    def alternates(self):
        """Alternate object stores to consult; none by default."""
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            if sha in pack:
                return True
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        # Fall back to any alternate stores.
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path.

        :param base_name: Base name of the pack (cache key).
        :param pack: Pack object to cache.
        """
        self._pack_cache[base_name] = pack

    def close(self):
        """Close all cached packs and empty the pack cache."""
        # Swap the cache out first so the store is left in a clean state
        # even if one of the close() calls raises.
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    @property
    def packs(self):
        """List with pack objects."""
        # Refresh lazily: only rescan when the cache is missing or stale.
        if self._pack_cache is None or self._pack_cache_stale():
            self._update_pack_cache()

        return self._pack_cache.values()

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        """Return the loose object for sha, or None if not present."""
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        """Delete the loose object for sha from storage."""
        raise NotImplementedError(self._remove_loose_object)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        # Only remove the loose copies once they have been written to a pack.
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = list(self.packs) + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
        return chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object, either 40-char hex or 20-byte binary.
        :return: tuple with numeric type and object contents.
        :raise KeyError: if the object is not found anywhere.
        """
        # Normalize: packs are checked with the binary sha, loose objects
        # and alternates with the hex sha.
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        # Compute the hex form lazily, only when the pack lookup failed.
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_objects(f, objects)
        except:
            # Bare except is deliberate: abort the pack on ANY failure
            # (including KeyboardInterrupt), then re-raise.
            abort()
            raise
        else:
            return commit()
|
400 |
|
401 |
|
402 |
class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        # mtime of pack_dir at the last cache refresh; used for staleness.
        self._pack_cache_time = 0
        self._pack_cache = {}
        # Lazily-populated list of alternate DiskObjectStores.
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        """Alternate object stores listed in objects/info/alternates."""
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        """Yield the alternate store paths from the "alternates" file.

        Relative paths are resolved against this store's path; all paths
        are decoded with the filesystem encoding.
        """
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"),
                    'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for l in f.readlines():
                l = l.rstrip(b"\n")
                # Fix: the original compared l[0] == b"#", which on Python 3
                # compares an int against bytes and is always False (and
                # raises IndexError on empty lines on Python 2).
                if l.startswith(b"#"):
                    continue
                if os.path.isabs(l):
                    yield l.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, l).decode(sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store.

        :param path: Path of the alternate store; appended to the
            "alternates" file and registered immediately.
        """
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
            # Preserve any existing alternates before appending the new one.
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        """Rescan the pack directory and sync the pack cache with it."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                # Pack directory vanished: drop everything we had cached.
                self._pack_cache_time = 0
                self.close()
                return
            raise
        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
        pack_files = set()
        for name in pack_dir_contents:
            assert isinstance(name, basestring if sys.version_info[0] == 2 else str)
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        for f in pack_files:
            if f not in self._pack_cache:
                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()

    def _pack_cache_stale(self):
        """Return True if the pack dir changed since the last cache refresh."""
        try:
            return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
        except OSError as e:
            if e.errno == errno.ENOENT:
                # Missing pack dir counts as stale so the cache gets cleared.
                return True
            raise

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        """Iterate hex SHAs of loose objects (two-char fan-out dirs + rest)."""
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        """Return the loose ShaFile for sha, or None if not on disk."""
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        """Delete the loose object file for sha."""
        os.remove(self._get_shafile_path(sha))

    def _get_pack_basepath(self, entries):
        """Return the pack path (without extension) derived from its entries."""
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack: append the objects that were only referenced.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        if sys.platform == 'win32':
            # Windows cannot rename over an existing file; remove and retry.
            try:
                os.rename(path, pack_base_name + '.pack')
            except WindowsError:
                os.remove(pack_base_name + '.pack')
                os.rename(path, pack_base_name + '.pack')
        else:
            os.rename(path, pack_base_name + '.pack')

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            # abort() after a successful close() is a no-op; it only cleans
            # up when write/close above failed.
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_known_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist outside
        the pack. They should never be placed in the object store directly, and
        always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of requested
            bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        :return: Pack object for the newly added pack.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename+".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        final_pack = Pack(basename)
        self._add_known_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')
        def commit():
            # Fix: flush Python's buffer before fsync, otherwise buffered
            # data is not covered by the fsync.
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                # Nothing was written: discard the temporary file.
                os.remove(path)
                return None
        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dirname = os.path.dirname(path)  # renamed from `dir` (builtin shadow)
        try:
            os.mkdir(dirname)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        """Create the on-disk directory layout and return a new store.

        :param path: Directory in which to create the object store.
        """
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        # Use the INFODIR constant for consistency with the rest of the class
        # (was the hard-coded literal "info").
        os.mkdir(os.path.join(path, INFODIR))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
|
688 |
|
689 |
|
690 |
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        # Maps 40-char hex SHA -> object copy.
        self._data = {}

    def _to_hexsha(self, sha):
        """Normalize a binary or hex SHA to its 40-character hex form."""
        if len(sha) == 40:
            return sha
        if len(sha) == 20:
            return sha_to_hex(sha)
        raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects; always empty for an in-memory store."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        hexsha = self._to_hexsha(name)
        obj = self[hexsha]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        # Hand out a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add; a copy is stored under its id.
        """
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        for obj, _unused_path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        buf = BytesIO()

        def commit():
            size = buf.tell()
            pack_data = PackData.from_file(BytesIO(buf.getvalue()), size)
            buf.close()
            for unpacked in PackInflater.for_pack_data(pack_data, self.get_raw):
                self.add_object(unpacked)

        def abort():
            pass

        return buf, commit, abort

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Rewrite the header so it counts the external objects as well.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Recompute the pack SHA over everything except the old trailing
        # checksum.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Append the externally-referenced objects, then the new checksum.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        f.write(new_sha.digest())

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist outside
        the pack. Because this object store doesn't support packs, we extract
        and add the individual objects.

        :param read_all: Read function that blocks until the number of requested
            bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            PackStreamCopier(read_all, read_some, f, delta_iter=indexer).verify()
            self._complete_thin_pack(f, indexer)
        except:
            abort()
            raise
        else:
            commit()
817 |
|
818 |
|
819 |
class ObjectImporter(object):
    """Interface for importing objects.

    Concrete importers must implement add_object() and finish().
    """

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
836 |
|
837 |
|
838 |
class ObjectIterator(object):
    """Interface for iterating over objects.

    Subclasses must provide iterobjects().
    """

    def iterobjects(self):
        raise NotImplementedError(self.iterobjects)
843 |
|
844 |
|
845 |
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        # Cache of (sha, path) pairs already consumed from sha_iter, so
        # that itershas() can be re-run from the start.
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        return ((self.store[sha], path) for sha, path in self.itershas())

    def iterobjects(self):
        """Iterate over just the objects."""
        for obj, _unused_path in self:
            yield obj

    def itershas(self):
        """Iterate over the SHAs."""
        # First replay everything seen on previous (possibly partial) runs…
        for seen in self._shas:
            yield seen
        # …then keep draining the underlying iterator, caching as we go.
        for entry in self.sha_iter:
            self._shas.append(entry)
            yield entry

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return sum(1 for _ in self.itershas())
899 |
|
900 |
|
901 |
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    :raise NotTreeError: if root_sha does not refer to a tree.
    """
    root = lookup_obj(root_sha)
    if isinstance(root, Tree):
        return root.lookup_path(lookup_obj, path)
    raise NotTreeError(root_sha)
|
913 |
|
914 |
|
915 |
def _collect_filetree_revs(obj_store, tree_sha, kset): |
916 |
"""Collect SHA1s of files and directories for specified tree.
|
917 |
|
918 |
:param obj_store: Object store to get objects by SHA from
|
919 |
:param tree_sha: tree reference to walk
|
920 |
:param kset: set to fill with references to files and directories
|
921 |
"""
|
922 |
filetree = obj_store[tree_sha] |
923 |
for name, mode, sha in filetree.iteritems(): |
924 |
if not S_ISGITLINK(mode) and sha not in kset: |
925 |
kset.add(sha) |
926 |
if stat.S_ISDIR(mode):
|
927 |
_collect_filetree_revs(obj_store, sha, kset) |
928 |
|
929 |
|
930 |
def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Partition object ids into commit, tag, and other SHA sets.

    Commits referenced by tags are included into the commits set as
    well. Only SHA1s known in this repository will get through, and
    unless the ignore_unknown argument is True, KeyError is thrown for
    a SHA1 missing in the repository.

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits, tags, others = set(), set(), set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if ignore_unknown:
                continue
            raise
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Recurse into the tagged object so the commit a tag
            # points at is classified too.
            sub = _split_commits_and_tags(
                obj_store, [obj.object[1]], ignore_unknown=ignore_unknown)
            commits.update(sub[0])
            tags.update(sub[1])
            others.update(sub[2])
        else:
            others.add(sha)
    return (commits, tags, others)
|
967 |
|
968 |
|
969 |
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        # sha_done tracks objects known to exist on the target side;
        # anything in it is never selected for sending.
        self.sha_done = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.sha_done.add(h)
            cmt = object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        for t in have_tags:
            self.sha_done.add(t)

        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)

        # Work queue of (sha, name, leaf) triples; leaf=True means the
        # entry is a terminal object that need not be expanded further.
        self.objects_to_send = set([(w, None, False) for w in wants])

        if progress is None:
            # Default to a no-op so callers can always invoke self.progress.
            self.progress = lambda x: None
        else:
            self.progress = progress
        # NOTE(review): `x and f() or {}` collapses a falsy f() result to {};
        # harmless here since an empty dict is the fallback anyway.
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        """Queue (sha, name, leaf) entries not already known to be done."""
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def next(self):
        """Return the next (sha, name) pair to send, or None when done.

        Commits enqueue their tree, trees enqueue their entries
        (skipping submodule links), and tags enqueue their target,
        so the whole reachable object graph gets visited.
        """
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                # Tree entries that are themselves trees must be expanded;
                # blobs are leaves. Gitlinks point outside this store.
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress(("counting objects: %d\r" % len(self.sha_done)).encode('ascii'))
        return (sha, name)

    __next__ = next
|
1061 |
|
1062 |
|
1063 |
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps visited sha -> list of parent shas, or None once the
        # entry has been consumed by ack().
        self.parents = {}

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source.

        :param sha: 40-byte hex SHA1 of the acknowledged commit.
        :raise ValueError: if *sha* is not 40 characters long.
        """
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = {sha}

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                # Mark as consumed so the same ancestors are not
                # expanded again by a later ack().
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target.

        :return: The next unvisited SHA1, or None when exhausted.
        """
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            # Only enqueue parents we have not already visited.
            self.heads.update([p for p in ps if p not in self.parents])
            return ret
        return None

    __next__ = next
|