Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / object_store.py @ 959

History | View | Annotate | Download (37.2 KB)

1
# object_store.py -- Object store for git objects
2
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
3
#                         and others
4
#
5
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6
# General Public License as public by the Free Software Foundation; version 2.0
7
# or (at your option) any later version. You can redistribute it and/or
8
# modify it under the terms of either of these two licenses.
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
#
16
# You should have received a copy of the licenses; if not, see
17
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19
# License, Version 2.0.
20
#
21

    
22

    
23
"""Git object store interfaces and implementation."""
24

    
25

    
26
from io import BytesIO
27
import errno
28
from itertools import chain
29
import os
30
import stat
31
import sys
32
import tempfile
33

    
34
from dulwich.diff_tree import (
35
    tree_changes,
36
    walk_trees,
37
    )
38
from dulwich.errors import (
39
    NotTreeError,
40
    )
41
from dulwich.file import GitFile
42
from dulwich.objects import (
43
    Commit,
44
    ShaFile,
45
    Tag,
46
    Tree,
47
    ZERO_SHA,
48
    hex_to_sha,
49
    sha_to_hex,
50
    hex_to_filename,
51
    S_ISGITLINK,
52
    object_class,
53
    )
54
from dulwich.pack import (
55
    Pack,
56
    PackData,
57
    PackInflater,
58
    iter_sha1,
59
    write_pack_header,
60
    write_pack_index_v2,
61
    write_pack_object,
62
    write_pack_objects,
63
    compute_file_sha,
64
    PackIndexer,
65
    PackStreamCopier,
66
    )
67

    
68
INFODIR = 'info'
69
PACKDIR = 'pack'
70

    
71

    
72
class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        """Return the SHAs from a refs dict that are worth fetching.

        Skips SHAs already present in this store, peeled-tag refs
        (names ending in ``^{}``) and the all-zero SHA.

        :param refs: Dict mapping ref name (bytes) -> SHA1
        :return: List of SHAs to fetch
        """
        return [sha for (ref, sha) in refs.items()
                if sha not in self and not ref.endswith(b"^{}") and
                   sha != ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if not stat.S_ISDIR(entry.mode) or include_trees:
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        # finder.next returns None when exhausted; iter()'s sentinel form
        # turns that into iterator termination.
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=frozenset(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely (only membership-tested, never mutated; the default
            is a frozenset to avoid the shared-mutable-default pitfall)
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared)
            elements that are directly reachable from heads
        """
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    
253

    
254
class PackBasedObjectStore(BaseObjectStore):
    """Object store backed by pack files, with optional loose objects."""

    def __init__(self):
        # Maps pack base name -> open Pack object.
        self._pack_cache = {}

    @property
    def alternates(self):
        """Alternate object stores to fall back to (none by default)."""
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            if sha in pack:
                return True
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects;
        alternates are also consulted.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path."""
        self._pack_cache[base_name] = pack

    def close(self):
        """Close all cached packs and reset the cache."""
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    @property
    def packs(self):
        """List with pack objects."""
        if self._pack_cache is None or self._pack_cache_stale():
            self._update_pack_cache()
        return self._pack_cache.values()

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        """Return the loose object for sha, or None if not present."""
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        """Remove the loose object file for sha."""
        raise NotImplementedError(self._remove_loose_object)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        # add_objects needs __len__, so materialize the set as a list.
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = (list(self.packs) + [self._iter_loose_objects()] +
                     [self._iter_alternate_objects()])
        return chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object, either 40-byte hex or 20-byte
            binary.
        :return: tuple with numeric type and object contents.
        :raise KeyError: if the object is not found anywhere.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        # Packs index by binary sha; loose objects by hex sha.
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over (object, path) tuples, should support
            __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_objects(f, objects)
        except BaseException:
            # Catch everything (including KeyboardInterrupt) so the
            # temporary pack file is always cleaned up, then re-raise.
            abort()
            raise
        else:
            return commit()
400

    
401

    
402
class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._pack_cache_time = 0
        self._pack_cache = {}
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        # Lazily parse objects/info/alternates once and cache the stores.
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        """Yield the paths listed in objects/info/alternates, if any."""
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"),
                    'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for l in f.readlines():
                l = l.rstrip(b"\n")
                # BUGFIX: indexing bytes yields an int on Python 3, so the
                # previous ``l[0] == b"#"`` never matched (and raised
                # IndexError on empty lines); startswith() behaves
                # correctly on both Python 2 and 3.
                if l.startswith(b"#"):
                    continue
                if os.path.isabs(l):
                    yield l.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, l).decode(
                        sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, 'wb') as f:
            # Preserve existing entries, then append the new path.
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        """Rescan the pack directory and refresh the pack cache."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self._pack_cache_time = 0
                self.close()
                return
            raise
        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
        pack_files = set()
        for name in pack_dir_contents:
            assert isinstance(
                name, basestring if sys.version_info[0] == 2 else str)
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not
                # yet fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[:-len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        for f in pack_files:
            if f not in self._pack_cache:
                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()

    def _pack_cache_stale(self):
        """Return True if the pack directory changed since the last scan."""
        try:
            return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
        except OSError as e:
            if e.errno == errno.ENOENT:
                return True
            raise

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        """Yield hex SHAs for all loose objects on disk."""
        for base in os.listdir(self.path):
            # Loose objects live in two-character fan-out directories.
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base+rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        """Return the loose ShaFile for sha, or None if not on disk."""
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def _get_pack_basepath(self, entries):
        """Return the path (sans extension) for a pack with these entries."""
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack: append the objects referenced outside the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        if sys.platform == 'win32':
            # Windows cannot rename over an existing file; retry after
            # removing the destination.
            try:
                os.rename(path, pack_base_name + '.pack')
            except WindowsError:
                os.remove(pack_base_name + '.pack')
                os.rename(path, pack_base_name + '.pack')
        else:
            os.rename(path, pack_base_name + '.pack')

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            # abort() after a successful close() is a no-op; this only
            # discards the temp file when writing failed.
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_known_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename+".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        final_pack = Pack(basename)
        self._add_known_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')
        def commit():
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                # Nothing was written; discard the empty temp file.
                os.remove(path)
                return None
        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        """Create a new object store directory layout at path."""
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
688

    
689

    
690
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        # Maps hex SHA -> ShaFile copy.
        self._data = {}

    def _to_hexsha(self, sha):
        """Normalize a 40-byte hex or 20-byte binary SHA to hex form.

        :raise ValueError: if sha has neither length.
        """
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store."""
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add
        the individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        f = BytesIO()
        def commit():
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            f.close()
            for obj in PackInflater.for_pack_data(p, self.get_raw):
                self.add_object(obj)
        def abort():
            pass
        return f, commit, abort

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack: append the externally-referenced objects.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
        f.write(pack_sha)

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            self._complete_thin_pack(f, indexer)
        except BaseException:
            # Catch everything (including KeyboardInterrupt) so the partial
            # pack is always discarded, then re-raise.
            abort()
            raise
        else:
            commit()
817

    
818

    
819
class ObjectImporter(object):
    """Abstract interface for bulk-importing objects into a store."""

    def __init__(self, count):
        """Set up the importer.

        :param count: Total number of objects that will be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)
836

    
837

    
838
class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        # Abstract: subclasses supply the actual object iteration.
        raise NotImplementedError(self.iterobjects)
class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        # Cache of pairs already consumed from sha_iter, so that
        # itershas() can be iterated more than once.
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for obj, _ in self:
            yield obj

    def itershas(self):
        """Iterate over the SHAs."""
        # Replay the cached entries first, then drain (and cache) the
        # remainder of the underlying one-shot iterator.
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return sum(1 for _ in self.itershas())
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    # The root must be a tree; anything else cannot be path-traversed.
    if not isinstance(root, Tree):
        raise NotTreeError(root_sha)
    return root.lookup_path(lookup_obj, path)
def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    tree = obj_store[tree_sha]
    for name, mode, sha in tree.iteritems():
        # Skip submodule (gitlink) entries and anything already collected.
        if S_ISGITLINK(mode) or sha in kset:
            continue
        kset.add(sha)
        # Recurse into subtrees.
        if stat.S_ISDIR(mode):
            _collect_filetree_revs(obj_store, sha, kset)
def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if not ignore_unknown:
                raise
            # Unknown SHA and we were told to skip it silently.
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Follow the tag to whatever it points at (possibly another
            # tag), classifying that object too.  Distinct names here fix
            # the original's reuse of one variable for both the fetched
            # object and the recursion result.
            tagged = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [tagged], ignore_unknown=ignore_unknown)
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)
class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None, get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # Commits and tags are processed differently below.
        # haves may list commits/tags not available locally; those get
        # filtered out by _split_commits_and_tags (ignore_unknown=True).
        # wants must list only known SHAs, otherwise
        # _split_commits_and_tags raises KeyError.
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors: commits that shall not be sent (the complete
        # repository history up to 'haves').
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # missing_commits: complete set of commits between haves and wants.
        # common_commits: commits from all_ancestors encountered while
        # traversing the parent hierarchy of wants.
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Seed sha_done with commits -- and the file/directory revisions
        # they reference -- known to exist both locally and on the target,
        # so they are never selected for fetch.
        for commit_sha in common_commits:
            self.sha_done.add(commit_sha)
            cmt = object_store[commit_sha]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # Tags the target already has count as visited, too.
        for tag_sha in have_tags:
            self.sha_done.add(tag_sha)

        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # What we actually 'want' is every commit, tag and other object
        # found missing above.
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)

        # Work queue of (sha, name, leaf) triples.
        self.objects_to_send = set([(w, None, False) for w in wants])

        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        # and/or idiom: {} when get_tagged is None or returns a falsy value.
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        # Queue only entries whose SHA has not been handled yet.
        self.objects_to_send.update([e for e in entries
                                     if e[0] not in self.sha_done])

    def next(self):
        # Pop until we find a SHA not yet done; None when exhausted.
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            obj = self.object_store[sha]
            if isinstance(obj, Commit):
                self.add_todo([(obj.tree, "", False)])
            elif isinstance(obj, Tree):
                # Tree entries are leaves unless they are subdirectories;
                # submodule (gitlink) entries are skipped entirely.
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in obj.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(obj, Tag):
                self.add_todo([(obj.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress(("counting objects: %d\r"
                       % len(self.sha_done)).encode('ascii'))
        return (sha, name)

    __next__ = next
class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps sha -> its parent list, or None once acked/consumed.
        self.parents = {}

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = set([sha])

        # Walk back one ancestor generation at a time, stopping when
        # there are no heads left to remove.
        while self.heads:
            # Any current ancestors that are still heads are acked away.
            self.heads.difference_update(ancestors)

            # Gather the next generation of ancestors and mark the
            # current ones as processed.
            next_generation = set()
            for ancestor in ancestors:
                known_parents = self.parents.get(ancestor)
                if known_parents is not None:
                    next_generation.update(known_parents)
                self.parents[ancestor] = None

            # Nothing further back is known; stop.
            if not next_generation:
                break

            ancestors = next_generation

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        ret = self.heads.pop()
        ps = self.get_parents(ret)
        self.parents[ret] = ps
        # Parents never seen before become new heads to explore.
        self.heads.update(p for p in ps if p not in self.parents)
        return ret

    __next__ = next