Revision 959

View differences:

org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/.directory

[Dolphin]
Timestamp=2017,10,11,19,8,11
Version=3
ViewMode=1
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/lru_cache.py

# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""A simple least-recently-used (LRU) cache."""

_null_key = object()


class _LRUNode(object):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')

    def __init__(self, key, value, cleanup=None):
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        #       one pointer from this object, we just need to decide if it
        #       actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self):
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
                                     self.next_key, prev_key)

    def run_cleanup(self):
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        self.value = None


class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__.
        # We also know more about the structure: if __getitem__ is succeeding,
        # then self._most_recently_used must not be None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()


class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size/512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
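
A minimal usage sketch of the module above, for readers skimming the revision. It is not part of the commit, and the keys, values and sizes are made up for illustration:

from dulwich.lru_cache import LRUCache, LRUSizeCache

# Count-bounded cache: adding a fourth entry pushes len() past
# max_cache, which triggers cleanup() down to after_cleanup_count.
cache = LRUCache(max_cache=3, after_cleanup_count=2)
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3
_ = cache['a']   # touching 'a' makes it most recently used
cache['d'] = 4   # evicts in LRU order: first 'b', then 'c'
assert sorted(cache.keys()) == ['a', 'd']

# Size-bounded cache: eviction is driven by the total size of the
# values (len() by default), not by how many entries there are.
sized = LRUSizeCache(max_size=50, after_cleanup_size=40)
sized.add('blob1', b'x' * 30)
sized.add('blob2', b'y' * 30)  # 60 bytes > max_size: shrink to <= 40
assert 'blob2' in sized and 'blob1' not in sized

Note that add() only triggers cleanup() once the cache overflows, so the cache oscillates between after_cleanup_count and max_cache entries rather than evicting on every insert.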
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/diff_tree.py

# diff_tree.py -- Utilities for diffing files and trees.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Utilities for diffing files and trees."""
import sys
from collections import (
    defaultdict,
    namedtuple,
    )

from io import BytesIO
from itertools import chain
import stat

from dulwich.objects import (
    S_ISGITLINK,
    TreeEntry,
    )


# TreeChange type constants.
CHANGE_ADD = 'add'
CHANGE_MODIFY = 'modify'
CHANGE_DELETE = 'delete'
CHANGE_RENAME = 'rename'
CHANGE_COPY = 'copy'
CHANGE_UNCHANGED = 'unchanged'

RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)

_NULL_ENTRY = TreeEntry(None, None, None)

_MAX_SCORE = 100
RENAME_THRESHOLD = 60
MAX_FILES = 200
REWRITE_THRESHOLD = None


class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
    """Named tuple representing a single change between two trees."""

    @classmethod
    def add(cls, new):
        return cls(CHANGE_ADD, _NULL_ENTRY, new)

    @classmethod
    def delete(cls, old):
        return cls(CHANGE_DELETE, old, _NULL_ENTRY)


def _tree_entries(path, tree):
    result = []
    if not tree:
        return result
    for entry in tree.iteritems(name_order=True):
        result.append(entry.in_path(path))
    return result


def _merge_entries(path, tree1, tree2):
    """Merge the entries of two trees.

    :param path: A path to prepend to all tree entry names.
    :param tree1: The first Tree object to iterate, or None.
    :param tree2: The second Tree object to iterate, or None.
    :return: A list of pairs of TreeEntry objects for each pair of entries in
        the trees. If an entry exists in one tree but not the other, the other
        entry will have all attributes set to None. If neither entry's path is
        None, they are guaranteed to match.
    """
    entries1 = _tree_entries(path, tree1)
    entries2 = _tree_entries(path, tree2)
    i1 = i2 = 0
    len1 = len(entries1)
    len2 = len(entries2)

    result = []
    while i1 < len1 and i2 < len2:
        entry1 = entries1[i1]
        entry2 = entries2[i2]
        if entry1.path < entry2.path:
            result.append((entry1, _NULL_ENTRY))
            i1 += 1
        elif entry1.path > entry2.path:
            result.append((_NULL_ENTRY, entry2))
            i2 += 1
        else:
            result.append((entry1, entry2))
            i1 += 1
            i2 += 1
    for i in range(i1, len1):
        result.append((entries1[i], _NULL_ENTRY))
    for i in range(i2, len2):
        result.append((_NULL_ENTRY, entries2[i]))
    return result


def _is_tree(entry):
    mode = entry.mode
    if mode is None:
        return False
    return stat.S_ISDIR(mode)


def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
    """Recursively walk all the entries of two trees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the first Tree object to iterate, or None.
    :param tree2_id: The SHA of the second Tree object to iterate, or None.
    :param prune_identical: If True, identical subtrees will not be walked.
    :return: Iterator over pairs of TreeEntry objects for each pair of entries
        in the trees and their subtrees recursively. If an entry exists in one
        tree but not the other, the other entry will have all attributes set
        to None. If neither entry's path is None, they are guaranteed to
        match.
    """
    # This could be fairly easily generalized to >2 trees if we find a use
    # case.
    mode1 = tree1_id and stat.S_IFDIR or None
    mode2 = tree2_id and stat.S_IFDIR or None
    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
    while todo:
        entry1, entry2 = todo.pop()
        is_tree1 = _is_tree(entry1)
        is_tree2 = _is_tree(entry2)
        if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
            continue

        tree1 = is_tree1 and store[entry1.sha] or None
        tree2 = is_tree2 and store[entry2.sha] or None
        path = entry1.path or entry2.path
        todo.extend(reversed(_merge_entries(path, tree1, tree2)))
        yield entry1, entry2


def _skip_tree(entry):
    if entry.mode is None or stat.S_ISDIR(entry.mode):
        return _NULL_ENTRY
    return entry


def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
                 rename_detector=None):
    """Find the differences between the contents of two trees.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the source tree.
    :param tree2_id: The SHA of the target tree.
    :param want_unchanged: If True, include TreeChanges for unmodified entries
        as well.
    :param rename_detector: RenameDetector object for detecting renames.
    :return: Iterator over TreeChange instances for each change between the
        source and target tree.
    """
    if (rename_detector is not None and tree1_id is not None and
        tree2_id is not None):
        for change in rename_detector.changes_with_renames(
            tree1_id, tree2_id, want_unchanged=want_unchanged):
            yield change
        return

    entries = walk_trees(store, tree1_id, tree2_id,
                         prune_identical=(not want_unchanged))
    for entry1, entry2 in entries:
        if entry1 == entry2 and not want_unchanged:
            continue

        # Treat entries for trees as missing.
        entry1 = _skip_tree(entry1)
        entry2 = _skip_tree(entry2)

        if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
            if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
                # File type changed: report as delete/add.
                yield TreeChange.delete(entry1)
                entry1 = _NULL_ENTRY
                change_type = CHANGE_ADD
            elif entry1 == entry2:
                change_type = CHANGE_UNCHANGED
            else:
                change_type = CHANGE_MODIFY
        elif entry1 != _NULL_ENTRY:
            change_type = CHANGE_DELETE
        elif entry2 != _NULL_ENTRY:
            change_type = CHANGE_ADD
        else:
            # Both were None because at least one was a tree.
            continue
        yield TreeChange(change_type, entry1, entry2)


def _all_eq(seq, key, value):
    for e in seq:
        if key(e) != value:
            return False
    return True


def _all_same(seq, key):
    return _all_eq(seq[1:], key, key(seq[0]))


def tree_changes_for_merge(store, parent_tree_ids, tree_id,
                           rename_detector=None):
    """Get the tree changes for a merge tree relative to all its parents.

    :param store: An ObjectStore for looking up objects.
    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
    :param tree_id: The SHA of the merge tree.
    :param rename_detector: RenameDetector object for detecting renames.

    :return: Iterator over lists of TreeChange objects, one per conflicted path
        in the merge.

        Each list contains one element per parent, with the TreeChange for that
        path relative to that parent. An element may be None if it never
        existed in one parent and was deleted in two others.

        A path is only included in the output if it is a conflict, i.e. its SHA
        in the merge tree is not found in any of the parents, or in the case of
        deletes, if not all of the old SHAs match.
    """
    all_parent_changes = [tree_changes(store, t, tree_id,
                                       rename_detector=rename_detector)
                          for t in parent_tree_ids]
    num_parents = len(parent_tree_ids)
    changes_by_path = defaultdict(lambda: [None] * num_parents)

    # Organize by path.
    for i, parent_changes in enumerate(all_parent_changes):
        for change in parent_changes:
            if change.type == CHANGE_DELETE:
                path = change.old.path
            else:
                path = change.new.path
            changes_by_path[path][i] = change

    old_sha = lambda c: c.old.sha
    change_type = lambda c: c.type

    # Yield only conflicting changes.
    for _, changes in sorted(changes_by_path.items()):
        assert len(changes) == num_parents
        have = [c for c in changes if c is not None]
        if _all_eq(have, change_type, CHANGE_DELETE):
            if not _all_same(have, old_sha):
                yield changes
        elif not _all_same(have, change_type):
            yield changes
        elif None not in changes:
            # If no change was found relative to one parent, that means the SHA
            # must have matched the SHA in that parent, so it is not a
            # conflict.
            yield changes
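
# Worked example (editorial note, not part of the revision): in a
# two-parent merge where path b'a' carries SHA X in the merge tree, X in
# parent 1 and Y in parent 2, tree_changes records no change against
# parent 1, so the row is [None, TreeChange(...)] and the
# 'None not in changes' test above suppresses it: the merge simply took
# parent 1's version. Rows survive only when the parents' changes
# genuinely conflict with the merge result.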


_BLOCK_SIZE = 64


def _count_blocks(obj):
    """Count the blocks in an object.

    Splits the data into blocks either on lines or <=64-byte chunks of lines.

    :param obj: The object to count blocks for.
    :return: A dict of block hashcode -> total bytes occurring.
    """
    block_counts = defaultdict(int)
    block = BytesIO()
    n = 0

    # Cache attrs as locals to avoid expensive lookups in the inner loop.
    block_write = block.write
    block_seek = block.seek
    block_truncate = block.truncate
    block_getvalue = block.getvalue

    for c in chain(*obj.as_raw_chunks()):
        if sys.version_info[0] == 3:
            c = c.to_bytes(1, 'big')
        block_write(c)
        n += 1
        if c == b'\n' or n == _BLOCK_SIZE:
            value = block_getvalue()
            block_counts[hash(value)] += len(value)
            block_seek(0)
            block_truncate()
            n = 0
    if n > 0:
        last_block = block_getvalue()
        block_counts[hash(last_block)] += len(last_block)
    return block_counts


def _common_bytes(blocks1, blocks2):
    """Count the number of common bytes in two block count dicts.

    :param blocks1: The first dict of block hashcode -> total bytes.
    :param blocks2: The second dict of block hashcode -> total bytes.
    :return: The number of bytes in common between blocks1 and blocks2. This is
        only approximate due to possible hash collisions.
    """
    # Iterate over the smaller of the two dicts, since this is symmetrical.
    if len(blocks1) > len(blocks2):
        blocks1, blocks2 = blocks2, blocks1
    score = 0
    for block, count1 in blocks1.items():
        count2 = blocks2.get(block)
        if count2:
            score += min(count1, count2)
    return score


def _similarity_score(obj1, obj2, block_cache=None):
    """Compute a similarity score for two objects.

    :param obj1: The first object to score.
    :param obj2: The second object to score.
    :param block_cache: An optional dict of SHA to block counts to cache
        results between calls.
    :return: The similarity score between the two objects, defined as the
        number of bytes in common between the two objects divided by the
        maximum size, scaled to the range 0-100.
    """
    if block_cache is None:
        block_cache = {}
    if obj1.id not in block_cache:
        block_cache[obj1.id] = _count_blocks(obj1)
    if obj2.id not in block_cache:
        block_cache[obj2.id] = _count_blocks(obj2)

    common_bytes = _common_bytes(block_cache[obj1.id], block_cache[obj2.id])
    max_size = max(obj1.raw_length(), obj2.raw_length())
    if not max_size:
        return _MAX_SCORE
    return int(float(common_bytes) * _MAX_SCORE / max_size)
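
# Worked example (editorial note, not part of the revision): two
# 100-byte blobs sharing 61 bytes' worth of identical lines score
# int(61 * 100 / 100) == 61, which clears the default RENAME_THRESHOLD
# of 60 because RenameDetector requires 'score > self._rename_threshold',
# strictly greater.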


def _tree_change_key(entry):
    # Sort by old path then new path. If only one exists, use it for both keys.
    path1 = entry.old.path
    path2 = entry.new.path
    if path1 is None:
        path1 = path2
    if path2 is None:
        path2 = path1
    return (path1, path2)


class RenameDetector(object):
    """Object for handling rename detection between two trees."""

    def __init__(self, store, rename_threshold=RENAME_THRESHOLD,
                 max_files=MAX_FILES,
                 rewrite_threshold=REWRITE_THRESHOLD,
                 find_copies_harder=False):
        """Initialize the rename detector.

        :param store: An ObjectStore for looking up objects.
        :param rename_threshold: The threshold similarity score for considering
            an add/delete pair to be a rename/copy; see _similarity_score.
        :param max_files: The maximum number of adds and deletes to consider,
            or None for no limit. The detector is guaranteed to compare no more
            than max_files ** 2 add/delete pairs. This limit is provided because
            rename detection can be quadratic in the project size. If the limit
            is exceeded, no content rename detection is attempted.
        :param rewrite_threshold: The threshold similarity score below which a
            modify should be considered a delete/add, or None to not break
            modifies; see _similarity_score.
        :param find_copies_harder: If True, consider unmodified files when
            detecting copies.
        """
        self._store = store
        self._rename_threshold = rename_threshold
        self._rewrite_threshold = rewrite_threshold
        self._max_files = max_files
        self._find_copies_harder = find_copies_harder
        self._want_unchanged = False

    def _reset(self):
        self._adds = []
        self._deletes = []
        self._changes = []

    def _should_split(self, change):
        if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or
            change.old.sha == change.new.sha):
            return False
        old_obj = self._store[change.old.sha]
        new_obj = self._store[change.new.sha]
        return _similarity_score(old_obj, new_obj) < self._rewrite_threshold

    def _add_change(self, change):
        if change.type == CHANGE_ADD:
            self._adds.append(change)
        elif change.type == CHANGE_DELETE:
            self._deletes.append(change)
        elif self._should_split(change):
            self._deletes.append(TreeChange.delete(change.old))
            self._adds.append(TreeChange.add(change.new))
        elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
              or change.type == CHANGE_MODIFY):
            # Treat all modifies as potential deletes for rename detection,
            # but don't split them (to avoid spurious renames). Setting
            # find_copies_harder means we treat unchanged the same as
            # modified.
            self._deletes.append(change)
        else:
            self._changes.append(change)

    def _collect_changes(self, tree1_id, tree2_id):
        want_unchanged = self._find_copies_harder or self._want_unchanged
        for change in tree_changes(self._store, tree1_id, tree2_id,
                                   want_unchanged=want_unchanged):
            self._add_change(change)

    def _prune(self, add_paths, delete_paths):
        self._adds = [a for a in self._adds if a.new.path not in add_paths]
        self._deletes = [d for d in self._deletes
                         if d.old.path not in delete_paths]

    def _find_exact_renames(self):
        add_map = defaultdict(list)
        for add in self._adds:
            add_map[add.new.sha].append(add.new)
        delete_map = defaultdict(list)
        for delete in self._deletes:
            # Keep track of whether the delete was actually marked as a delete.
            # If not, it needs to be marked as a copy.
            is_delete = delete.type == CHANGE_DELETE
            delete_map[delete.old.sha].append((delete.old, is_delete))

        add_paths = set()
        delete_paths = set()
        for sha, sha_deletes in delete_map.items():
            sha_adds = add_map[sha]
            for (old, is_delete), new in zip(sha_deletes, sha_adds):
                if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                    continue
                if is_delete:
                    delete_paths.add(old.path)
                add_paths.add(new.path)
                new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
                self._changes.append(TreeChange(new_type, old, new))

            num_extra_adds = len(sha_adds) - len(sha_deletes)
            # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
            old = sha_deletes[0][0]
            if num_extra_adds > 0:
                for new in sha_adds[-num_extra_adds:]:
                    add_paths.add(new.path)
                    self._changes.append(TreeChange(CHANGE_COPY, old, new))
        self._prune(add_paths, delete_paths)

    def _should_find_content_renames(self):
        return len(self._adds) * len(self._deletes) <= self._max_files ** 2

    def _rename_type(self, check_paths, delete, add):
        if check_paths and delete.old.path == add.new.path:
            # If the paths match, this must be a split modify, so make sure it
            # comes out as a modify.
            return CHANGE_MODIFY
        elif delete.type != CHANGE_DELETE:
            # If it's in deletes but not marked as a delete, it must have been
            # added due to find_copies_harder, and needs to be marked as a
            # copy.
            return CHANGE_COPY
        return CHANGE_RENAME

    def _find_content_rename_candidates(self):
        candidates = self._candidates = []
        # TODO: Optimizations:
        #  - Compare object sizes before counting blocks.
        #  - Skip if delete's S_IFMT differs from all adds.
        #  - Skip if adds or deletes is empty.
        # Match C git's behavior of not attempting to find content renames if
        # the matrix size exceeds the threshold.
        if not self._should_find_content_renames():
            return

        block_cache = {}
        check_paths = self._rename_threshold is not None
        for delete in self._deletes:
            if S_ISGITLINK(delete.old.mode):
                continue  # Git links don't exist in this repo.
            old_sha = delete.old.sha
            old_obj = self._store[old_sha]
            block_cache[old_sha] = _count_blocks(old_obj)
            for add in self._adds:
                if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                    continue
                new_obj = self._store[add.new.sha]
                score = _similarity_score(old_obj, new_obj,
                                          block_cache=block_cache)
                if score > self._rename_threshold:
                    new_type = self._rename_type(check_paths, delete, add)
                    rename = TreeChange(new_type, delete.old, add.new)
                    candidates.append((-score, rename))

    def _choose_content_renames(self):
        # Sort scores from highest to lowest, but keep names in ascending
        # order.
        self._candidates.sort()

        delete_paths = set()
        add_paths = set()
        for _, change in self._candidates:
            new_path = change.new.path
            if new_path in add_paths:
                continue
            old_path = change.old.path
            orig_type = change.type
            if old_path in delete_paths:
                change = TreeChange(CHANGE_COPY, change.old, change.new)

            # If the candidate was originally a copy, that means it came from a
            # modified or unchanged path, so we don't want to prune it.
            if orig_type != CHANGE_COPY:
                delete_paths.add(old_path)
            add_paths.add(new_path)
            self._changes.append(change)
        self._prune(add_paths, delete_paths)

    def _join_modifies(self):
        if self._rewrite_threshold is None:
            return

        modifies = {}
        delete_map = dict((d.old.path, d) for d in self._deletes)
        for add in self._adds:
            path = add.new.path
            delete = delete_map.get(path)
            if (delete is not None and
                stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)

        self._adds = [a for a in self._adds if a.new.path not in modifies]
        self._deletes = [d for d in self._deletes
                         if d.old.path not in modifies]
        self._changes += modifies.values()

    def _sorted_changes(self):
        result = []
        result.extend(self._adds)
        result.extend(self._deletes)
        result.extend(self._changes)
        result.sort(key=_tree_change_key)
        return result

    def _prune_unchanged(self):
        if self._want_unchanged:
            return
        self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED]

    def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
        """Iterate TreeChanges between two tree SHAs, with rename detection."""
        self._reset()
        self._want_unchanged = want_unchanged
        self._collect_changes(tree1_id, tree2_id)
        self._find_exact_renames()
        self._find_content_rename_candidates()
        self._choose_content_renames()
        self._join_modifies()
        self._prune_unchanged()
        return self._sorted_changes()


# Hold on to the pure-python implementations for testing.
_is_tree_py = _is_tree
_merge_entries_py = _merge_entries
_count_blocks_py = _count_blocks
try:
    # Try to import C versions
    from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks
except ImportError:
    pass
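
A sketch of how the pieces above fit together, again not part of the commit; it assumes MemoryObjectStore from dulwich's object_store module (not shown in this revision) and the Blob and Tree classes from objects.py below:

from dulwich.diff_tree import RenameDetector, tree_changes
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b'the same contents\n')
store.add_object(blob)

tree1 = Tree()
tree1.add(b'old.txt', 0o100644, blob.id)
tree2 = Tree()
tree2.add(b'new.txt', 0o100644, blob.id)
store.add_objects([(tree1, None), (tree2, None)])

# Without a detector the move is reported as an add plus a delete.
print([c.type for c in tree_changes(store, tree1.id, tree2.id)])
# ['add', 'delete']

# With a detector the identical blob is matched up as a single rename.
detector = RenameDetector(store)
print([c.type for c in tree_changes(store, tree1.id, tree2.id,
                                    rename_detector=detector)])
# ['rename']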
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/objects.py

# objects.py -- Access to base git objects
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Access to base git objects."""

import binascii
from io import BytesIO
from collections import namedtuple
import os
import posixpath
import stat
import warnings
import zlib
from hashlib import sha1

from dulwich.errors import (
    ChecksumMismatch,
    NotBlobError,
    NotCommitError,
    NotTagError,
    NotTreeError,
    ObjectFormatException,
    )
from dulwich.file import GitFile


ZERO_SHA = b'0' * 40

# Header fields for commits
_TREE_HEADER = b'tree'
_PARENT_HEADER = b'parent'
_AUTHOR_HEADER = b'author'
_COMMITTER_HEADER = b'committer'
_ENCODING_HEADER = b'encoding'
_MERGETAG_HEADER = b'mergetag'
_GPGSIG_HEADER = b'gpgsig'

# Header fields for objects
_OBJECT_HEADER = b'object'
_TYPE_HEADER = b'type'
_TAG_HEADER = b'tag'
_TAGGER_HEADER = b'tagger'


S_IFGITLINK = 0o160000


def S_ISGITLINK(m):
    """Check if a mode indicates a submodule.

    :param m: Mode to check
    :return: a ``boolean``
    """
    return (stat.S_IFMT(m) == S_IFGITLINK)


def _decompress(string):
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    dcomped += dcomp.flush()
    return dcomped


def sha_to_hex(sha):
    """Takes a string and returns the hex of the sha within"""
    hexsha = binascii.hexlify(sha)
    assert len(hexsha) == 40, "Incorrect length of sha1 string: %s" % hexsha
    return hexsha


def hex_to_sha(hex):
    """Takes a hex sha and returns a binary sha"""
    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
    try:
        return binascii.unhexlify(hex)
    except TypeError as exc:
        if not isinstance(hex, bytes):
            raise
        raise ValueError(exc.args[0])


def valid_hexsha(hex):
    if len(hex) != 40:
        return False
    try:
        binascii.unhexlify(hex)
    except (TypeError, binascii.Error):
        return False
    else:
        return True


def hex_to_filename(path, hex):
    """Takes a hex sha and returns its filename relative to the given path."""
    # os.path.join accepts bytes or unicode, but all args must be of the same
    # type. Make sure that hex which is expected to be bytes, is the same type
    # as path.
    if getattr(path, 'encode', None) is not None:
        hex = hex.decode('ascii')
    dir = hex[:2]
    file = hex[2:]
    # Check from object dir
    return os.path.join(path, dir, file)


def filename_to_hex(filename):
    """Takes an object filename and returns its corresponding hex sha."""
    # grab the last (up to) two path components
    names = filename.rsplit(os.path.sep, 2)[-2:]
    errmsg = "Invalid object filename: %s" % filename
    assert len(names) == 2, errmsg
    base, rest = names
    assert len(base) == 2 and len(rest) == 38, errmsg
    hex = (base + rest).encode('ascii')
    hex_to_sha(hex)
    return hex


def object_header(num_type, length):
    """Return an object header for the given numeric type and text length."""
    return (object_class(num_type).type_name + b' ' +
            str(length).encode('ascii') + b'\0')


def serializable_property(name, docstring=None):
    """A property that helps tracking whether serialization is necessary."""
    def set(obj, value):
        setattr(obj, "_"+name, value)
        obj._needs_serialization = True
    def get(obj):
        return getattr(obj, "_"+name)
    return property(get, set, doc=docstring)


def object_class(type):
    """Get the object class corresponding to the given type.

    :param type: Either a type name string or a numeric type.
    :return: The ShaFile subclass corresponding to the given type, or None if
        type is not a valid type name/number.
    """
    return _TYPE_MAP.get(type, None)


def check_hexsha(hex, error_msg):
    """Check if a string is a valid hex sha string.

    :param hex: Hex string to check
    :param error_msg: Error message to use in exception
    :raise ObjectFormatException: Raised when the string is not valid
    """
    if not valid_hexsha(hex):
        raise ObjectFormatException("%s %s" % (error_msg, hex))


def check_identity(identity, error_msg):
    """Check if the specified identity is valid.

    This will raise an exception if the identity is not valid.

    :param identity: Identity string
    :param error_msg: Error message to use in exception
    """
    email_start = identity.find(b'<')
    email_end = identity.find(b'>')
    if (email_start < 0 or email_end < 0 or email_end <= email_start
        or identity.find(b'<', email_start + 1) >= 0
        or identity.find(b'>', email_end + 1) >= 0
        or not identity.endswith(b'>')):
        raise ObjectFormatException(error_msg)


def git_line(*items):
    """Formats items into a space separated line."""
    return b' '.join(items) + b'\n'


class FixedSha(object):
    """SHA object that behaves like hashlib's but is given a fixed value."""

    __slots__ = ('_hexsha', '_sha')

    def __init__(self, hexsha):
        if getattr(hexsha, 'encode', None) is not None:
            hexsha = hexsha.encode('ascii')
        if not isinstance(hexsha, bytes):
            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
        self._hexsha = hexsha
        self._sha = hex_to_sha(hexsha)

    def digest(self):
        """Return the raw SHA digest."""
        return self._sha

    def hexdigest(self):
        """Return the hex SHA digest."""
        return self._hexsha.decode('ascii')


class ShaFile(object):
    """A git SHA file."""

    __slots__ = ('_chunked_text', '_sha', '_needs_serialization')

    @staticmethod
    def _parse_legacy_object_header(magic, f):
        """Parse a legacy object, creating it but not reading the file."""
        bufsize = 1024
        decomp = zlib.decompressobj()
        header = decomp.decompress(magic)
        start = 0
        end = -1
        while end < 0:
            extra = f.read(bufsize)
            header += decomp.decompress(extra)
            magic += extra
            end = header.find(b'\0', start)
            start = len(header)
        header = header[:end]
        type_name, size = header.split(b' ', 1)
        size = int(size)  # sanity check
        obj_class = object_class(type_name)
        if not obj_class:
            raise ObjectFormatException("Not a known type: %s" % type_name)
        return obj_class()

    def _parse_legacy_object(self, map):
        """Parse a legacy object, setting the raw string."""
        text = _decompress(map)
        header_end = text.find(b'\0')
        if header_end < 0:
            raise ObjectFormatException("Invalid object header, no \\0")
        self.set_raw_string(text[header_end+1:])

    def as_legacy_object_chunks(self):
        """Return chunks representing the object in the experimental format.

        :return: List of strings
        """
        compobj = zlib.compressobj()
        yield compobj.compress(self._header())
        for chunk in self.as_raw_chunks():
            yield compobj.compress(chunk)
        yield compobj.flush()

    def as_legacy_object(self):
        """Return string representing the object in the experimental format."""
        return b''.join(self.as_legacy_object_chunks())

    def as_raw_chunks(self):
        """Return chunks with serialization of the object.

        :return: List of strings, not necessarily one per line
        """
        if self._needs_serialization:
            self._sha = None
            self._chunked_text = self._serialize()
            self._needs_serialization = False
        return self._chunked_text

    def as_raw_string(self):
        """Return raw string with serialization of the object.

        :return: String object
        """
        return b''.join(self.as_raw_chunks())

    def __str__(self):
        """Return raw string serialization of this object."""
        return self.as_raw_string()

    def __hash__(self):
        """Return unique hash for this object."""
        return hash(self.id)

    def as_pretty_string(self):
        """Return a string representing this object, fit for display."""
        return self.as_raw_string()

    def set_raw_string(self, text, sha=None):
        """Set the contents of this object from a serialized string."""
        if not isinstance(text, bytes):
            raise TypeError('Expected bytes for text, got %r' % text)
        self.set_raw_chunks([text], sha)

    def set_raw_chunks(self, chunks, sha=None):
        """Set the contents of this object from a list of chunks."""
        self._chunked_text = chunks
        self._deserialize(chunks)
        if sha is None:
            self._sha = None
        else:
            self._sha = FixedSha(sha)
        self._needs_serialization = False

    @staticmethod
    def _parse_object_header(magic, f):
        """Parse a new style object, creating it but not reading the file."""
        num_type = (ord(magic[0:1]) >> 4) & 7
        obj_class = object_class(num_type)
        if not obj_class:
            raise ObjectFormatException("Not a known type %d" % num_type)
        return obj_class()

    def _parse_object(self, map):
        """Parse a new style object, setting self._text."""
        # skip type and size; type must have already been determined, and
        # we trust zlib to fail if it's otherwise corrupted
        byte = ord(map[0:1])
        used = 1
        while (byte & 0x80) != 0:
            byte = ord(map[used:used+1])
            used += 1
        raw = map[used:]
        self.set_raw_string(_decompress(raw))

    @classmethod
    def _is_legacy_object(cls, magic):
        b0 = ord(magic[0:1])
        b1 = ord(magic[1:2])
        word = (b0 << 8) + b1
        return (b0 & 0x8F) == 0x08 and (word % 31) == 0

    @classmethod
    def _parse_file(cls, f):
        map = f.read()
        if cls._is_legacy_object(map):
            obj = cls._parse_legacy_object_header(map, f)
            obj._parse_legacy_object(map)
        else:
            obj = cls._parse_object_header(map, f)
            obj._parse_object(map)
        return obj

    def __init__(self):
        """Don't call this directly"""
        self._sha = None
        self._chunked_text = []
        self._needs_serialization = True

    def _deserialize(self, chunks):
        raise NotImplementedError(self._deserialize)

    def _serialize(self):
        raise NotImplementedError(self._serialize)

    @classmethod
    def from_path(cls, path):
        """Open a SHA file from disk."""
        with GitFile(path, 'rb') as f:
            return cls.from_file(f)

    @classmethod
    def from_file(cls, f):
        """Get the contents of a SHA file on disk."""
        try:
            obj = cls._parse_file(f)
            obj._sha = None
            return obj
        except (IndexError, ValueError):
            raise ObjectFormatException("invalid object header")

    @staticmethod
    def from_raw_string(type_num, string, sha=None):
        """Creates an object of the indicated type from the raw string given.

        :param type_num: The numeric type of the object.
        :param string: The raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_string(string, sha)
        return obj

    @staticmethod
    def from_raw_chunks(type_num, chunks, sha=None):
        """Creates an object of the indicated type from the raw chunks given.

        :param type_num: The numeric type of the object.
        :param chunks: An iterable of the raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_chunks(chunks, sha)
        return obj

    @classmethod
    def from_string(cls, string):
        """Create a ShaFile from a string."""
        obj = cls()
        obj.set_raw_string(string)
        return obj

    def _check_has_member(self, member, error_msg):
        """Check that the object has a given member variable.

        :param member: the member variable to check for
        :param error_msg: the message for an error if the member is missing
        :raise ObjectFormatException: with the given error_msg if member is
            missing or is None
        """
        if getattr(self, member, None) is None:
            raise ObjectFormatException(error_msg)

    def check(self):
        """Check this object for internal consistency.

        :raise ObjectFormatException: if the object is malformed in some way
        :raise ChecksumMismatch: if the object was created with a SHA that does
            not match its contents
        """
        # TODO: if we find that error-checking during object parsing is a
        # performance bottleneck, those checks should be moved to the class's
        # check() method during optimization so we can still check the object
        # when necessary.
        old_sha = self.id
        try:
            self._deserialize(self.as_raw_chunks())
            self._sha = None
            new_sha = self.id
        except Exception as e:
            raise ObjectFormatException(e)
        if old_sha != new_sha:
            raise ChecksumMismatch(new_sha, old_sha)

    def _header(self):
        return object_header(self.type, self.raw_length())

    def raw_length(self):
        """Returns the length of the raw string of this object."""
        ret = 0
        for chunk in self.as_raw_chunks():
            ret += len(chunk)
        return ret

    def sha(self):
        """The SHA1 object that is the name of this object."""
        if self._sha is None or self._needs_serialization:
            # this is a local because as_raw_chunks() overwrites self._sha
            new_sha = sha1()
            new_sha.update(self._header())
            for chunk in self.as_raw_chunks():
                new_sha.update(chunk)
            self._sha = new_sha
        return self._sha

    def copy(self):
        """Create a new copy of this SHA1 object from its raw string"""
        obj_class = object_class(self.get_type())
        return obj_class.from_raw_string(
            self.get_type(),
            self.as_raw_string(),
            self.id)

    @property
    def id(self):
        """The hex SHA of this object."""
        return self.sha().hexdigest().encode('ascii')

    def get_type(self):
        """Return the type number for this object class."""
        return self.type_num

    def set_type(self, type):
        """Set the type number for this object class."""
        self.type_num = type

    # DEPRECATED: use type_num or type_name as needed.
    type = property(get_type, set_type)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.id)

    def __ne__(self, other):
        return not isinstance(other, ShaFile) or self.id != other.id

    def __eq__(self, other):
        """Return True if the SHAs of the two objects match.

        It doesn't make sense to talk about an order on ShaFiles, so we don't
        override the rich comparison methods (__le__, etc.).
        """
        return isinstance(other, ShaFile) and self.id == other.id

    def __lt__(self, other):
        if not isinstance(other, ShaFile):
            raise TypeError
        return self.id < other.id

    def __le__(self, other):
        if not isinstance(other, ShaFile):
... This diff was truncated because it exceeds the maximum size that can be displayed.
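
The listing is cut off above, but the helpers that did survive the truncation can be exercised on their own. A short sketch, not part of the commit; the sha value is an arbitrary 40-digit example, and the blob type number 3 resolves through _TYPE_MAP, which is defined in the truncated remainder of the module:

from dulwich.objects import (
    S_ISGITLINK, hex_to_sha, object_header, sha_to_hex,
    )

hexsha = b'c36f434eb4dccd1c0303b8e0ac0c6fa6f49b5bd1'
raw = hex_to_sha(hexsha)           # 20-byte binary form
assert sha_to_hex(raw) == hexsha   # round-trips to 40 hex digits

# A loose-object header is b'<type name> <length>\0'.
assert object_header(3, 12) == b'blob 12\x00'

# Submodule (gitlink) entries use mode 0o160000.
assert S_ISGITLINK(0o160000) and not S_ISGITLINK(0o100644)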
