Revision 959
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/.directory

[Dolphin]
Timestamp=2017,10,11,19,8,11
Version=3
ViewMode=1
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/lru_cache.py

# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""A simple least-recently-used (LRU) cache."""

_null_key = object()


class _LRUNode(object):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')

    def __init__(self, key, value, cleanup=None):
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        # one pointer from this object, we just need to decide if it
        # actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self):
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
                                     self.next_key, prev_key)

    def run_cleanup(self):
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        self.value = None


class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()
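
A minimal usage sketch of the count-bounded cache above; the keys and values are invented for illustration, and the final count follows from the default after_cleanup_count of max_cache * 8 // 10 computed in _update_max_cache. The LRUSizeCache subclass that follows swaps the entry budget for a byte budget.

from dulwich.lru_cache import LRUCache

cache = LRUCache(max_cache=5)      # after_cleanup_count defaults to 4
for i in range(10):
    cache[i] = 'value-%d' % i      # __setitem__ -> add() -> cleanup()
assert len(cache) == 4             # shrunk back to after_cleanup_count
assert 9 in cache                  # recently used entries survive
assert cache.get(0) is None        # the oldest entries were evicted
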
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size/512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
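
A matching sketch for LRUSizeCache, again with invented keys and values. Two behaviors are worth noting: cleanup is driven by the running byte total rather than the entry count, and the guard at the top of add() means a single value at least as large as after_cleanup_size is never cached at all.

from dulwich.lru_cache import LRUSizeCache

sizes = LRUSizeCache(max_size=100)   # compute_size=len, cleanup target 80
sizes.add(b'a', b'x' * 60)
sizes.add(b'b', b'y' * 30)
sizes.add(b'c', b'z' * 50)           # total 140 > 100, so cleanup() runs
assert b'a' not in sizes             # least-recently-used entry evicted
assert b'b' in sizes and b'c' in sizes

sizes.add(b'huge', b'w' * 200)       # 200 >= 80: rejected outright
assert b'huge' not in sizes
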
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/diff_tree.py

# diff_tree.py -- Utilities for diffing files and trees.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Utilities for diffing files and trees."""
import sys
from collections import (
    defaultdict,
    namedtuple,
    )

from io import BytesIO
from itertools import chain
import stat

from dulwich.objects import (
    S_ISGITLINK,
    TreeEntry,
    )


# TreeChange type constants.
CHANGE_ADD = 'add'
CHANGE_MODIFY = 'modify'
CHANGE_DELETE = 'delete'
CHANGE_RENAME = 'rename'
CHANGE_COPY = 'copy'
CHANGE_UNCHANGED = 'unchanged'

RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)

_NULL_ENTRY = TreeEntry(None, None, None)

_MAX_SCORE = 100
RENAME_THRESHOLD = 60
MAX_FILES = 200
REWRITE_THRESHOLD = None


class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
    """Named tuple representing a single change between two trees."""

    @classmethod
    def add(cls, new):
        return cls(CHANGE_ADD, _NULL_ENTRY, new)

    @classmethod
    def delete(cls, old):
        return cls(CHANGE_DELETE, old, _NULL_ENTRY)


def _tree_entries(path, tree):
    result = []
    if not tree:
        return result
    for entry in tree.iteritems(name_order=True):
        result.append(entry.in_path(path))
    return result


def _merge_entries(path, tree1, tree2):
    """Merge the entries of two trees.

    :param path: A path to prepend to all tree entry names.
    :param tree1: The first Tree object to iterate, or None.
    :param tree2: The second Tree object to iterate, or None.
    :return: A list of pairs of TreeEntry objects for each pair of entries in
        the trees. If an entry exists in one tree but not the other, the other
        entry will have all attributes set to None. If neither entry's path is
        None, they are guaranteed to match.
    """
    entries1 = _tree_entries(path, tree1)
    entries2 = _tree_entries(path, tree2)
    i1 = i2 = 0
    len1 = len(entries1)
    len2 = len(entries2)

    result = []
    while i1 < len1 and i2 < len2:
        entry1 = entries1[i1]
        entry2 = entries2[i2]
        if entry1.path < entry2.path:
            result.append((entry1, _NULL_ENTRY))
            i1 += 1
        elif entry1.path > entry2.path:
            result.append((_NULL_ENTRY, entry2))
            i2 += 1
        else:
            result.append((entry1, entry2))
            i1 += 1
            i2 += 1
    for i in range(i1, len1):
        result.append((entries1[i], _NULL_ENTRY))
    for i in range(i2, len2):
        result.append((_NULL_ENTRY, entries2[i]))
    return result


def _is_tree(entry):
    mode = entry.mode
    if mode is None:
        return False
    return stat.S_ISDIR(mode)


def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
    """Recursively walk all the entries of two trees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the first Tree object to iterate, or None.
    :param tree2_id: The SHA of the second Tree object to iterate, or None.
    :param prune_identical: If True, identical subtrees will not be walked.
    :return: Iterator over pairs of TreeEntry objects for each pair of entries
        in the trees and their subtrees recursively. If an entry exists in one
        tree but not the other, the other entry will have all attributes set
        to None. If neither entry's path is None, they are guaranteed to
        match.
    """
    # This could be fairly easily generalized to >2 trees if we find a use
    # case.
    mode1 = tree1_id and stat.S_IFDIR or None
    mode2 = tree2_id and stat.S_IFDIR or None
    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
    while todo:
        entry1, entry2 = todo.pop()
        is_tree1 = _is_tree(entry1)
        is_tree2 = _is_tree(entry2)
        if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
            continue

        tree1 = is_tree1 and store[entry1.sha] or None
        tree2 = is_tree2 and store[entry2.sha] or None
        path = entry1.path or entry2.path
        todo.extend(reversed(_merge_entries(path, tree1, tree2)))
        yield entry1, entry2


def _skip_tree(entry):
    if entry.mode is None or stat.S_ISDIR(entry.mode):
        return _NULL_ENTRY
    return entry


def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
                 rename_detector=None):
    """Find the differences between the contents of two trees.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the source tree.
    :param tree2_id: The SHA of the target tree.
    :param want_unchanged: If True, include TreeChanges for unmodified entries
        as well.
    :param rename_detector: RenameDetector object for detecting renames.
    :return: Iterator over TreeChange instances for each change between the
        source and target tree.
    """
    if (rename_detector is not None and tree1_id is not None and
            tree2_id is not None):
        for change in rename_detector.changes_with_renames(
                tree1_id, tree2_id, want_unchanged=want_unchanged):
            yield change
        return

    entries = walk_trees(store, tree1_id, tree2_id,
                         prune_identical=(not want_unchanged))
    for entry1, entry2 in entries:
        if entry1 == entry2 and not want_unchanged:
            continue

        # Treat entries for trees as missing.
        entry1 = _skip_tree(entry1)
        entry2 = _skip_tree(entry2)

        if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
            if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
                # File type changed: report as delete/add.
                yield TreeChange.delete(entry1)
                entry1 = _NULL_ENTRY
                change_type = CHANGE_ADD
            elif entry1 == entry2:
                change_type = CHANGE_UNCHANGED
            else:
                change_type = CHANGE_MODIFY
        elif entry1 != _NULL_ENTRY:
            change_type = CHANGE_DELETE
        elif entry2 != _NULL_ENTRY:
            change_type = CHANGE_ADD
        else:
            # Both were None because at least one was a tree.
            continue
        yield TreeChange(change_type, entry1, entry2)


def _all_eq(seq, key, value):
    for e in seq:
        if key(e) != value:
            return False
    return True


def _all_same(seq, key):
    return _all_eq(seq[1:], key, key(seq[0]))


def tree_changes_for_merge(store, parent_tree_ids, tree_id,
                           rename_detector=None):
    """Get the tree changes for a merge tree relative to all its parents.

    :param store: An ObjectStore for looking up objects.
    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
    :param tree_id: The SHA of the merge tree.
    :param rename_detector: RenameDetector object for detecting renames.

    :return: Iterator over lists of TreeChange objects, one per conflicted path
        in the merge.

        Each list contains one element per parent, with the TreeChange for that
        path relative to that parent. An element may be None if it never
        existed in one parent and was deleted in two others.

        A path is only included in the output if it is a conflict, i.e. its SHA
        in the merge tree is not found in any of the parents, or in the case of
        deletes, if not all of the old SHAs match.
    """
    all_parent_changes = [tree_changes(store, t, tree_id,
                                       rename_detector=rename_detector)
                          for t in parent_tree_ids]
    num_parents = len(parent_tree_ids)
    changes_by_path = defaultdict(lambda: [None] * num_parents)

    # Organize by path.
    for i, parent_changes in enumerate(all_parent_changes):
        for change in parent_changes:
            if change.type == CHANGE_DELETE:
                path = change.old.path
            else:
                path = change.new.path
            changes_by_path[path][i] = change

    old_sha = lambda c: c.old.sha
    change_type = lambda c: c.type

    # Yield only conflicting changes.
    for _, changes in sorted(changes_by_path.items()):
        assert len(changes) == num_parents
        have = [c for c in changes if c is not None]
        if _all_eq(have, change_type, CHANGE_DELETE):
            if not _all_same(have, old_sha):
                yield changes
        elif not _all_same(have, change_type):
            yield changes
        elif None not in changes:
            # If no change was found relative to one parent, that means the SHA
            # must have matched the SHA in that parent, so it is not a
            # conflict.
            yield changes


_BLOCK_SIZE = 64


def _count_blocks(obj):
    """Count the blocks in an object.

    Splits the data into blocks either on lines or <=64-byte chunks of lines.

    :param obj: The object to count blocks for.
    :return: A dict of block hashcode -> total bytes occurring.
    """
    block_counts = defaultdict(int)
    block = BytesIO()
    n = 0

    # Cache attrs as locals to avoid expensive lookups in the inner loop.
    block_write = block.write
    block_seek = block.seek
    block_truncate = block.truncate
    block_getvalue = block.getvalue

    for c in chain(*obj.as_raw_chunks()):
        if sys.version_info[0] == 3:
            c = c.to_bytes(1, 'big')
        block_write(c)
        n += 1
        if c == b'\n' or n == _BLOCK_SIZE:
            value = block_getvalue()
            block_counts[hash(value)] += len(value)
            block_seek(0)
            block_truncate()
            n = 0
    if n > 0:
        last_block = block_getvalue()
        block_counts[hash(last_block)] += len(last_block)
    return block_counts


def _common_bytes(blocks1, blocks2):
    """Count the number of common bytes in two block count dicts.

    :param blocks1: The first dict of block hashcode -> total bytes.
    :param blocks2: The second dict of block hashcode -> total bytes.
    :return: The number of bytes in common between blocks1 and blocks2. This is
        only approximate due to possible hash collisions.
    """
    # Iterate over the smaller of the two dicts, since this is symmetrical.
    if len(blocks1) > len(blocks2):
        blocks1, blocks2 = blocks2, blocks1
    score = 0
    for block, count1 in blocks1.items():
        count2 = blocks2.get(block)
        if count2:
            score += min(count1, count2)
    return score


def _similarity_score(obj1, obj2, block_cache=None):
    """Compute a similarity score for two objects.

    :param obj1: The first object to score.
    :param obj2: The second object to score.
    :param block_cache: An optional dict of SHA to block counts to cache
        results between calls.
    :return: The similarity score between the two objects, defined as the
        number of bytes in common between the two objects divided by the
        maximum size, scaled to the range 0-100.
    """
    if block_cache is None:
        block_cache = {}
    if obj1.id not in block_cache:
        block_cache[obj1.id] = _count_blocks(obj1)
    if obj2.id not in block_cache:
        block_cache[obj2.id] = _count_blocks(obj2)

    common_bytes = _common_bytes(block_cache[obj1.id], block_cache[obj2.id])
    max_size = max(obj1.raw_length(), obj2.raw_length())
    if not max_size:
        return _MAX_SCORE
    return int(float(common_bytes) * _MAX_SCORE / max_size)


def _tree_change_key(entry):
    # Sort by old path then new path. If only one exists, use it for both keys.
    path1 = entry.old.path
    path2 = entry.new.path
    if path1 is None:
        path1 = path2
    if path2 is None:
        path2 = path1
    return (path1, path2)
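
The functions above are enough to diff two trees end to end. Here is a minimal sketch, assuming MemoryObjectStore from dulwich.object_store and the Blob and Tree classes from dulwich.objects (all defined outside this file); the file names and contents are invented. The RenameDetector class below builds on tree_changes to turn matching delete/add pairs into renames and copies.

from dulwich.diff_tree import tree_changes
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b'hello\n')
new_blob = Blob.from_string(b'hello world\n')
old_tree = Tree()
old_tree.add(b'a.txt', 0o100644, old_blob.id)
new_tree = Tree()
new_tree.add(b'a.txt', 0o100644, new_blob.id)
for obj in (old_blob, new_blob, old_tree, new_tree):
    store.add_object(obj)

for change in tree_changes(store, old_tree.id, new_tree.id):
    # prints: modify b'a.txt' b'a.txt'
    print(change.type, change.old.path, change.new.path)
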
class RenameDetector(object):
    """Object for handling rename detection between two trees."""

    def __init__(self, store, rename_threshold=RENAME_THRESHOLD,
                 max_files=MAX_FILES,
                 rewrite_threshold=REWRITE_THRESHOLD,
                 find_copies_harder=False):
        """Initialize the rename detector.

        :param store: An ObjectStore for looking up objects.
        :param rename_threshold: The threshold similarity score for considering
            an add/delete pair to be a rename/copy; see _similarity_score.
        :param max_files: The maximum number of adds and deletes to consider,
            or None for no limit. The detector is guaranteed to compare no more
            than max_files ** 2 add/delete pairs. This limit is provided
            because rename detection can be quadratic in the project size. If
            the limit is exceeded, no content rename detection is attempted.
        :param rewrite_threshold: The threshold similarity score below which a
            modify should be considered a delete/add, or None to not break
            modifies; see _similarity_score.
        :param find_copies_harder: If True, consider unmodified files when
            detecting copies.
        """
        self._store = store
        self._rename_threshold = rename_threshold
        self._rewrite_threshold = rewrite_threshold
        self._max_files = max_files
        self._find_copies_harder = find_copies_harder
        self._want_unchanged = False

    def _reset(self):
        self._adds = []
        self._deletes = []
        self._changes = []

    def _should_split(self, change):
        if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or
                change.old.sha == change.new.sha):
            return False
        old_obj = self._store[change.old.sha]
        new_obj = self._store[change.new.sha]
        return _similarity_score(old_obj, new_obj) < self._rewrite_threshold

    def _add_change(self, change):
        if change.type == CHANGE_ADD:
            self._adds.append(change)
        elif change.type == CHANGE_DELETE:
            self._deletes.append(change)
        elif self._should_split(change):
            self._deletes.append(TreeChange.delete(change.old))
            self._adds.append(TreeChange.add(change.new))
        elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
              or change.type == CHANGE_MODIFY):
            # Treat all modifies as potential deletes for rename detection,
            # but don't split them (to avoid spurious renames). Setting
            # find_copies_harder means we treat unchanged the same as
            # modified.
            self._deletes.append(change)
        else:
            self._changes.append(change)

    def _collect_changes(self, tree1_id, tree2_id):
        want_unchanged = self._find_copies_harder or self._want_unchanged
        for change in tree_changes(self._store, tree1_id, tree2_id,
                                   want_unchanged=want_unchanged):
            self._add_change(change)

    def _prune(self, add_paths, delete_paths):
        self._adds = [a for a in self._adds if a.new.path not in add_paths]
        self._deletes = [d for d in self._deletes
                         if d.old.path not in delete_paths]

    def _find_exact_renames(self):
        add_map = defaultdict(list)
        for add in self._adds:
            add_map[add.new.sha].append(add.new)
        delete_map = defaultdict(list)
        for delete in self._deletes:
            # Keep track of whether the delete was actually marked as a delete.
            # If not, it needs to be marked as a copy.
            is_delete = delete.type == CHANGE_DELETE
            delete_map[delete.old.sha].append((delete.old, is_delete))

        add_paths = set()
        delete_paths = set()
        for sha, sha_deletes in delete_map.items():
            sha_adds = add_map[sha]
            for (old, is_delete), new in zip(sha_deletes, sha_adds):
                if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                    continue
                if is_delete:
                    delete_paths.add(old.path)
                add_paths.add(new.path)
                new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
                self._changes.append(TreeChange(new_type, old, new))

            num_extra_adds = len(sha_adds) - len(sha_deletes)
            # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
            old = sha_deletes[0][0]
            if num_extra_adds > 0:
                for new in sha_adds[-num_extra_adds:]:
                    add_paths.add(new.path)
                    self._changes.append(TreeChange(CHANGE_COPY, old, new))
        self._prune(add_paths, delete_paths)

    def _should_find_content_renames(self):
        return len(self._adds) * len(self._deletes) <= self._max_files ** 2

    def _rename_type(self, check_paths, delete, add):
        if check_paths and delete.old.path == add.new.path:
            # If the paths match, this must be a split modify, so make sure it
            # comes out as a modify.
            return CHANGE_MODIFY
        elif delete.type != CHANGE_DELETE:
            # If it's in deletes but not marked as a delete, it must have been
            # added due to find_copies_harder, and needs to be marked as a
            # copy.
            return CHANGE_COPY
        return CHANGE_RENAME

    def _find_content_rename_candidates(self):
        candidates = self._candidates = []
        # TODO: Optimizations:
        # - Compare object sizes before counting blocks.
        # - Skip if delete's S_IFMT differs from all adds.
        # - Skip if adds or deletes is empty.
        # Match C git's behavior of not attempting to find content renames if
        # the matrix size exceeds the threshold.
        if not self._should_find_content_renames():
            return

        block_cache = {}
        check_paths = self._rename_threshold is not None
        for delete in self._deletes:
            if S_ISGITLINK(delete.old.mode):
                continue  # Git links don't exist in this repo.
            old_sha = delete.old.sha
            old_obj = self._store[old_sha]
            block_cache[old_sha] = _count_blocks(old_obj)
            for add in self._adds:
                if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                    continue
                new_obj = self._store[add.new.sha]
                score = _similarity_score(old_obj, new_obj,
                                          block_cache=block_cache)
                if score > self._rename_threshold:
                    new_type = self._rename_type(check_paths, delete, add)
                    rename = TreeChange(new_type, delete.old, add.new)
                    candidates.append((-score, rename))

    def _choose_content_renames(self):
        # Sort scores from highest to lowest, but keep names in ascending
        # order.
        self._candidates.sort()

        delete_paths = set()
        add_paths = set()
        for _, change in self._candidates:
            new_path = change.new.path
            if new_path in add_paths:
                continue
            old_path = change.old.path
            orig_type = change.type
            if old_path in delete_paths:
                change = TreeChange(CHANGE_COPY, change.old, change.new)

            # If the candidate was originally a copy, that means it came from a
            # modified or unchanged path, so we don't want to prune it.
            if orig_type != CHANGE_COPY:
                delete_paths.add(old_path)
            add_paths.add(new_path)
            self._changes.append(change)
        self._prune(add_paths, delete_paths)

    def _join_modifies(self):
        if self._rewrite_threshold is None:
            return

        modifies = {}
        delete_map = dict((d.old.path, d) for d in self._deletes)
        for add in self._adds:
            path = add.new.path
            delete = delete_map.get(path)
            if (delete is not None and
                    stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)

        self._adds = [a for a in self._adds if a.new.path not in modifies]
        self._deletes = [a for a in self._deletes if a.new.path not in
                         modifies]
        self._changes += modifies.values()

    def _sorted_changes(self):
        result = []
        result.extend(self._adds)
        result.extend(self._deletes)
        result.extend(self._changes)
        result.sort(key=_tree_change_key)
        return result

    def _prune_unchanged(self):
        if self._want_unchanged:
            return
        self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED]

    def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
        """Iterate TreeChanges between two tree SHAs, with rename detection."""
        self._reset()
        self._want_unchanged = want_unchanged
        self._collect_changes(tree1_id, tree2_id)
        self._find_exact_renames()
        self._find_content_rename_candidates()
        self._choose_content_renames()
        self._join_modifies()
        self._prune_unchanged()
        return self._sorted_changes()


# Hold on to the pure-python implementations for testing.
_is_tree_py = _is_tree
_merge_entries_py = _merge_entries
_count_blocks_py = _count_blocks
try:
    # Try to import C versions
    from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks
except ImportError:
    pass
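
A sketch of the detector in use, with the same invented store setup as the earlier example: moving a blob to a new path without changing its contents is collapsed by _find_exact_renames into a single rename instead of a delete plus an add.

from dulwich.diff_tree import RenameDetector, tree_changes
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b'the same contents\n')
old_tree = Tree()
old_tree.add(b'old-name.txt', 0o100644, blob.id)
new_tree = Tree()
new_tree.add(b'new-name.txt', 0o100644, blob.id)
for obj in (blob, old_tree, new_tree):
    store.add_object(obj)

detector = RenameDetector(store)
for change in tree_changes(store, old_tree.id, new_tree.id,
                           rename_detector=detector):
    # prints: rename b'old-name.txt' b'new-name.txt'
    print(change.type, change.old.path, change.new.path)
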
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/objects.py

# objects.py -- Access to base git objects
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Access to base git objects."""

import binascii
from io import BytesIO
from collections import namedtuple
import os
import posixpath
import stat
import warnings
import zlib
from hashlib import sha1

from dulwich.errors import (
    ChecksumMismatch,
    NotBlobError,
    NotCommitError,
    NotTagError,
    NotTreeError,
    ObjectFormatException,
    )
from dulwich.file import GitFile


ZERO_SHA = b'0' * 40

# Header fields for commits
_TREE_HEADER = b'tree'
_PARENT_HEADER = b'parent'
_AUTHOR_HEADER = b'author'
_COMMITTER_HEADER = b'committer'
_ENCODING_HEADER = b'encoding'
_MERGETAG_HEADER = b'mergetag'
_GPGSIG_HEADER = b'gpgsig'

# Header fields for objects
_OBJECT_HEADER = b'object'
_TYPE_HEADER = b'type'
_TAG_HEADER = b'tag'
_TAGGER_HEADER = b'tagger'


S_IFGITLINK = 0o160000


def S_ISGITLINK(m):
    """Check if a mode indicates a submodule.

    :param m: Mode to check
    :return: a ``boolean``
    """
    return (stat.S_IFMT(m) == S_IFGITLINK)


def _decompress(string):
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    dcomped += dcomp.flush()
    return dcomped


def sha_to_hex(sha):
    """Takes a string and returns the hex of the sha within"""
    hexsha = binascii.hexlify(sha)
    assert len(hexsha) == 40, \
        "Incorrect length of sha1 string: %d" % len(hexsha)
    return hexsha


def hex_to_sha(hex):
    """Takes a hex sha and returns a binary sha"""
    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
    try:
        return binascii.unhexlify(hex)
    except TypeError as exc:
        if not isinstance(hex, bytes):
            raise
        raise ValueError(exc.args[0])


def valid_hexsha(hex):
    if len(hex) != 40:
        return False
    try:
        binascii.unhexlify(hex)
    except (TypeError, binascii.Error):
        return False
    else:
        return True


def hex_to_filename(path, hex):
    """Takes a hex sha and returns its filename relative to the given path."""
    # os.path.join accepts bytes or unicode, but all args must be of the same
    # type. Make sure that hex which is expected to be bytes, is the same type
    # as path.
    if getattr(path, 'encode', None) is not None:
        hex = hex.decode('ascii')
    dir = hex[:2]
    file = hex[2:]
    # Check from object dir
    return os.path.join(path, dir, file)


def filename_to_hex(filename):
    """Takes an object filename and returns its corresponding hex sha."""
    # grab the last (up to) two path components
    names = filename.rsplit(os.path.sep, 2)[-2:]
    errmsg = "Invalid object filename: %s" % filename
    assert len(names) == 2, errmsg
    base, rest = names
    assert len(base) == 2 and len(rest) == 38, errmsg
    hex = (base + rest).encode('ascii')
    hex_to_sha(hex)
    return hex


def object_header(num_type, length):
    """Return an object header for the given numeric type and text length."""
    return (object_class(num_type).type_name +
            b' ' + str(length).encode('ascii') + b'\0')


def serializable_property(name, docstring=None):
    """A property that helps tracking whether serialization is necessary.
    """
    def set(obj, value):
        setattr(obj, "_"+name, value)
        obj._needs_serialization = True

    def get(obj):
        return getattr(obj, "_"+name)
    return property(get, set, doc=docstring)


def object_class(type):
    """Get the object class corresponding to the given type.

    :param type: Either a type name string or a numeric type.
    :return: The ShaFile subclass corresponding to the given type, or None if
        type is not a valid type name/number.
    """
    return _TYPE_MAP.get(type, None)


def check_hexsha(hex, error_msg):
    """Check if a string is a valid hex sha string.

    :param hex: Hex string to check
    :param error_msg: Error message to use in exception
    :raise ObjectFormatException: Raised when the string is not valid
    """
    if not valid_hexsha(hex):
        raise ObjectFormatException("%s %s" % (error_msg, hex))


def check_identity(identity, error_msg):
    """Check if the specified identity is valid.

    This will raise an exception if the identity is not valid.

    :param identity: Identity string
    :param error_msg: Error message to use in exception
    """
    email_start = identity.find(b'<')
    email_end = identity.find(b'>')
    if (email_start < 0 or email_end < 0 or email_end <= email_start
            or identity.find(b'<', email_start + 1) >= 0
            or identity.find(b'>', email_end + 1) >= 0
            or not identity.endswith(b'>')):
        raise ObjectFormatException(error_msg)


def git_line(*items):
    """Formats items into a space separated line."""
    return b' '.join(items) + b'\n'


class FixedSha(object):
    """SHA object that behaves like hashlib's but is given a fixed value."""

    __slots__ = ('_hexsha', '_sha')

    def __init__(self, hexsha):
        if getattr(hexsha, 'encode', None) is not None:
            hexsha = hexsha.encode('ascii')
        if not isinstance(hexsha, bytes):
            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
        self._hexsha = hexsha
        self._sha = hex_to_sha(hexsha)

    def digest(self):
        """Return the raw SHA digest."""
        return self._sha

    def hexdigest(self):
        """Return the hex SHA digest."""
        return self._hexsha.decode('ascii')
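
A quick sketch of the module-level helpers defined so far, with an invented SHA; hex_to_filename and filename_to_hex round-trip through the objects-directory naming scheme, and check_identity returns silently for a well-formed identity. The ShaFile base class below builds on these helpers.

from dulwich.objects import (
    check_identity, filename_to_hex, hex_to_filename, hex_to_sha, sha_to_hex)

hexsha = b'1' * 40
assert sha_to_hex(hex_to_sha(hexsha)) == hexsha

# Loose objects live under <objects dir>/<first two hex chars>/<rest>.
path = hex_to_filename('/repo/.git/objects', hexsha)
assert filename_to_hex(path) == hexsha

check_identity(b'Jane Doe <jane@example.com>', 'invalid identity')  # no error
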
class ShaFile(object):
    """A git SHA file."""

    __slots__ = ('_chunked_text', '_sha', '_needs_serialization')

    @staticmethod
    def _parse_legacy_object_header(magic, f):
        """Parse a legacy object, creating it but not reading the file."""
        bufsize = 1024
        decomp = zlib.decompressobj()
        header = decomp.decompress(magic)
        start = 0
        end = -1
        while end < 0:
            extra = f.read(bufsize)
            header += decomp.decompress(extra)
            magic += extra
            end = header.find(b'\0', start)
            start = len(header)
        header = header[:end]
        type_name, size = header.split(b' ', 1)
        size = int(size)  # sanity check
        obj_class = object_class(type_name)
        if not obj_class:
            raise ObjectFormatException("Not a known type: %s" % type_name)
        return obj_class()

    def _parse_legacy_object(self, map):
        """Parse a legacy object, setting the raw string."""
        text = _decompress(map)
        header_end = text.find(b'\0')
        if header_end < 0:
            raise ObjectFormatException("Invalid object header, no \\0")
        self.set_raw_string(text[header_end+1:])

    def as_legacy_object_chunks(self):
        """Return chunks representing the object in the experimental format.

        :return: List of strings
        """
        compobj = zlib.compressobj()
        yield compobj.compress(self._header())
        for chunk in self.as_raw_chunks():
            yield compobj.compress(chunk)
        yield compobj.flush()

    def as_legacy_object(self):
        """Return string representing the object in the experimental format.
        """
        return b''.join(self.as_legacy_object_chunks())

    def as_raw_chunks(self):
        """Return chunks with serialization of the object.

        :return: List of strings, not necessarily one per line
        """
        if self._needs_serialization:
            self._sha = None
            self._chunked_text = self._serialize()
            self._needs_serialization = False
        return self._chunked_text

    def as_raw_string(self):
        """Return raw string with serialization of the object.

        :return: String object
        """
        return b''.join(self.as_raw_chunks())

    def __str__(self):
        """Return raw string serialization of this object."""
        return self.as_raw_string()

    def __hash__(self):
        """Return unique hash for this object."""
        return hash(self.id)

    def as_pretty_string(self):
        """Return a string representing this object, fit for display."""
        return self.as_raw_string()

    def set_raw_string(self, text, sha=None):
        """Set the contents of this object from a serialized string."""
        if not isinstance(text, bytes):
            raise TypeError('Expected bytes for text, got %r' % text)
        self.set_raw_chunks([text], sha)

    def set_raw_chunks(self, chunks, sha=None):
        """Set the contents of this object from a list of chunks."""
        self._chunked_text = chunks
        self._deserialize(chunks)
        if sha is None:
            self._sha = None
        else:
            self._sha = FixedSha(sha)
        self._needs_serialization = False

    @staticmethod
    def _parse_object_header(magic, f):
        """Parse a new style object, creating it but not reading the file."""
        num_type = (ord(magic[0:1]) >> 4) & 7
        obj_class = object_class(num_type)
        if not obj_class:
            raise ObjectFormatException("Not a known type %d" % num_type)
        return obj_class()

    def _parse_object(self, map):
        """Parse a new style object, setting self._text."""
        # skip type and size; type must have already been determined, and
        # we trust zlib to fail if it's otherwise corrupted
        byte = ord(map[0:1])
        used = 1
        while (byte & 0x80) != 0:
            byte = ord(map[used:used+1])
            used += 1
        raw = map[used:]
        self.set_raw_string(_decompress(raw))

    @classmethod
    def _is_legacy_object(cls, magic):
        b0 = ord(magic[0:1])
        b1 = ord(magic[1:2])
        word = (b0 << 8) + b1
        return (b0 & 0x8F) == 0x08 and (word % 31) == 0

    @classmethod
    def _parse_file(cls, f):
        map = f.read()
        if cls._is_legacy_object(map):
            obj = cls._parse_legacy_object_header(map, f)
            obj._parse_legacy_object(map)
        else:
            obj = cls._parse_object_header(map, f)
            obj._parse_object(map)
        return obj

    def __init__(self):
        """Don't call this directly"""
        self._sha = None
        self._chunked_text = []
        self._needs_serialization = True

    def _deserialize(self, chunks):
        raise NotImplementedError(self._deserialize)

    def _serialize(self):
        raise NotImplementedError(self._serialize)

    @classmethod
    def from_path(cls, path):
        """Open a SHA file from disk."""
        with GitFile(path, 'rb') as f:
            return cls.from_file(f)

    @classmethod
    def from_file(cls, f):
        """Get the contents of a SHA file on disk."""
        try:
            obj = cls._parse_file(f)
            obj._sha = None
            return obj
        except (IndexError, ValueError):
            raise ObjectFormatException("invalid object header")

    @staticmethod
    def from_raw_string(type_num, string, sha=None):
        """Creates an object of the indicated type from the raw string given.

        :param type_num: The numeric type of the object.
        :param string: The raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_string(string, sha)
        return obj

    @staticmethod
    def from_raw_chunks(type_num, chunks, sha=None):
        """Creates an object of the indicated type from the raw chunks given.

        :param type_num: The numeric type of the object.
        :param chunks: An iterable of the raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_chunks(chunks, sha)
        return obj

    @classmethod
    def from_string(cls, string):
        """Create a ShaFile from a string."""
        obj = cls()
        obj.set_raw_string(string)
        return obj

    def _check_has_member(self, member, error_msg):
        """Check that the object has a given member variable.

        :param member: the member variable to check for
        :param error_msg: the message for an error if the member is missing
        :raise ObjectFormatException: with the given error_msg if member is
            missing or is None
        """
        if getattr(self, member, None) is None:
            raise ObjectFormatException(error_msg)

    def check(self):
        """Check this object for internal consistency.

        :raise ObjectFormatException: if the object is malformed in some way
        :raise ChecksumMismatch: if the object was created with a SHA that does
            not match its contents
        """
        # TODO: if we find that error-checking during object parsing is a
        # performance bottleneck, those checks should be moved to the class's
        # check() method during optimization so we can still check the object
        # when necessary.
        old_sha = self.id
        try:
            self._deserialize(self.as_raw_chunks())
            self._sha = None
            new_sha = self.id
        except Exception as e:
            raise ObjectFormatException(e)
        if old_sha != new_sha:
            raise ChecksumMismatch(new_sha, old_sha)

    def _header(self):
        return object_header(self.type, self.raw_length())

    def raw_length(self):
        """Returns the length of the raw string of this object."""
        ret = 0
        for chunk in self.as_raw_chunks():
            ret += len(chunk)
        return ret

    def sha(self):
        """The SHA1 object that is the name of this object."""
        if self._sha is None or self._needs_serialization:
            # this is a local because as_raw_chunks() overwrites self._sha
            new_sha = sha1()
            new_sha.update(self._header())
            for chunk in self.as_raw_chunks():
                new_sha.update(chunk)
            self._sha = new_sha
        return self._sha

    def copy(self):
        """Create a new copy of this SHA1 object from its raw string"""
        obj_class = object_class(self.get_type())
        return obj_class.from_raw_string(
            self.get_type(),
            self.as_raw_string(),
            self.id)

    @property
    def id(self):
        """The hex SHA of this object."""
        return self.sha().hexdigest().encode('ascii')

    def get_type(self):
        """Return the type number for this object class."""
        return self.type_num

    def set_type(self, type):
        """Set the type number for this object class."""
        self.type_num = type

    # DEPRECATED: use type_num or type_name as needed.
    type = property(get_type, set_type)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.id)

    def __ne__(self, other):
        return not isinstance(other, ShaFile) or self.id != other.id

    def __eq__(self, other):
        """Return True if the SHAs of the two objects match.

        It doesn't make sense to talk about an order on ShaFiles, so we don't
        override the rich comparison methods (__le__, etc.).
        """
        return isinstance(other, ShaFile) and self.id == other.id

    def __lt__(self, other):
        if not isinstance(other, ShaFile):
            raise TypeError
        return self.id < other.id

    def __le__(self, other):
        if not isinstance(other, ShaFile):
            raise TypeError
        return self.id <= other.id
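
Finally, an illustrative round trip through the ShaFile API. This assumes the Blob subclass, which is defined further down in objects.py beyond the point where this listing is cut off; the content is invented.

from dulwich.objects import Blob

blob = Blob.from_string(b'my file content\n')
print(blob.id)                      # 40-character hex SHA, as bytes

copy = Blob.from_raw_string(blob.type_num, blob.as_raw_string())
assert copy == blob                 # ShaFile.__eq__ compares hex SHAs
blob.check()                        # no exception: object is well formed
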