gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / diff_tree.py @ 959
History | View | Annotate | Download (21.8 KB)
1 |
# diff_tree.py -- Utilities for diffing files and trees.
|
---|---|
2 |
# Copyright (C) 2010 Google, Inc.
|
3 |
#
|
4 |
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
5 |
# General Public License as public by the Free Software Foundation; version 2.0
|
6 |
# or (at your option) any later version. You can redistribute it and/or
|
7 |
# modify it under the terms of either of these two licenses.
|
8 |
#
|
9 |
# Unless required by applicable law or agreed to in writing, software
|
10 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
#
|
15 |
# You should have received a copy of the licenses; if not, see
|
16 |
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
17 |
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
18 |
# License, Version 2.0.
|
19 |
#
|
20 |
|
21 |
"""Utilities for diffing files and trees."""
|
22 |
import sys |
23 |
from collections import ( |
24 |
defaultdict, |
25 |
namedtuple, |
26 |
) |
27 |
|
28 |
from io import BytesIO |
29 |
from itertools import chain |
30 |
import stat |
31 |
|
32 |
from dulwich.objects import ( |
33 |
S_ISGITLINK, |
34 |
TreeEntry, |
35 |
) |
36 |
|
37 |
|
38 |
# TreeChange type constants.
|
39 |
CHANGE_ADD = 'add'
|
40 |
CHANGE_MODIFY = 'modify'
|
41 |
CHANGE_DELETE = 'delete'
|
42 |
CHANGE_RENAME = 'rename'
|
43 |
CHANGE_COPY = 'copy'
|
44 |
CHANGE_UNCHANGED = 'unchanged'
|
45 |
|
46 |
RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY) |
47 |
|
48 |
_NULL_ENTRY = TreeEntry(None, None, None) |
49 |
|
50 |
_MAX_SCORE = 100
|
51 |
RENAME_THRESHOLD = 60
|
52 |
MAX_FILES = 200
|
53 |
REWRITE_THRESHOLD = None
|
54 |
|
55 |
|
56 |
class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])): |
57 |
"""Named tuple a single change between two trees."""
|
58 |
|
59 |
@classmethod
|
60 |
def add(cls, new): |
61 |
return cls(CHANGE_ADD, _NULL_ENTRY, new)
|
62 |
|
63 |
@classmethod
|
64 |
def delete(cls, old): |
65 |
return cls(CHANGE_DELETE, old, _NULL_ENTRY)
|
66 |
|
67 |
|
68 |
def _tree_entries(path, tree): |
69 |
result = [] |
70 |
if not tree: |
71 |
return result
|
72 |
for entry in tree.iteritems(name_order=True): |
73 |
result.append(entry.in_path(path)) |
74 |
return result
|
75 |
|
76 |
|
77 |
def _merge_entries(path, tree1, tree2): |
78 |
"""Merge the entries of two trees.
|
79 |
|
80 |
:param path: A path to prepend to all tree entry names.
|
81 |
:param tree1: The first Tree object to iterate, or None.
|
82 |
:param tree2: The second Tree object to iterate, or None.
|
83 |
:return: A list of pairs of TreeEntry objects for each pair of entries in
|
84 |
the trees. If an entry exists in one tree but not the other, the other
|
85 |
entry will have all attributes set to None. If neither entry's path is
|
86 |
None, they are guaranteed to match.
|
87 |
"""
|
88 |
entries1 = _tree_entries(path, tree1) |
89 |
entries2 = _tree_entries(path, tree2) |
90 |
i1 = i2 = 0
|
91 |
len1 = len(entries1)
|
92 |
len2 = len(entries2)
|
93 |
|
94 |
result = [] |
95 |
while i1 < len1 and i2 < len2: |
96 |
entry1 = entries1[i1] |
97 |
entry2 = entries2[i2] |
98 |
if entry1.path < entry2.path:
|
99 |
result.append((entry1, _NULL_ENTRY)) |
100 |
i1 += 1
|
101 |
elif entry1.path > entry2.path:
|
102 |
result.append((_NULL_ENTRY, entry2)) |
103 |
i2 += 1
|
104 |
else:
|
105 |
result.append((entry1, entry2)) |
106 |
i1 += 1
|
107 |
i2 += 1
|
108 |
for i in range(i1, len1): |
109 |
result.append((entries1[i], _NULL_ENTRY)) |
110 |
for i in range(i2, len2): |
111 |
result.append((_NULL_ENTRY, entries2[i])) |
112 |
return result
|
113 |
|
114 |
|
115 |
def _is_tree(entry): |
116 |
mode = entry.mode |
117 |
if mode is None: |
118 |
return False |
119 |
return stat.S_ISDIR(mode)
|
120 |
|
121 |
|
122 |
def walk_trees(store, tree1_id, tree2_id, prune_identical=False): |
123 |
"""Recursively walk all the entries of two trees.
|
124 |
|
125 |
Iteration is depth-first pre-order, as in e.g. os.walk.
|
126 |
|
127 |
:param store: An ObjectStore for looking up objects.
|
128 |
:param tree1_id: The SHA of the first Tree object to iterate, or None.
|
129 |
:param tree2_id: The SHA of the second Tree object to iterate, or None.
|
130 |
:param prune_identical: If True, identical subtrees will not be walked.
|
131 |
:return: Iterator over Pairs of TreeEntry objects for each pair of entries
|
132 |
in the trees and their subtrees recursively. If an entry exists in one
|
133 |
tree but not the other, the other entry will have all attributes set
|
134 |
to None. If neither entry's path is None, they are guaranteed to
|
135 |
match.
|
136 |
"""
|
137 |
# This could be fairly easily generalized to >2 trees if we find a use
|
138 |
# case.
|
139 |
mode1 = tree1_id and stat.S_IFDIR or None |
140 |
mode2 = tree2_id and stat.S_IFDIR or None |
141 |
todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))] |
142 |
while todo:
|
143 |
entry1, entry2 = todo.pop() |
144 |
is_tree1 = _is_tree(entry1) |
145 |
is_tree2 = _is_tree(entry2) |
146 |
if prune_identical and is_tree1 and is_tree2 and entry1 == entry2: |
147 |
continue
|
148 |
|
149 |
tree1 = is_tree1 and store[entry1.sha] or None |
150 |
tree2 = is_tree2 and store[entry2.sha] or None |
151 |
path = entry1.path or entry2.path
|
152 |
todo.extend(reversed(_merge_entries(path, tree1, tree2)))
|
153 |
yield entry1, entry2
|
154 |
|
155 |
|
156 |
def _skip_tree(entry): |
157 |
if entry.mode is None or stat.S_ISDIR(entry.mode): |
158 |
return _NULL_ENTRY
|
159 |
return entry
|
160 |
|
161 |
|
162 |
def tree_changes(store, tree1_id, tree2_id, want_unchanged=False, |
163 |
rename_detector=None):
|
164 |
"""Find the differences between the contents of two trees.
|
165 |
|
166 |
:param store: An ObjectStore for looking up objects.
|
167 |
:param tree1_id: The SHA of the source tree.
|
168 |
:param tree2_id: The SHA of the target tree.
|
169 |
:param want_unchanged: If True, include TreeChanges for unmodified entries
|
170 |
as well.
|
171 |
:param rename_detector: RenameDetector object for detecting renames.
|
172 |
:return: Iterator over TreeChange instances for each change between the
|
173 |
source and target tree.
|
174 |
"""
|
175 |
if (rename_detector is not None and tree1_id is not None and |
176 |
tree2_id is not None): |
177 |
for change in rename_detector.changes_with_renames( |
178 |
tree1_id, tree2_id, want_unchanged=want_unchanged): |
179 |
yield change
|
180 |
return
|
181 |
|
182 |
entries = walk_trees(store, tree1_id, tree2_id, |
183 |
prune_identical=(not want_unchanged))
|
184 |
for entry1, entry2 in entries: |
185 |
if entry1 == entry2 and not want_unchanged: |
186 |
continue
|
187 |
|
188 |
# Treat entries for trees as missing.
|
189 |
entry1 = _skip_tree(entry1) |
190 |
entry2 = _skip_tree(entry2) |
191 |
|
192 |
if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY: |
193 |
if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
|
194 |
# File type changed: report as delete/add.
|
195 |
yield TreeChange.delete(entry1)
|
196 |
entry1 = _NULL_ENTRY |
197 |
change_type = CHANGE_ADD |
198 |
elif entry1 == entry2:
|
199 |
change_type = CHANGE_UNCHANGED |
200 |
else:
|
201 |
change_type = CHANGE_MODIFY |
202 |
elif entry1 != _NULL_ENTRY:
|
203 |
change_type = CHANGE_DELETE |
204 |
elif entry2 != _NULL_ENTRY:
|
205 |
change_type = CHANGE_ADD |
206 |
else:
|
207 |
# Both were None because at least one was a tree.
|
208 |
continue
|
209 |
yield TreeChange(change_type, entry1, entry2)
|
210 |
|
211 |
|
212 |
def _all_eq(seq, key, value): |
213 |
for e in seq: |
214 |
if key(e) != value:
|
215 |
return False |
216 |
return True |
217 |
|
218 |
|
219 |
def _all_same(seq, key): |
220 |
return _all_eq(seq[1:], key, key(seq[0])) |
221 |
|
222 |
|
223 |
def tree_changes_for_merge(store, parent_tree_ids, tree_id, |
224 |
rename_detector=None):
|
225 |
"""Get the tree changes for a merge tree relative to all its parents.
|
226 |
|
227 |
:param store: An ObjectStore for looking up objects.
|
228 |
:param parent_tree_ids: An iterable of the SHAs of the parent trees.
|
229 |
:param tree_id: The SHA of the merge tree.
|
230 |
:param rename_detector: RenameDetector object for detecting renames.
|
231 |
|
232 |
:return: Iterator over lists of TreeChange objects, one per conflicted path
|
233 |
in the merge.
|
234 |
|
235 |
Each list contains one element per parent, with the TreeChange for that
|
236 |
path relative to that parent. An element may be None if it never
|
237 |
existed in one parent and was deleted in two others.
|
238 |
|
239 |
A path is only included in the output if it is a conflict, i.e. its SHA
|
240 |
in the merge tree is not found in any of the parents, or in the case of
|
241 |
deletes, if not all of the old SHAs match.
|
242 |
"""
|
243 |
all_parent_changes = [tree_changes(store, t, tree_id, |
244 |
rename_detector=rename_detector) |
245 |
for t in parent_tree_ids] |
246 |
num_parents = len(parent_tree_ids)
|
247 |
changes_by_path = defaultdict(lambda: [None] * num_parents) |
248 |
|
249 |
# Organize by path.
|
250 |
for i, parent_changes in enumerate(all_parent_changes): |
251 |
for change in parent_changes: |
252 |
if change.type == CHANGE_DELETE:
|
253 |
path = change.old.path |
254 |
else:
|
255 |
path = change.new.path |
256 |
changes_by_path[path][i] = change |
257 |
|
258 |
old_sha = lambda c: c.old.sha
|
259 |
change_type = lambda c: c.type
|
260 |
|
261 |
# Yield only conflicting changes.
|
262 |
for _, changes in sorted(changes_by_path.items()): |
263 |
assert len(changes) == num_parents |
264 |
have = [c for c in changes if c is not None] |
265 |
if _all_eq(have, change_type, CHANGE_DELETE):
|
266 |
if not _all_same(have, old_sha): |
267 |
yield changes
|
268 |
elif not _all_same(have, change_type): |
269 |
yield changes
|
270 |
elif None not in changes: |
271 |
# If no change was found relative to one parent, that means the SHA
|
272 |
# must have matched the SHA in that parent, so it is not a
|
273 |
# conflict.
|
274 |
yield changes
|
275 |
|
276 |
|
277 |
_BLOCK_SIZE = 64
|
278 |
|
279 |
|
280 |
def _count_blocks(obj): |
281 |
"""Count the blocks in an object.
|
282 |
|
283 |
Splits the data into blocks either on lines or <=64-byte chunks of lines.
|
284 |
|
285 |
:param obj: The object to count blocks for.
|
286 |
:return: A dict of block hashcode -> total bytes occurring.
|
287 |
"""
|
288 |
block_counts = defaultdict(int)
|
289 |
block = BytesIO() |
290 |
n = 0
|
291 |
|
292 |
# Cache attrs as locals to avoid expensive lookups in the inner loop.
|
293 |
block_write = block.write |
294 |
block_seek = block.seek |
295 |
block_truncate = block.truncate |
296 |
block_getvalue = block.getvalue |
297 |
|
298 |
for c in chain(*obj.as_raw_chunks()): |
299 |
if sys.version_info[0] == 3: |
300 |
c = c.to_bytes(1, 'big') |
301 |
block_write(c) |
302 |
n += 1
|
303 |
if c == b'\n' or n == _BLOCK_SIZE: |
304 |
value = block_getvalue() |
305 |
block_counts[hash(value)] += len(value) |
306 |
block_seek(0)
|
307 |
block_truncate() |
308 |
n = 0
|
309 |
if n > 0: |
310 |
last_block = block_getvalue() |
311 |
block_counts[hash(last_block)] += len(last_block) |
312 |
return block_counts
|
313 |
|
314 |
|
315 |
def _common_bytes(blocks1, blocks2): |
316 |
"""Count the number of common bytes in two block count dicts.
|
317 |
|
318 |
:param block1: The first dict of block hashcode -> total bytes.
|
319 |
:param block2: The second dict of block hashcode -> total bytes.
|
320 |
:return: The number of bytes in common between blocks1 and blocks2. This is
|
321 |
only approximate due to possible hash collisions.
|
322 |
"""
|
323 |
# Iterate over the smaller of the two dicts, since this is symmetrical.
|
324 |
if len(blocks1) > len(blocks2): |
325 |
blocks1, blocks2 = blocks2, blocks1 |
326 |
score = 0
|
327 |
for block, count1 in blocks1.items(): |
328 |
count2 = blocks2.get(block) |
329 |
if count2:
|
330 |
score += min(count1, count2)
|
331 |
return score
|
332 |
|
333 |
|
334 |
def _similarity_score(obj1, obj2, block_cache=None): |
335 |
"""Compute a similarity score for two objects.
|
336 |
|
337 |
:param obj1: The first object to score.
|
338 |
:param obj2: The second object to score.
|
339 |
:param block_cache: An optional dict of SHA to block counts to cache
|
340 |
results between calls.
|
341 |
:return: The similarity score between the two objects, defined as the
|
342 |
number of bytes in common between the two objects divided by the
|
343 |
maximum size, scaled to the range 0-100.
|
344 |
"""
|
345 |
if block_cache is None: |
346 |
block_cache = {} |
347 |
if obj1.id not in block_cache: |
348 |
block_cache[obj1.id] = _count_blocks(obj1) |
349 |
if obj2.id not in block_cache: |
350 |
block_cache[obj2.id] = _count_blocks(obj2) |
351 |
|
352 |
common_bytes = _common_bytes(block_cache[obj1.id], block_cache[obj2.id]) |
353 |
max_size = max(obj1.raw_length(), obj2.raw_length())
|
354 |
if not max_size: |
355 |
return _MAX_SCORE
|
356 |
return int(float(common_bytes) * _MAX_SCORE / max_size) |
357 |
|
358 |
|
359 |
def _tree_change_key(entry): |
360 |
# Sort by old path then new path. If only one exists, use it for both keys.
|
361 |
path1 = entry.old.path |
362 |
path2 = entry.new.path |
363 |
if path1 is None: |
364 |
path1 = path2 |
365 |
if path2 is None: |
366 |
path2 = path1 |
367 |
return (path1, path2)
|
368 |
|
369 |
|
370 |
class RenameDetector(object): |
371 |
"""Object for handling rename detection between two trees."""
|
372 |
|
373 |
def __init__(self, store, rename_threshold=RENAME_THRESHOLD, |
374 |
max_files=MAX_FILES, |
375 |
rewrite_threshold=REWRITE_THRESHOLD, |
376 |
find_copies_harder=False):
|
377 |
"""Initialize the rename detector.
|
378 |
|
379 |
:param store: An ObjectStore for looking up objects.
|
380 |
:param rename_threshold: The threshold similarity score for considering
|
381 |
an add/delete pair to be a rename/copy; see _similarity_score.
|
382 |
:param max_files: The maximum number of adds and deletes to consider,
|
383 |
or None for no limit. The detector is guaranteed to compare no more
|
384 |
than max_files ** 2 add/delete pairs. This limit is provided because
|
385 |
rename detection can be quadratic in the project size. If the limit
|
386 |
is exceeded, no content rename detection is attempted.
|
387 |
:param rewrite_threshold: The threshold similarity score below which a
|
388 |
modify should be considered a delete/add, or None to not break
|
389 |
modifies; see _similarity_score.
|
390 |
:param find_copies_harder: If True, consider unmodified files when
|
391 |
detecting copies.
|
392 |
"""
|
393 |
self._store = store
|
394 |
self._rename_threshold = rename_threshold
|
395 |
self._rewrite_threshold = rewrite_threshold
|
396 |
self._max_files = max_files
|
397 |
self._find_copies_harder = find_copies_harder
|
398 |
self._want_unchanged = False |
399 |
|
400 |
def _reset(self): |
401 |
self._adds = []
|
402 |
self._deletes = []
|
403 |
self._changes = []
|
404 |
|
405 |
def _should_split(self, change): |
406 |
if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or |
407 |
change.old.sha == change.new.sha): |
408 |
return False |
409 |
old_obj = self._store[change.old.sha]
|
410 |
new_obj = self._store[change.new.sha]
|
411 |
return _similarity_score(old_obj, new_obj) < self._rewrite_threshold |
412 |
|
413 |
def _add_change(self, change): |
414 |
if change.type == CHANGE_ADD:
|
415 |
self._adds.append(change)
|
416 |
elif change.type == CHANGE_DELETE:
|
417 |
self._deletes.append(change)
|
418 |
elif self._should_split(change): |
419 |
self._deletes.append(TreeChange.delete(change.old))
|
420 |
self._adds.append(TreeChange.add(change.new))
|
421 |
elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED) |
422 |
or change.type == CHANGE_MODIFY):
|
423 |
# Treat all modifies as potential deletes for rename detection,
|
424 |
# but don't split them (to avoid spurious renames). Setting
|
425 |
# find_copies_harder means we treat unchanged the same as
|
426 |
# modified.
|
427 |
self._deletes.append(change)
|
428 |
else:
|
429 |
self._changes.append(change)
|
430 |
|
431 |
def _collect_changes(self, tree1_id, tree2_id): |
432 |
want_unchanged = self._find_copies_harder or self._want_unchanged |
433 |
for change in tree_changes(self._store, tree1_id, tree2_id, |
434 |
want_unchanged=want_unchanged): |
435 |
self._add_change(change)
|
436 |
|
437 |
def _prune(self, add_paths, delete_paths): |
438 |
self._adds = [a for a in self._adds if a.new.path not in add_paths] |
439 |
self._deletes = [d for d in self._deletes |
440 |
if d.old.path not in delete_paths] |
441 |
|
442 |
def _find_exact_renames(self): |
443 |
add_map = defaultdict(list)
|
444 |
for add in self._adds: |
445 |
add_map[add.new.sha].append(add.new) |
446 |
delete_map = defaultdict(list)
|
447 |
for delete in self._deletes: |
448 |
# Keep track of whether the delete was actually marked as a delete.
|
449 |
# If not, it needs to be marked as a copy.
|
450 |
is_delete = delete.type == CHANGE_DELETE |
451 |
delete_map[delete.old.sha].append((delete.old, is_delete)) |
452 |
|
453 |
add_paths = set()
|
454 |
delete_paths = set()
|
455 |
for sha, sha_deletes in delete_map.items(): |
456 |
sha_adds = add_map[sha] |
457 |
for (old, is_delete), new in zip(sha_deletes, sha_adds): |
458 |
if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
|
459 |
continue
|
460 |
if is_delete:
|
461 |
delete_paths.add(old.path) |
462 |
add_paths.add(new.path) |
463 |
new_type = is_delete and CHANGE_RENAME or CHANGE_COPY |
464 |
self._changes.append(TreeChange(new_type, old, new))
|
465 |
|
466 |
num_extra_adds = len(sha_adds) - len(sha_deletes) |
467 |
# TODO(dborowitz): Less arbitrary way of dealing with extra copies.
|
468 |
old = sha_deletes[0][0] |
469 |
if num_extra_adds > 0: |
470 |
for new in sha_adds[-num_extra_adds:]: |
471 |
add_paths.add(new.path) |
472 |
self._changes.append(TreeChange(CHANGE_COPY, old, new))
|
473 |
self._prune(add_paths, delete_paths)
|
474 |
|
475 |
def _should_find_content_renames(self): |
476 |
return len(self._adds) * len(self._deletes) <= self._max_files ** 2 |
477 |
|
478 |
def _rename_type(self, check_paths, delete, add): |
479 |
if check_paths and delete.old.path == add.new.path: |
480 |
# If the paths match, this must be a split modify, so make sure it
|
481 |
# comes out as a modify.
|
482 |
return CHANGE_MODIFY
|
483 |
elif delete.type != CHANGE_DELETE:
|
484 |
# If it's in deletes but not marked as a delete, it must have been
|
485 |
# added due to find_copies_harder, and needs to be marked as a
|
486 |
# copy.
|
487 |
return CHANGE_COPY
|
488 |
return CHANGE_RENAME
|
489 |
|
490 |
def _find_content_rename_candidates(self): |
491 |
candidates = self._candidates = []
|
492 |
# TODO: Optimizations:
|
493 |
# - Compare object sizes before counting blocks.
|
494 |
# - Skip if delete's S_IFMT differs from all adds.
|
495 |
# - Skip if adds or deletes is empty.
|
496 |
# Match C git's behavior of not attempting to find content renames if
|
497 |
# the matrix size exceeds the threshold.
|
498 |
if not self._should_find_content_renames(): |
499 |
return
|
500 |
|
501 |
block_cache = {} |
502 |
check_paths = self._rename_threshold is not None |
503 |
for delete in self._deletes: |
504 |
if S_ISGITLINK(delete.old.mode):
|
505 |
continue # Git links don't exist in this repo. |
506 |
old_sha = delete.old.sha |
507 |
old_obj = self._store[old_sha]
|
508 |
block_cache[old_sha] = _count_blocks(old_obj) |
509 |
for add in self._adds: |
510 |
if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
|
511 |
continue
|
512 |
new_obj = self._store[add.new.sha]
|
513 |
score = _similarity_score(old_obj, new_obj, |
514 |
block_cache=block_cache) |
515 |
if score > self._rename_threshold: |
516 |
new_type = self._rename_type(check_paths, delete, add)
|
517 |
rename = TreeChange(new_type, delete.old, add.new) |
518 |
candidates.append((-score, rename)) |
519 |
|
520 |
def _choose_content_renames(self): |
521 |
# Sort scores from highest to lowest, but keep names in ascending
|
522 |
# order.
|
523 |
self._candidates.sort()
|
524 |
|
525 |
delete_paths = set()
|
526 |
add_paths = set()
|
527 |
for _, change in self._candidates: |
528 |
new_path = change.new.path |
529 |
if new_path in add_paths: |
530 |
continue
|
531 |
old_path = change.old.path |
532 |
orig_type = change.type |
533 |
if old_path in delete_paths: |
534 |
change = TreeChange(CHANGE_COPY, change.old, change.new) |
535 |
|
536 |
# If the candidate was originally a copy, that means it came from a
|
537 |
# modified or unchanged path, so we don't want to prune it.
|
538 |
if orig_type != CHANGE_COPY:
|
539 |
delete_paths.add(old_path) |
540 |
add_paths.add(new_path) |
541 |
self._changes.append(change)
|
542 |
self._prune(add_paths, delete_paths)
|
543 |
|
544 |
def _join_modifies(self): |
545 |
if self._rewrite_threshold is None: |
546 |
return
|
547 |
|
548 |
modifies = {} |
549 |
delete_map = dict((d.old.path, d) for d in self._deletes) |
550 |
for add in self._adds: |
551 |
path = add.new.path |
552 |
delete = delete_map.get(path) |
553 |
if (delete is not None and |
554 |
stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)): |
555 |
modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new) |
556 |
|
557 |
self._adds = [a for a in self._adds if a.new.path not in modifies] |
558 |
self._deletes = [a for a in self._deletes if a.new.path not in |
559 |
modifies] |
560 |
self._changes += modifies.values()
|
561 |
|
562 |
def _sorted_changes(self): |
563 |
result = [] |
564 |
result.extend(self._adds)
|
565 |
result.extend(self._deletes)
|
566 |
result.extend(self._changes)
|
567 |
result.sort(key=_tree_change_key) |
568 |
return result
|
569 |
|
570 |
def _prune_unchanged(self): |
571 |
if self._want_unchanged: |
572 |
return
|
573 |
self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED] |
574 |
|
575 |
def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False): |
576 |
"""Iterate TreeChanges between two tree SHAs, with rename detection."""
|
577 |
self._reset()
|
578 |
self._want_unchanged = want_unchanged
|
579 |
self._collect_changes(tree1_id, tree2_id)
|
580 |
self._find_exact_renames()
|
581 |
self._find_content_rename_candidates()
|
582 |
self._choose_content_renames()
|
583 |
self._join_modifies()
|
584 |
self._prune_unchanged()
|
585 |
return self._sorted_changes() |
586 |
|
587 |
|
588 |
# Hold on to the pure-python implementations for testing.
|
589 |
_is_tree_py = _is_tree |
590 |
_merge_entries_py = _merge_entries |
591 |
_count_blocks_py = _count_blocks |
592 |
try:
|
593 |
# Try to import C versions
|
594 |
from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks |
595 |
except ImportError: |
596 |
pass
|