gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / walk.py @ 959
History | View | Annotate | Download (15.3 KB)
1 |
# walk.py -- General implementation of walking commits and their contents.
|
---|---|
2 |
# Copyright (C) 2010 Google, Inc.
|
3 |
#
|
4 |
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
5 |
# General Public License as public by the Free Software Foundation; version 2.0
|
6 |
# or (at your option) any later version. You can redistribute it and/or
|
7 |
# modify it under the terms of either of these two licenses.
|
8 |
#
|
9 |
# Unless required by applicable law or agreed to in writing, software
|
10 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
#
|
15 |
# You should have received a copy of the licenses; if not, see
|
16 |
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
17 |
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
18 |
# License, Version 2.0.
|
19 |
#
|
20 |
|
21 |
"""General implementation of walking commits and their contents."""
|
22 |
|
23 |
|
24 |
from collections import defaultdict |
25 |
|
26 |
import collections |
27 |
import heapq |
28 |
from itertools import chain |
29 |
|
30 |
from dulwich.diff_tree import ( |
31 |
RENAME_CHANGE_TYPES, |
32 |
tree_changes, |
33 |
tree_changes_for_merge, |
34 |
RenameDetector, |
35 |
) |
36 |
from dulwich.errors import ( |
37 |
MissingCommitError, |
38 |
) |
39 |
|
40 |
ORDER_DATE = 'date'
|
41 |
ORDER_TOPO = 'topo'
|
42 |
|
43 |
ALL_ORDERS = (ORDER_DATE, ORDER_TOPO) |
44 |
|
45 |
# Maximum number of commits to walk past a commit time boundary.
|
46 |
_MAX_EXTRA_COMMITS = 5
|
47 |
|
48 |
|
49 |
class WalkEntry(object): |
50 |
"""Object encapsulating a single result from a walk."""
|
51 |
|
52 |
def __init__(self, walker, commit): |
53 |
self.commit = commit
|
54 |
self._store = walker.store
|
55 |
self._get_parents = walker.get_parents
|
56 |
self._changes = {}
|
57 |
self._rename_detector = walker.rename_detector
|
58 |
|
59 |
def changes(self, path_prefix=None): |
60 |
"""Get the tree changes for this entry.
|
61 |
|
62 |
:param path_prefix: Portion of the path in the repository to
|
63 |
use to filter changes. Must be a directory name. Must be
|
64 |
a full, valid, path reference (no partial names or wildcards).
|
65 |
:return: For commits with up to one parent, a list of TreeChange
|
66 |
objects; if the commit has no parents, these will be relative to the
|
67 |
empty tree. For merge commits, a list of lists of TreeChange
|
68 |
objects; see dulwich.diff.tree_changes_for_merge.
|
69 |
"""
|
70 |
cached = self._changes.get(path_prefix)
|
71 |
if cached is None: |
72 |
commit = self.commit
|
73 |
if not self._get_parents(commit): |
74 |
changes_func = tree_changes |
75 |
parent = None
|
76 |
elif len(self._get_parents(commit)) == 1: |
77 |
changes_func = tree_changes |
78 |
parent = self._store[self._get_parents(commit)[0]].tree |
79 |
if path_prefix:
|
80 |
mode, subtree_sha = parent.lookup_path( |
81 |
self._store.__getitem__,
|
82 |
path_prefix, |
83 |
) |
84 |
parent = self._store[subtree_sha]
|
85 |
else:
|
86 |
changes_func = tree_changes_for_merge |
87 |
parent = [self._store[p].tree for p in self._get_parents(commit)] |
88 |
if path_prefix:
|
89 |
parent_trees = [self._store[p] for p in parent] |
90 |
parent = [] |
91 |
for p in parent_trees: |
92 |
try:
|
93 |
mode, st = p.lookup_path( |
94 |
self._store.__getitem__,
|
95 |
path_prefix, |
96 |
) |
97 |
except KeyError: |
98 |
pass
|
99 |
else:
|
100 |
parent.append(st) |
101 |
commit_tree_sha = commit.tree |
102 |
if path_prefix:
|
103 |
commit_tree = self._store[commit_tree_sha]
|
104 |
mode, commit_tree_sha = commit_tree.lookup_path( |
105 |
self._store.__getitem__,
|
106 |
path_prefix, |
107 |
) |
108 |
cached = list(changes_func(
|
109 |
self._store, parent, commit_tree_sha,
|
110 |
rename_detector=self._rename_detector))
|
111 |
self._changes[path_prefix] = cached
|
112 |
return self._changes[path_prefix] |
113 |
|
114 |
def __repr__(self): |
115 |
return '<WalkEntry commit=%s, changes=%r>' % ( |
116 |
self.commit.id, self.changes()) |
117 |
|
118 |
|
119 |
class _CommitTimeQueue(object): |
120 |
"""Priority queue of WalkEntry objects by commit time."""
|
121 |
|
122 |
def __init__(self, walker): |
123 |
self._walker = walker
|
124 |
self._store = walker.store
|
125 |
self._get_parents = walker.get_parents
|
126 |
self._excluded = walker.excluded
|
127 |
self._pq = []
|
128 |
self._pq_set = set() |
129 |
self._seen = set() |
130 |
self._done = set() |
131 |
self._min_time = walker.since
|
132 |
self._last = None |
133 |
self._extra_commits_left = _MAX_EXTRA_COMMITS
|
134 |
self._is_finished = False |
135 |
|
136 |
for commit_id in chain(walker.include, walker.excluded): |
137 |
self._push(commit_id)
|
138 |
|
139 |
def _push(self, commit_id): |
140 |
try:
|
141 |
commit = self._store[commit_id]
|
142 |
except KeyError: |
143 |
raise MissingCommitError(commit_id)
|
144 |
if commit_id not in self._pq_set and commit_id not in self._done: |
145 |
heapq.heappush(self._pq, (-commit.commit_time, commit))
|
146 |
self._pq_set.add(commit_id)
|
147 |
self._seen.add(commit_id)
|
148 |
|
149 |
def _exclude_parents(self, commit): |
150 |
excluded = self._excluded
|
151 |
seen = self._seen
|
152 |
todo = [commit] |
153 |
while todo:
|
154 |
commit = todo.pop() |
155 |
for parent in self._get_parents(commit): |
156 |
if parent not in excluded and parent in seen: |
157 |
# TODO: This is inefficient unless the object store does
|
158 |
# some caching (which DiskObjectStore currently does not).
|
159 |
# We could either add caching in this class or pass around
|
160 |
# parsed queue entry objects instead of commits.
|
161 |
todo.append(self._store[parent])
|
162 |
excluded.add(parent) |
163 |
|
164 |
def next(self): |
165 |
if self._is_finished: |
166 |
return None |
167 |
while self._pq: |
168 |
_, commit = heapq.heappop(self._pq)
|
169 |
sha = commit.id |
170 |
self._pq_set.remove(sha)
|
171 |
if sha in self._done: |
172 |
continue
|
173 |
self._done.add(sha)
|
174 |
|
175 |
for parent_id in self._get_parents(commit): |
176 |
self._push(parent_id)
|
177 |
|
178 |
reset_extra_commits = True
|
179 |
is_excluded = sha in self._excluded |
180 |
if is_excluded:
|
181 |
self._exclude_parents(commit)
|
182 |
if self._pq and all(c.id in self._excluded |
183 |
for _, c in self._pq): |
184 |
_, n = self._pq[0] |
185 |
if self._last and n.commit_time >= self._last.commit_time: |
186 |
# If the next commit is newer than the last one, we need
|
187 |
# to keep walking in case its parents (which we may not
|
188 |
# have seen yet) are excluded. This gives the excluded
|
189 |
# set a chance to "catch up" while the commit is still
|
190 |
# in the Walker's output queue.
|
191 |
reset_extra_commits = True
|
192 |
else:
|
193 |
reset_extra_commits = False
|
194 |
|
195 |
if (self._min_time is not None and |
196 |
commit.commit_time < self._min_time):
|
197 |
# We want to stop walking at min_time, but commits at the
|
198 |
# boundary may be out of order with respect to their parents. So
|
199 |
# we walk _MAX_EXTRA_COMMITS more commits once we hit this
|
200 |
# boundary.
|
201 |
reset_extra_commits = False
|
202 |
|
203 |
if reset_extra_commits:
|
204 |
# We're not at a boundary, so reset the counter.
|
205 |
self._extra_commits_left = _MAX_EXTRA_COMMITS
|
206 |
else:
|
207 |
self._extra_commits_left -= 1 |
208 |
if not self._extra_commits_left: |
209 |
break
|
210 |
|
211 |
if not is_excluded: |
212 |
self._last = commit
|
213 |
return WalkEntry(self._walker, commit) |
214 |
self._is_finished = True |
215 |
return None |
216 |
|
217 |
__next__ = next
|
218 |
|
219 |
|
220 |
class Walker(object): |
221 |
"""Object for performing a walk of commits in a store.
|
222 |
|
223 |
Walker objects are initialized with a store and other options and can then
|
224 |
be treated as iterators of Commit objects.
|
225 |
"""
|
226 |
|
227 |
def __init__(self, store, include, exclude=None, order=ORDER_DATE, |
228 |
reverse=False, max_entries=None, paths=None, |
229 |
rename_detector=None, follow=False, since=None, until=None, |
230 |
get_parents=lambda commit: commit.parents,
|
231 |
queue_cls=_CommitTimeQueue): |
232 |
"""Constructor.
|
233 |
|
234 |
:param store: ObjectStore instance for looking up objects.
|
235 |
:param include: Iterable of SHAs of commits to include along with their
|
236 |
ancestors.
|
237 |
:param exclude: Iterable of SHAs of commits to exclude along with their
|
238 |
ancestors, overriding includes.
|
239 |
:param order: ORDER_* constant specifying the order of results. Anything
|
240 |
other than ORDER_DATE may result in O(n) memory usage.
|
241 |
:param reverse: If True, reverse the order of output, requiring O(n)
|
242 |
memory.
|
243 |
:param max_entries: The maximum number of entries to yield, or None for
|
244 |
no limit.
|
245 |
:param paths: Iterable of file or subtree paths to show entries for.
|
246 |
:param rename_detector: diff.RenameDetector object for detecting
|
247 |
renames.
|
248 |
:param follow: If True, follow path across renames/copies. Forces a
|
249 |
default rename_detector.
|
250 |
:param since: Timestamp to list commits after.
|
251 |
:param until: Timestamp to list commits before.
|
252 |
:param get_parents: Method to retrieve the parents of a commit
|
253 |
:param queue_cls: A class to use for a queue of commits, supporting the
|
254 |
iterator protocol. The constructor takes a single argument, the
|
255 |
Walker.
|
256 |
"""
|
257 |
# Note: when adding arguments to this method, please also update
|
258 |
# dulwich.repo.BaseRepo.get_walker
|
259 |
if order not in ALL_ORDERS: |
260 |
raise ValueError('Unknown walk order %s' % order) |
261 |
self.store = store
|
262 |
if not isinstance(include, list): |
263 |
include = [include] |
264 |
self.include = include
|
265 |
self.excluded = set(exclude or []) |
266 |
self.order = order
|
267 |
self.reverse = reverse
|
268 |
self.max_entries = max_entries
|
269 |
self.paths = paths and set(paths) or None |
270 |
if follow and not rename_detector: |
271 |
rename_detector = RenameDetector(store) |
272 |
self.rename_detector = rename_detector
|
273 |
self.get_parents = get_parents
|
274 |
self.follow = follow
|
275 |
self.since = since
|
276 |
self.until = until
|
277 |
|
278 |
self._num_entries = 0 |
279 |
self._queue = queue_cls(self) |
280 |
self._out_queue = collections.deque()
|
281 |
|
282 |
def _path_matches(self, changed_path): |
283 |
if changed_path is None: |
284 |
return False |
285 |
for followed_path in self.paths: |
286 |
if changed_path == followed_path:
|
287 |
return True |
288 |
if (changed_path.startswith(followed_path) and |
289 |
changed_path[len(followed_path)] == b'/'[0]): |
290 |
return True |
291 |
return False |
292 |
|
293 |
def _change_matches(self, change): |
294 |
if not change: |
295 |
return False |
296 |
|
297 |
old_path = change.old.path |
298 |
new_path = change.new.path |
299 |
if self._path_matches(new_path): |
300 |
if self.follow and change.type in RENAME_CHANGE_TYPES: |
301 |
self.paths.add(old_path)
|
302 |
self.paths.remove(new_path)
|
303 |
return True |
304 |
elif self._path_matches(old_path): |
305 |
return True |
306 |
return False |
307 |
|
308 |
def _should_return(self, entry): |
309 |
"""Determine if a walk entry should be returned..
|
310 |
|
311 |
:param entry: The WalkEntry to consider.
|
312 |
:return: True if the WalkEntry should be returned by this walk, or False
|
313 |
otherwise (e.g. if it doesn't match any requested paths).
|
314 |
"""
|
315 |
commit = entry.commit |
316 |
if self.since is not None and commit.commit_time < self.since: |
317 |
return False |
318 |
if self.until is not None and commit.commit_time > self.until: |
319 |
return False |
320 |
if commit.id in self.excluded: |
321 |
return False |
322 |
|
323 |
if self.paths is None: |
324 |
return True |
325 |
|
326 |
if len(self.get_parents(commit)) > 1: |
327 |
for path_changes in entry.changes(): |
328 |
# For merge commits, only include changes with conflicts for
|
329 |
# this path. Since a rename conflict may include different
|
330 |
# old.paths, we have to check all of them.
|
331 |
for change in path_changes: |
332 |
if self._change_matches(change): |
333 |
return True |
334 |
else:
|
335 |
for change in entry.changes(): |
336 |
if self._change_matches(change): |
337 |
return True |
338 |
return None |
339 |
|
340 |
def _next(self): |
341 |
max_entries = self.max_entries
|
342 |
while max_entries is None or self._num_entries < max_entries: |
343 |
entry = next(self._queue) |
344 |
if entry is not None: |
345 |
self._out_queue.append(entry)
|
346 |
if entry is None or len(self._out_queue) > _MAX_EXTRA_COMMITS: |
347 |
if not self._out_queue: |
348 |
return None |
349 |
entry = self._out_queue.popleft()
|
350 |
if self._should_return(entry): |
351 |
self._num_entries += 1 |
352 |
return entry
|
353 |
return None |
354 |
|
355 |
def _reorder(self, results): |
356 |
"""Possibly reorder a results iterator.
|
357 |
|
358 |
:param results: An iterator of WalkEntry objects, in the order returned
|
359 |
from the queue_cls.
|
360 |
:return: An iterator or list of WalkEntry objects, in the order required
|
361 |
by the Walker.
|
362 |
"""
|
363 |
if self.order == ORDER_TOPO: |
364 |
results = _topo_reorder(results, self.get_parents)
|
365 |
if self.reverse: |
366 |
results = reversed(list(results)) |
367 |
return results
|
368 |
|
369 |
def __iter__(self): |
370 |
return iter(self._reorder(iter(self._next, None))) |
371 |
|
372 |
|
373 |
def _topo_reorder(entries, get_parents=lambda commit: commit.parents): |
374 |
"""Reorder an iterable of entries topologically.
|
375 |
|
376 |
This works best assuming the entries are already in almost-topological
|
377 |
order, e.g. in commit time order.
|
378 |
|
379 |
:param entries: An iterable of WalkEntry objects.
|
380 |
:param get_parents: Optional function for getting the parents of a commit.
|
381 |
:return: iterator over WalkEntry objects from entries in FIFO order, except
|
382 |
where a parent would be yielded before any of its children.
|
383 |
"""
|
384 |
todo = collections.deque() |
385 |
pending = {} |
386 |
num_children = defaultdict(int)
|
387 |
for entry in entries: |
388 |
todo.append(entry) |
389 |
for p in get_parents(entry.commit): |
390 |
num_children[p] += 1
|
391 |
|
392 |
while todo:
|
393 |
entry = todo.popleft() |
394 |
commit = entry.commit |
395 |
commit_id = commit.id |
396 |
if num_children[commit_id]:
|
397 |
pending[commit_id] = entry |
398 |
continue
|
399 |
for parent_id in get_parents(commit): |
400 |
num_children[parent_id] -= 1
|
401 |
if not num_children[parent_id]: |
402 |
parent_entry = pending.pop(parent_id, None)
|
403 |
if parent_entry:
|
404 |
todo.appendleft(parent_entry) |
405 |
yield entry
|