Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / walk.py @ 959

History | View | Annotate | Download (15.3 KB)

1
# walk.py -- General implementation of walking commits and their contents.
2
# Copyright (C) 2010 Google, Inc.
3
#
4
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5
# General Public License as public by the Free Software Foundation; version 2.0
6
# or (at your option) any later version. You can redistribute it and/or
7
# modify it under the terms of either of these two licenses.
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
#
15
# You should have received a copy of the licenses; if not, see
16
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18
# License, Version 2.0.
19
#
20

    
21
"""General implementation of walking commits and their contents."""
22

    
23

    
24
from collections import defaultdict
25

    
26
import collections
27
import heapq
28
from itertools import chain
29

    
30
from dulwich.diff_tree import (
31
    RENAME_CHANGE_TYPES,
32
    tree_changes,
33
    tree_changes_for_merge,
34
    RenameDetector,
35
    )
36
from dulwich.errors import (
37
    MissingCommitError,
38
    )
39

    
40
ORDER_DATE = 'date'
41
ORDER_TOPO = 'topo'
42

    
43
ALL_ORDERS = (ORDER_DATE, ORDER_TOPO)
44

    
45
# Maximum number of commits to walk past a commit time boundary.
46
_MAX_EXTRA_COMMITS = 5
47

    
48

    
49
class WalkEntry(object):
50
    """Object encapsulating a single result from a walk."""
51

    
52
    def __init__(self, walker, commit):
53
        self.commit = commit
54
        self._store = walker.store
55
        self._get_parents = walker.get_parents
56
        self._changes = {}
57
        self._rename_detector = walker.rename_detector
58

    
59
    def changes(self, path_prefix=None):
60
        """Get the tree changes for this entry.
61

62
        :param path_prefix: Portion of the path in the repository to
63
            use to filter changes. Must be a directory name. Must be
64
            a full, valid, path reference (no partial names or wildcards).
65
        :return: For commits with up to one parent, a list of TreeChange
66
            objects; if the commit has no parents, these will be relative to the
67
            empty tree. For merge commits, a list of lists of TreeChange
68
            objects; see dulwich.diff.tree_changes_for_merge.
69
        """
70
        cached = self._changes.get(path_prefix)
71
        if cached is None:
72
            commit = self.commit
73
            if not self._get_parents(commit):
74
                changes_func = tree_changes
75
                parent = None
76
            elif len(self._get_parents(commit)) == 1:
77
                changes_func = tree_changes
78
                parent = self._store[self._get_parents(commit)[0]].tree
79
                if path_prefix:
80
                    mode, subtree_sha = parent.lookup_path(
81
                        self._store.__getitem__,
82
                        path_prefix,
83
                    )
84
                    parent = self._store[subtree_sha]
85
            else:
86
                changes_func = tree_changes_for_merge
87
                parent = [self._store[p].tree for p in self._get_parents(commit)]
88
                if path_prefix:
89
                    parent_trees = [self._store[p] for p in parent]
90
                    parent = []
91
                    for p in parent_trees:
92
                        try:
93
                            mode, st = p.lookup_path(
94
                                self._store.__getitem__,
95
                                path_prefix,
96
                            )
97
                        except KeyError:
98
                            pass
99
                        else:
100
                            parent.append(st)
101
            commit_tree_sha = commit.tree
102
            if path_prefix:
103
                commit_tree = self._store[commit_tree_sha]
104
                mode, commit_tree_sha = commit_tree.lookup_path(
105
                    self._store.__getitem__,
106
                    path_prefix,
107
                )
108
            cached = list(changes_func(
109
              self._store, parent, commit_tree_sha,
110
              rename_detector=self._rename_detector))
111
            self._changes[path_prefix] = cached
112
        return self._changes[path_prefix]
113

    
114
    def __repr__(self):
115
        return '<WalkEntry commit=%s, changes=%r>' % (
116
          self.commit.id, self.changes())
117

    
118

    
119
class _CommitTimeQueue(object):
120
    """Priority queue of WalkEntry objects by commit time."""
121

    
122
    def __init__(self, walker):
123
        self._walker = walker
124
        self._store = walker.store
125
        self._get_parents = walker.get_parents
126
        self._excluded = walker.excluded
127
        self._pq = []
128
        self._pq_set = set()
129
        self._seen = set()
130
        self._done = set()
131
        self._min_time = walker.since
132
        self._last = None
133
        self._extra_commits_left = _MAX_EXTRA_COMMITS
134
        self._is_finished = False
135

    
136
        for commit_id in chain(walker.include, walker.excluded):
137
            self._push(commit_id)
138

    
139
    def _push(self, commit_id):
140
        try:
141
            commit = self._store[commit_id]
142
        except KeyError:
143
            raise MissingCommitError(commit_id)
144
        if commit_id not in self._pq_set and commit_id not in self._done:
145
            heapq.heappush(self._pq, (-commit.commit_time, commit))
146
            self._pq_set.add(commit_id)
147
            self._seen.add(commit_id)
148

    
149
    def _exclude_parents(self, commit):
150
        excluded = self._excluded
151
        seen = self._seen
152
        todo = [commit]
153
        while todo:
154
            commit = todo.pop()
155
            for parent in self._get_parents(commit):
156
                if parent not in excluded and parent in seen:
157
                    # TODO: This is inefficient unless the object store does
158
                    # some caching (which DiskObjectStore currently does not).
159
                    # We could either add caching in this class or pass around
160
                    # parsed queue entry objects instead of commits.
161
                    todo.append(self._store[parent])
162
                excluded.add(parent)
163

    
164
    def next(self):
165
        if self._is_finished:
166
            return None
167
        while self._pq:
168
            _, commit = heapq.heappop(self._pq)
169
            sha = commit.id
170
            self._pq_set.remove(sha)
171
            if sha in self._done:
172
                continue
173
            self._done.add(sha)
174

    
175
            for parent_id in self._get_parents(commit):
176
                self._push(parent_id)
177

    
178
            reset_extra_commits = True
179
            is_excluded = sha in self._excluded
180
            if is_excluded:
181
                self._exclude_parents(commit)
182
                if self._pq and all(c.id in self._excluded
183
                                    for _, c in self._pq):
184
                    _, n = self._pq[0]
185
                    if self._last and n.commit_time >= self._last.commit_time:
186
                        # If the next commit is newer than the last one, we need
187
                        # to keep walking in case its parents (which we may not
188
                        # have seen yet) are excluded. This gives the excluded
189
                        # set a chance to "catch up" while the commit is still
190
                        # in the Walker's output queue.
191
                        reset_extra_commits = True
192
                    else:
193
                        reset_extra_commits = False
194

    
195
            if (self._min_time is not None and
196
                commit.commit_time < self._min_time):
197
                # We want to stop walking at min_time, but commits at the
198
                # boundary may be out of order with respect to their parents. So
199
                # we walk _MAX_EXTRA_COMMITS more commits once we hit this
200
                # boundary.
201
                reset_extra_commits = False
202

    
203
            if reset_extra_commits:
204
                # We're not at a boundary, so reset the counter.
205
                self._extra_commits_left = _MAX_EXTRA_COMMITS
206
            else:
207
                self._extra_commits_left -= 1
208
                if not self._extra_commits_left:
209
                    break
210

    
211
            if not is_excluded:
212
                self._last = commit
213
                return WalkEntry(self._walker, commit)
214
        self._is_finished = True
215
        return None
216

    
217
    __next__ = next
218

    
219

    
220
class Walker(object):
221
    """Object for performing a walk of commits in a store.
222

223
    Walker objects are initialized with a store and other options and can then
224
    be treated as iterators of Commit objects.
225
    """
226

    
227
    def __init__(self, store, include, exclude=None, order=ORDER_DATE,
228
                 reverse=False, max_entries=None, paths=None,
229
                 rename_detector=None, follow=False, since=None, until=None,
230
                 get_parents=lambda commit: commit.parents,
231
                 queue_cls=_CommitTimeQueue):
232
        """Constructor.
233

234
        :param store: ObjectStore instance for looking up objects.
235
        :param include: Iterable of SHAs of commits to include along with their
236
            ancestors.
237
        :param exclude: Iterable of SHAs of commits to exclude along with their
238
            ancestors, overriding includes.
239
        :param order: ORDER_* constant specifying the order of results. Anything
240
            other than ORDER_DATE may result in O(n) memory usage.
241
        :param reverse: If True, reverse the order of output, requiring O(n)
242
            memory.
243
        :param max_entries: The maximum number of entries to yield, or None for
244
            no limit.
245
        :param paths: Iterable of file or subtree paths to show entries for.
246
        :param rename_detector: diff.RenameDetector object for detecting
247
            renames.
248
        :param follow: If True, follow path across renames/copies. Forces a
249
            default rename_detector.
250
        :param since: Timestamp to list commits after.
251
        :param until: Timestamp to list commits before.
252
        :param get_parents: Method to retrieve the parents of a commit
253
        :param queue_cls: A class to use for a queue of commits, supporting the
254
            iterator protocol. The constructor takes a single argument, the
255
            Walker.
256
        """
257
        # Note: when adding arguments to this method, please also update
258
        # dulwich.repo.BaseRepo.get_walker
259
        if order not in ALL_ORDERS:
260
            raise ValueError('Unknown walk order %s' % order)
261
        self.store = store
262
        if not isinstance(include, list):
263
            include = [include]
264
        self.include = include
265
        self.excluded = set(exclude or [])
266
        self.order = order
267
        self.reverse = reverse
268
        self.max_entries = max_entries
269
        self.paths = paths and set(paths) or None
270
        if follow and not rename_detector:
271
            rename_detector = RenameDetector(store)
272
        self.rename_detector = rename_detector
273
        self.get_parents = get_parents
274
        self.follow = follow
275
        self.since = since
276
        self.until = until
277

    
278
        self._num_entries = 0
279
        self._queue = queue_cls(self)
280
        self._out_queue = collections.deque()
281

    
282
    def _path_matches(self, changed_path):
283
        if changed_path is None:
284
            return False
285
        for followed_path in self.paths:
286
            if changed_path == followed_path:
287
                return True
288
            if (changed_path.startswith(followed_path) and
289
                    changed_path[len(followed_path)] == b'/'[0]):
290
                return True
291
        return False
292

    
293
    def _change_matches(self, change):
294
        if not change:
295
            return False
296

    
297
        old_path = change.old.path
298
        new_path = change.new.path
299
        if self._path_matches(new_path):
300
            if self.follow and change.type in RENAME_CHANGE_TYPES:
301
                self.paths.add(old_path)
302
                self.paths.remove(new_path)
303
            return True
304
        elif self._path_matches(old_path):
305
            return True
306
        return False
307

    
308
    def _should_return(self, entry):
309
        """Determine if a walk entry should be returned..
310

311
        :param entry: The WalkEntry to consider.
312
        :return: True if the WalkEntry should be returned by this walk, or False
313
            otherwise (e.g. if it doesn't match any requested paths).
314
        """
315
        commit = entry.commit
316
        if self.since is not None and commit.commit_time < self.since:
317
            return False
318
        if self.until is not None and commit.commit_time > self.until:
319
            return False
320
        if commit.id in self.excluded:
321
            return False
322

    
323
        if self.paths is None:
324
            return True
325

    
326
        if len(self.get_parents(commit)) > 1:
327
            for path_changes in entry.changes():
328
                # For merge commits, only include changes with conflicts for
329
                # this path. Since a rename conflict may include different
330
                # old.paths, we have to check all of them.
331
                for change in path_changes:
332
                    if self._change_matches(change):
333
                        return True
334
        else:
335
            for change in entry.changes():
336
                if self._change_matches(change):
337
                    return True
338
        return None
339

    
340
    def _next(self):
341
        max_entries = self.max_entries
342
        while max_entries is None or self._num_entries < max_entries:
343
            entry = next(self._queue)
344
            if entry is not None:
345
                self._out_queue.append(entry)
346
            if entry is None or len(self._out_queue) > _MAX_EXTRA_COMMITS:
347
                if not self._out_queue:
348
                    return None
349
                entry = self._out_queue.popleft()
350
                if self._should_return(entry):
351
                    self._num_entries += 1
352
                    return entry
353
        return None
354

    
355
    def _reorder(self, results):
356
        """Possibly reorder a results iterator.
357

358
        :param results: An iterator of WalkEntry objects, in the order returned
359
            from the queue_cls.
360
        :return: An iterator or list of WalkEntry objects, in the order required
361
            by the Walker.
362
        """
363
        if self.order == ORDER_TOPO:
364
            results = _topo_reorder(results, self.get_parents)
365
        if self.reverse:
366
            results = reversed(list(results))
367
        return results
368

    
369
    def __iter__(self):
370
        return iter(self._reorder(iter(self._next, None)))
371

    
372

    
373
def _topo_reorder(entries, get_parents=lambda commit: commit.parents):
374
    """Reorder an iterable of entries topologically.
375

376
    This works best assuming the entries are already in almost-topological
377
    order, e.g. in commit time order.
378

379
    :param entries: An iterable of WalkEntry objects.
380
    :param get_parents: Optional function for getting the parents of a commit.
381
    :return: iterator over WalkEntry objects from entries in FIFO order, except
382
        where a parent would be yielded before any of its children.
383
    """
384
    todo = collections.deque()
385
    pending = {}
386
    num_children = defaultdict(int)
387
    for entry in entries:
388
        todo.append(entry)
389
        for p in get_parents(entry.commit):
390
            num_children[p] += 1
391

    
392
    while todo:
393
        entry = todo.popleft()
394
        commit = entry.commit
395
        commit_id = commit.id
396
        if num_children[commit_id]:
397
            pending[commit_id] = entry
398
            continue
399
        for parent_id in get_parents(commit):
400
            num_children[parent_id] -= 1
401
            if not num_children[parent_id]:
402
                parent_entry = pending.pop(parent_id, None)
403
                if parent_entry:
404
                    todo.appendleft(parent_entry)
405
        yield entry