Revision 959
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/.directory

[Dolphin]
Timestamp=2017,10,11,19,8,11
Version=3
ViewMode=1
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/lru_cache.py

# lru_cache.py -- Simple LRU cache for dulwich
# Copyright (C) 2006, 2008 Canonical Ltd
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""A simple least-recently-used (LRU) cache."""

_null_key = object()


class _LRUNode(object):
    """This maintains the linked-list which is the lru internals."""

    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')

    def __init__(self, key, value, cleanup=None):
        self.prev = None
        self.next_key = _null_key
        self.key = key
        self.value = value
        self.cleanup = cleanup
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
        # one pointer from this object, we just need to decide if it
        # actually costs us much of anything in normal usage
        self.size = None

    def __repr__(self):
        if self.prev is None:
            prev_key = None
        else:
            prev_key = self.prev.key
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
                                     self.next_key, prev_key)

    def run_cleanup(self):
        if self.cleanup is not None:
            self.cleanup(self.key, self.value)
        self.cleanup = None
        # Just make sure to break any refcycles, etc
        self.value = None


class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()
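
A minimal usage sketch of the count-bounded cache above; the keys and values are invented for illustration, and the final count follows from the default after_cleanup_count of max_cache * 8 // 10 computed in _update_max_cache. The LRUSizeCache subclass that follows swaps the entry budget for a byte budget.

from dulwich.lru_cache import LRUCache

cache = LRUCache(max_cache=5)      # after_cleanup_count defaults to 4
for i in range(10):
    cache[i] = 'value-%d' % i      # __setitem__ -> add() -> cleanup()
assert len(cache) == 4             # shrunk back to after_cleanup_count
assert 9 in cache                  # recently used entries survive
assert cache.get(0) is None        # the oldest entries were evicted
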
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
            'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            self._value_size -= node.size
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size/512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
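
A matching sketch for LRUSizeCache, again with invented keys and values. Two behaviors are worth noting: cleanup is driven by the running byte total rather than the entry count, and the guard at the top of add() means a single value at least as large as after_cleanup_size is never cached at all.

from dulwich.lru_cache import LRUSizeCache

sizes = LRUSizeCache(max_size=100)   # compute_size=len, cleanup target 80
sizes.add(b'a', b'x' * 60)
sizes.add(b'b', b'y' * 30)
sizes.add(b'c', b'z' * 50)           # total 140 > 100, so cleanup() runs
assert b'a' not in sizes             # least-recently-used entry evicted
assert b'b' in sizes and b'c' in sizes

sizes.add(b'huge', b'w' * 200)       # 200 >= 80: rejected outright
assert b'huge' not in sizes
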
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/diff_tree.py

# diff_tree.py -- Utilities for diffing files and trees.
# Copyright (C) 2010 Google, Inc.
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Utilities for diffing files and trees."""
import sys
from collections import (
    defaultdict,
    namedtuple,
    )

from io import BytesIO
from itertools import chain
import stat

from dulwich.objects import (
    S_ISGITLINK,
    TreeEntry,
    )


# TreeChange type constants.
CHANGE_ADD = 'add'
CHANGE_MODIFY = 'modify'
CHANGE_DELETE = 'delete'
CHANGE_RENAME = 'rename'
CHANGE_COPY = 'copy'
CHANGE_UNCHANGED = 'unchanged'

RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)

_NULL_ENTRY = TreeEntry(None, None, None)

_MAX_SCORE = 100
RENAME_THRESHOLD = 60
MAX_FILES = 200
REWRITE_THRESHOLD = None


class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
    """Named tuple representing a single change between two trees."""

    @classmethod
    def add(cls, new):
        return cls(CHANGE_ADD, _NULL_ENTRY, new)

    @classmethod
    def delete(cls, old):
        return cls(CHANGE_DELETE, old, _NULL_ENTRY)


def _tree_entries(path, tree):
    result = []
    if not tree:
        return result
    for entry in tree.iteritems(name_order=True):
        result.append(entry.in_path(path))
    return result


def _merge_entries(path, tree1, tree2):
    """Merge the entries of two trees.

    :param path: A path to prepend to all tree entry names.
    :param tree1: The first Tree object to iterate, or None.
    :param tree2: The second Tree object to iterate, or None.
    :return: A list of pairs of TreeEntry objects for each pair of entries in
        the trees. If an entry exists in one tree but not the other, the other
        entry will have all attributes set to None. If neither entry's path is
        None, they are guaranteed to match.
    """
    entries1 = _tree_entries(path, tree1)
    entries2 = _tree_entries(path, tree2)
    i1 = i2 = 0
    len1 = len(entries1)
    len2 = len(entries2)

    result = []
    while i1 < len1 and i2 < len2:
        entry1 = entries1[i1]
        entry2 = entries2[i2]
        if entry1.path < entry2.path:
            result.append((entry1, _NULL_ENTRY))
            i1 += 1
        elif entry1.path > entry2.path:
            result.append((_NULL_ENTRY, entry2))
            i2 += 1
        else:
            result.append((entry1, entry2))
            i1 += 1
            i2 += 1
    for i in range(i1, len1):
        result.append((entries1[i], _NULL_ENTRY))
    for i in range(i2, len2):
        result.append((_NULL_ENTRY, entries2[i]))
    return result


def _is_tree(entry):
    mode = entry.mode
    if mode is None:
        return False
    return stat.S_ISDIR(mode)


def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
    """Recursively walk all the entries of two trees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the first Tree object to iterate, or None.
    :param tree2_id: The SHA of the second Tree object to iterate, or None.
    :param prune_identical: If True, identical subtrees will not be walked.
    :return: Iterator over pairs of TreeEntry objects for each pair of entries
        in the trees and their subtrees recursively. If an entry exists in one
        tree but not the other, the other entry will have all attributes set
        to None. If neither entry's path is None, they are guaranteed to
        match.
    """
    # This could be fairly easily generalized to >2 trees if we find a use
    # case.
    mode1 = tree1_id and stat.S_IFDIR or None
    mode2 = tree2_id and stat.S_IFDIR or None
    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
    while todo:
        entry1, entry2 = todo.pop()
        is_tree1 = _is_tree(entry1)
        is_tree2 = _is_tree(entry2)
        if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
            continue

        tree1 = is_tree1 and store[entry1.sha] or None
        tree2 = is_tree2 and store[entry2.sha] or None
        path = entry1.path or entry2.path
        todo.extend(reversed(_merge_entries(path, tree1, tree2)))
        yield entry1, entry2


def _skip_tree(entry):
    if entry.mode is None or stat.S_ISDIR(entry.mode):
        return _NULL_ENTRY
    return entry


def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
                 rename_detector=None):
    """Find the differences between the contents of two trees.

    :param store: An ObjectStore for looking up objects.
    :param tree1_id: The SHA of the source tree.
    :param tree2_id: The SHA of the target tree.
    :param want_unchanged: If True, include TreeChanges for unmodified entries
        as well.
    :param rename_detector: RenameDetector object for detecting renames.
    :return: Iterator over TreeChange instances for each change between the
        source and target tree.
    """
    if (rename_detector is not None and tree1_id is not None and
            tree2_id is not None):
        for change in rename_detector.changes_with_renames(
                tree1_id, tree2_id, want_unchanged=want_unchanged):
            yield change
        return

    entries = walk_trees(store, tree1_id, tree2_id,
                         prune_identical=(not want_unchanged))
    for entry1, entry2 in entries:
        if entry1 == entry2 and not want_unchanged:
            continue

        # Treat entries for trees as missing.
        entry1 = _skip_tree(entry1)
        entry2 = _skip_tree(entry2)

        if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
            if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
                # File type changed: report as delete/add.
                yield TreeChange.delete(entry1)
                entry1 = _NULL_ENTRY
                change_type = CHANGE_ADD
            elif entry1 == entry2:
                change_type = CHANGE_UNCHANGED
            else:
                change_type = CHANGE_MODIFY
        elif entry1 != _NULL_ENTRY:
            change_type = CHANGE_DELETE
        elif entry2 != _NULL_ENTRY:
            change_type = CHANGE_ADD
        else:
            # Both were None because at least one was a tree.
            continue
        yield TreeChange(change_type, entry1, entry2)


def _all_eq(seq, key, value):
    for e in seq:
        if key(e) != value:
            return False
    return True


def _all_same(seq, key):
    return _all_eq(seq[1:], key, key(seq[0]))


def tree_changes_for_merge(store, parent_tree_ids, tree_id,
                           rename_detector=None):
    """Get the tree changes for a merge tree relative to all its parents.

    :param store: An ObjectStore for looking up objects.
    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
    :param tree_id: The SHA of the merge tree.
    :param rename_detector: RenameDetector object for detecting renames.

    :return: Iterator over lists of TreeChange objects, one per conflicted path
        in the merge.

        Each list contains one element per parent, with the TreeChange for that
        path relative to that parent. An element may be None if it never
        existed in one parent and was deleted in two others.

        A path is only included in the output if it is a conflict, i.e. its SHA
        in the merge tree is not found in any of the parents, or in the case of
        deletes, if not all of the old SHAs match.
    """
    all_parent_changes = [tree_changes(store, t, tree_id,
                                       rename_detector=rename_detector)
                          for t in parent_tree_ids]
    num_parents = len(parent_tree_ids)
    changes_by_path = defaultdict(lambda: [None] * num_parents)

    # Organize by path.
    for i, parent_changes in enumerate(all_parent_changes):
        for change in parent_changes:
            if change.type == CHANGE_DELETE:
                path = change.old.path
            else:
                path = change.new.path
            changes_by_path[path][i] = change

    old_sha = lambda c: c.old.sha
    change_type = lambda c: c.type

    # Yield only conflicting changes.
    for _, changes in sorted(changes_by_path.items()):
        assert len(changes) == num_parents
        have = [c for c in changes if c is not None]
        if _all_eq(have, change_type, CHANGE_DELETE):
            if not _all_same(have, old_sha):
                yield changes
        elif not _all_same(have, change_type):
            yield changes
        elif None not in changes:
            # If no change was found relative to one parent, that means the SHA
            # must have matched the SHA in that parent, so it is not a
            # conflict.
            yield changes


_BLOCK_SIZE = 64


def _count_blocks(obj):
    """Count the blocks in an object.

    Splits the data into blocks either on lines or <=64-byte chunks of lines.

    :param obj: The object to count blocks for.
    :return: A dict of block hashcode -> total bytes occurring.
    """
    block_counts = defaultdict(int)
    block = BytesIO()
    n = 0

    # Cache attrs as locals to avoid expensive lookups in the inner loop.
    block_write = block.write
    block_seek = block.seek
    block_truncate = block.truncate
    block_getvalue = block.getvalue

    for c in chain(*obj.as_raw_chunks()):
        if sys.version_info[0] == 3:
            c = c.to_bytes(1, 'big')
        block_write(c)
        n += 1
        if c == b'\n' or n == _BLOCK_SIZE:
            value = block_getvalue()
            block_counts[hash(value)] += len(value)
            block_seek(0)
            block_truncate()
            n = 0
    if n > 0:
        last_block = block_getvalue()
        block_counts[hash(last_block)] += len(last_block)
    return block_counts


def _common_bytes(blocks1, blocks2):
    """Count the number of common bytes in two block count dicts.

    :param blocks1: The first dict of block hashcode -> total bytes.
    :param blocks2: The second dict of block hashcode -> total bytes.
    :return: The number of bytes in common between blocks1 and blocks2. This is
        only approximate due to possible hash collisions.
    """
    # Iterate over the smaller of the two dicts, since this is symmetrical.
    if len(blocks1) > len(blocks2):
        blocks1, blocks2 = blocks2, blocks1
    score = 0
    for block, count1 in blocks1.items():
        count2 = blocks2.get(block)
        if count2:
            score += min(count1, count2)
    return score


def _similarity_score(obj1, obj2, block_cache=None):
    """Compute a similarity score for two objects.

    :param obj1: The first object to score.
    :param obj2: The second object to score.
    :param block_cache: An optional dict of SHA to block counts to cache
        results between calls.
    :return: The similarity score between the two objects, defined as the
        number of bytes in common between the two objects divided by the
        maximum size, scaled to the range 0-100.
    """
    if block_cache is None:
        block_cache = {}
    if obj1.id not in block_cache:
        block_cache[obj1.id] = _count_blocks(obj1)
    if obj2.id not in block_cache:
        block_cache[obj2.id] = _count_blocks(obj2)

    common_bytes = _common_bytes(block_cache[obj1.id], block_cache[obj2.id])
    max_size = max(obj1.raw_length(), obj2.raw_length())
    if not max_size:
        return _MAX_SCORE
    return int(float(common_bytes) * _MAX_SCORE / max_size)


def _tree_change_key(entry):
    # Sort by old path then new path. If only one exists, use it for both keys.
    path1 = entry.old.path
    path2 = entry.new.path
    if path1 is None:
        path1 = path2
    if path2 is None:
        path2 = path1
    return (path1, path2)
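
The functions above are enough to diff two trees end to end. Here is a minimal sketch, assuming MemoryObjectStore from dulwich.object_store and the Blob and Tree classes from dulwich.objects (all defined outside this file); the file names and contents are invented. The RenameDetector class below builds on tree_changes to turn matching delete/add pairs into renames and copies.

from dulwich.diff_tree import tree_changes
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b'hello\n')
new_blob = Blob.from_string(b'hello world\n')
old_tree = Tree()
old_tree.add(b'a.txt', 0o100644, old_blob.id)
new_tree = Tree()
new_tree.add(b'a.txt', 0o100644, new_blob.id)
for obj in (old_blob, new_blob, old_tree, new_tree):
    store.add_object(obj)

for change in tree_changes(store, old_tree.id, new_tree.id):
    # prints: modify b'a.txt' b'a.txt'
    print(change.type, change.old.path, change.new.path)
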
class RenameDetector(object):
    """Object for handling rename detection between two trees."""

    def __init__(self, store, rename_threshold=RENAME_THRESHOLD,
                 max_files=MAX_FILES,
                 rewrite_threshold=REWRITE_THRESHOLD,
                 find_copies_harder=False):
        """Initialize the rename detector.

        :param store: An ObjectStore for looking up objects.
        :param rename_threshold: The threshold similarity score for considering
            an add/delete pair to be a rename/copy; see _similarity_score.
        :param max_files: The maximum number of adds and deletes to consider,
            or None for no limit. The detector is guaranteed to compare no more
            than max_files ** 2 add/delete pairs. This limit is provided
            because rename detection can be quadratic in the project size. If
            the limit is exceeded, no content rename detection is attempted.
        :param rewrite_threshold: The threshold similarity score below which a
            modify should be considered a delete/add, or None to not break
            modifies; see _similarity_score.
        :param find_copies_harder: If True, consider unmodified files when
            detecting copies.
        """
        self._store = store
        self._rename_threshold = rename_threshold
        self._rewrite_threshold = rewrite_threshold
        self._max_files = max_files
        self._find_copies_harder = find_copies_harder
        self._want_unchanged = False

    def _reset(self):
        self._adds = []
        self._deletes = []
        self._changes = []

    def _should_split(self, change):
        if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or
                change.old.sha == change.new.sha):
            return False
        old_obj = self._store[change.old.sha]
        new_obj = self._store[change.new.sha]
        return _similarity_score(old_obj, new_obj) < self._rewrite_threshold

    def _add_change(self, change):
        if change.type == CHANGE_ADD:
            self._adds.append(change)
        elif change.type == CHANGE_DELETE:
            self._deletes.append(change)
        elif self._should_split(change):
            self._deletes.append(TreeChange.delete(change.old))
            self._adds.append(TreeChange.add(change.new))
        elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
              or change.type == CHANGE_MODIFY):
            # Treat all modifies as potential deletes for rename detection,
            # but don't split them (to avoid spurious renames). Setting
            # find_copies_harder means we treat unchanged the same as
            # modified.
            self._deletes.append(change)
        else:
            self._changes.append(change)

    def _collect_changes(self, tree1_id, tree2_id):
        want_unchanged = self._find_copies_harder or self._want_unchanged
        for change in tree_changes(self._store, tree1_id, tree2_id,
                                   want_unchanged=want_unchanged):
            self._add_change(change)

    def _prune(self, add_paths, delete_paths):
        self._adds = [a for a in self._adds if a.new.path not in add_paths]
        self._deletes = [d for d in self._deletes
                         if d.old.path not in delete_paths]

    def _find_exact_renames(self):
        add_map = defaultdict(list)
        for add in self._adds:
            add_map[add.new.sha].append(add.new)
        delete_map = defaultdict(list)
        for delete in self._deletes:
            # Keep track of whether the delete was actually marked as a delete.
            # If not, it needs to be marked as a copy.
            is_delete = delete.type == CHANGE_DELETE
            delete_map[delete.old.sha].append((delete.old, is_delete))

        add_paths = set()
        delete_paths = set()
        for sha, sha_deletes in delete_map.items():
            sha_adds = add_map[sha]
            for (old, is_delete), new in zip(sha_deletes, sha_adds):
                if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                    continue
                if is_delete:
                    delete_paths.add(old.path)
                add_paths.add(new.path)
                new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
                self._changes.append(TreeChange(new_type, old, new))

            num_extra_adds = len(sha_adds) - len(sha_deletes)
            # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
            old = sha_deletes[0][0]
            if num_extra_adds > 0:
                for new in sha_adds[-num_extra_adds:]:
                    add_paths.add(new.path)
                    self._changes.append(TreeChange(CHANGE_COPY, old, new))
        self._prune(add_paths, delete_paths)

    def _should_find_content_renames(self):
        return len(self._adds) * len(self._deletes) <= self._max_files ** 2

    def _rename_type(self, check_paths, delete, add):
        if check_paths and delete.old.path == add.new.path:
            # If the paths match, this must be a split modify, so make sure it
            # comes out as a modify.
            return CHANGE_MODIFY
        elif delete.type != CHANGE_DELETE:
            # If it's in deletes but not marked as a delete, it must have been
            # added due to find_copies_harder, and needs to be marked as a
            # copy.
            return CHANGE_COPY
        return CHANGE_RENAME

    def _find_content_rename_candidates(self):
        candidates = self._candidates = []
        # TODO: Optimizations:
        # - Compare object sizes before counting blocks.
        # - Skip if delete's S_IFMT differs from all adds.
        # - Skip if adds or deletes is empty.
        # Match C git's behavior of not attempting to find content renames if
        # the matrix size exceeds the threshold.
        if not self._should_find_content_renames():
            return

        block_cache = {}
        check_paths = self._rename_threshold is not None
        for delete in self._deletes:
            if S_ISGITLINK(delete.old.mode):
                continue  # Git links don't exist in this repo.
            old_sha = delete.old.sha
            old_obj = self._store[old_sha]
            block_cache[old_sha] = _count_blocks(old_obj)
            for add in self._adds:
                if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                    continue
                new_obj = self._store[add.new.sha]
                score = _similarity_score(old_obj, new_obj,
                                          block_cache=block_cache)
                if score > self._rename_threshold:
                    new_type = self._rename_type(check_paths, delete, add)
                    rename = TreeChange(new_type, delete.old, add.new)
                    candidates.append((-score, rename))

    def _choose_content_renames(self):
        # Sort scores from highest to lowest, but keep names in ascending
        # order.
        self._candidates.sort()

        delete_paths = set()
        add_paths = set()
        for _, change in self._candidates:
            new_path = change.new.path
            if new_path in add_paths:
                continue
            old_path = change.old.path
            orig_type = change.type
            if old_path in delete_paths:
                change = TreeChange(CHANGE_COPY, change.old, change.new)

            # If the candidate was originally a copy, that means it came from a
            # modified or unchanged path, so we don't want to prune it.
            if orig_type != CHANGE_COPY:
                delete_paths.add(old_path)
            add_paths.add(new_path)
            self._changes.append(change)
        self._prune(add_paths, delete_paths)

    def _join_modifies(self):
        if self._rewrite_threshold is None:
            return

        modifies = {}
        delete_map = dict((d.old.path, d) for d in self._deletes)
        for add in self._adds:
            path = add.new.path
            delete = delete_map.get(path)
            if (delete is not None and
                    stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)

        self._adds = [a for a in self._adds if a.new.path not in modifies]
        self._deletes = [a for a in self._deletes if a.new.path not in
                         modifies]
        self._changes += modifies.values()

    def _sorted_changes(self):
        result = []
        result.extend(self._adds)
        result.extend(self._deletes)
        result.extend(self._changes)
        result.sort(key=_tree_change_key)
        return result

    def _prune_unchanged(self):
        if self._want_unchanged:
            return
        self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED]

    def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
        """Iterate TreeChanges between two tree SHAs, with rename detection."""
        self._reset()
        self._want_unchanged = want_unchanged
        self._collect_changes(tree1_id, tree2_id)
        self._find_exact_renames()
        self._find_content_rename_candidates()
        self._choose_content_renames()
        self._join_modifies()
        self._prune_unchanged()
        return self._sorted_changes()


# Hold on to the pure-python implementations for testing.
_is_tree_py = _is_tree
_merge_entries_py = _merge_entries
_count_blocks_py = _count_blocks
try:
    # Try to import C versions
    from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks
except ImportError:
    pass
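
A sketch of the detector in use, with the same invented store setup as the earlier example: moving a blob to a new path without changing its contents is collapsed by _find_exact_renames into a single rename instead of a delete plus an add.

from dulwich.diff_tree import RenameDetector, tree_changes
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b'the same contents\n')
old_tree = Tree()
old_tree.add(b'old-name.txt', 0o100644, blob.id)
new_tree = Tree()
new_tree.add(b'new-name.txt', 0o100644, blob.id)
for obj in (blob, old_tree, new_tree):
    store.add_object(obj)

detector = RenameDetector(store)
for change in tree_changes(store, old_tree.id, new_tree.id,
                           rename_detector=detector):
    # prints: rename b'old-name.txt' b'new-name.txt'
    print(change.type, change.old.path, change.new.path)
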
org.gvsig.scripting/trunk/org.gvsig.scripting/org.gvsig.scripting.app/org.gvsig.scripting.app.mainplugin/src/main/resources-plugin/scripting/lib/dulwich/objects.py

# objects.py -- Access to base git objects
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version
# 2.0 or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Access to base git objects."""

import binascii
from io import BytesIO
from collections import namedtuple
import os
import posixpath
import stat
import warnings
import zlib
from hashlib import sha1

from dulwich.errors import (
    ChecksumMismatch,
    NotBlobError,
    NotCommitError,
    NotTagError,
    NotTreeError,
    ObjectFormatException,
    )
from dulwich.file import GitFile


ZERO_SHA = b'0' * 40

# Header fields for commits
_TREE_HEADER = b'tree'
_PARENT_HEADER = b'parent'
_AUTHOR_HEADER = b'author'
_COMMITTER_HEADER = b'committer'
_ENCODING_HEADER = b'encoding'
_MERGETAG_HEADER = b'mergetag'
_GPGSIG_HEADER = b'gpgsig'

# Header fields for objects
_OBJECT_HEADER = b'object'
_TYPE_HEADER = b'type'
_TAG_HEADER = b'tag'
_TAGGER_HEADER = b'tagger'


S_IFGITLINK = 0o160000


def S_ISGITLINK(m):
    """Check if a mode indicates a submodule.

    :param m: Mode to check
    :return: a ``boolean``
    """
    return (stat.S_IFMT(m) == S_IFGITLINK)


def _decompress(string):
    dcomp = zlib.decompressobj()
    dcomped = dcomp.decompress(string)
    dcomped += dcomp.flush()
    return dcomped


def sha_to_hex(sha):
    """Takes a string and returns the hex of the sha within"""
    hexsha = binascii.hexlify(sha)
    assert len(hexsha) == 40, \
        "Incorrect length of sha1 string: %d" % len(hexsha)
    return hexsha


def hex_to_sha(hex):
    """Takes a hex sha and returns a binary sha"""
    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
    try:
        return binascii.unhexlify(hex)
    except TypeError as exc:
        if not isinstance(hex, bytes):
            raise
        raise ValueError(exc.args[0])


def valid_hexsha(hex):
    if len(hex) != 40:
        return False
    try:
        binascii.unhexlify(hex)
    except (TypeError, binascii.Error):
        return False
    else:
        return True


def hex_to_filename(path, hex):
    """Takes a hex sha and returns its filename relative to the given path."""
    # os.path.join accepts bytes or unicode, but all args must be of the same
    # type. Make sure that hex which is expected to be bytes, is the same type
    # as path.
    if getattr(path, 'encode', None) is not None:
        hex = hex.decode('ascii')
    dir = hex[:2]
    file = hex[2:]
    # Check from object dir
    return os.path.join(path, dir, file)


def filename_to_hex(filename):
    """Takes an object filename and returns its corresponding hex sha."""
    # grab the last (up to) two path components
    names = filename.rsplit(os.path.sep, 2)[-2:]
    errmsg = "Invalid object filename: %s" % filename
    assert len(names) == 2, errmsg
    base, rest = names
    assert len(base) == 2 and len(rest) == 38, errmsg
    hex = (base + rest).encode('ascii')
    hex_to_sha(hex)
    return hex


def object_header(num_type, length):
    """Return an object header for the given numeric type and text length."""
    return (object_class(num_type).type_name +
            b' ' + str(length).encode('ascii') + b'\0')


def serializable_property(name, docstring=None):
    """A property that helps tracking whether serialization is necessary.
    """
    def set(obj, value):
        setattr(obj, "_"+name, value)
        obj._needs_serialization = True

    def get(obj):
        return getattr(obj, "_"+name)
    return property(get, set, doc=docstring)


def object_class(type):
    """Get the object class corresponding to the given type.

    :param type: Either a type name string or a numeric type.
    :return: The ShaFile subclass corresponding to the given type, or None if
        type is not a valid type name/number.
    """
    return _TYPE_MAP.get(type, None)


def check_hexsha(hex, error_msg):
    """Check if a string is a valid hex sha string.

    :param hex: Hex string to check
    :param error_msg: Error message to use in exception
    :raise ObjectFormatException: Raised when the string is not valid
    """
    if not valid_hexsha(hex):
        raise ObjectFormatException("%s %s" % (error_msg, hex))


def check_identity(identity, error_msg):
    """Check if the specified identity is valid.

    This will raise an exception if the identity is not valid.

    :param identity: Identity string
    :param error_msg: Error message to use in exception
    """
    email_start = identity.find(b'<')
    email_end = identity.find(b'>')
    if (email_start < 0 or email_end < 0 or email_end <= email_start
            or identity.find(b'<', email_start + 1) >= 0
            or identity.find(b'>', email_end + 1) >= 0
            or not identity.endswith(b'>')):
        raise ObjectFormatException(error_msg)


def git_line(*items):
    """Formats items into a space separated line."""
    return b' '.join(items) + b'\n'


class FixedSha(object):
    """SHA object that behaves like hashlib's but is given a fixed value."""

    __slots__ = ('_hexsha', '_sha')

    def __init__(self, hexsha):
        if getattr(hexsha, 'encode', None) is not None:
            hexsha = hexsha.encode('ascii')
        if not isinstance(hexsha, bytes):
            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
        self._hexsha = hexsha
        self._sha = hex_to_sha(hexsha)

    def digest(self):
        """Return the raw SHA digest."""
        return self._sha

    def hexdigest(self):
        """Return the hex SHA digest."""
        return self._hexsha.decode('ascii')
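
A quick sketch of the module-level helpers defined so far, with an invented SHA; hex_to_filename and filename_to_hex round-trip through the objects-directory naming scheme, and check_identity returns silently for a well-formed identity. The ShaFile base class below builds on these helpers.

from dulwich.objects import (
    check_identity, filename_to_hex, hex_to_filename, hex_to_sha, sha_to_hex)

hexsha = b'1' * 40
assert sha_to_hex(hex_to_sha(hexsha)) == hexsha

# Loose objects live under <objects dir>/<first two hex chars>/<rest>.
path = hex_to_filename('/repo/.git/objects', hexsha)
assert filename_to_hex(path) == hexsha

check_identity(b'Jane Doe <jane@example.com>', 'invalid identity')  # no error
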
class ShaFile(object):
    """A git SHA file."""

    __slots__ = ('_chunked_text', '_sha', '_needs_serialization')

    @staticmethod
    def _parse_legacy_object_header(magic, f):
        """Parse a legacy object, creating it but not reading the file."""
        bufsize = 1024
        decomp = zlib.decompressobj()
        header = decomp.decompress(magic)
        start = 0
        end = -1
        while end < 0:
            extra = f.read(bufsize)
            header += decomp.decompress(extra)
            magic += extra
            end = header.find(b'\0', start)
            start = len(header)
        header = header[:end]
        type_name, size = header.split(b' ', 1)
        size = int(size)  # sanity check
        obj_class = object_class(type_name)
        if not obj_class:
            raise ObjectFormatException("Not a known type: %s" % type_name)
        return obj_class()

    def _parse_legacy_object(self, map):
        """Parse a legacy object, setting the raw string."""
        text = _decompress(map)
        header_end = text.find(b'\0')
        if header_end < 0:
            raise ObjectFormatException("Invalid object header, no \\0")
        self.set_raw_string(text[header_end+1:])

    def as_legacy_object_chunks(self):
        """Return chunks representing the object in the experimental format.

        :return: List of strings
        """
        compobj = zlib.compressobj()
        yield compobj.compress(self._header())
        for chunk in self.as_raw_chunks():
            yield compobj.compress(chunk)
        yield compobj.flush()

    def as_legacy_object(self):
        """Return string representing the object in the experimental format.
        """
        return b''.join(self.as_legacy_object_chunks())

    def as_raw_chunks(self):
        """Return chunks with serialization of the object.

        :return: List of strings, not necessarily one per line
        """
        if self._needs_serialization:
            self._sha = None
            self._chunked_text = self._serialize()
            self._needs_serialization = False
        return self._chunked_text

    def as_raw_string(self):
        """Return raw string with serialization of the object.

        :return: String object
        """
        return b''.join(self.as_raw_chunks())

    def __str__(self):
        """Return raw string serialization of this object."""
        return self.as_raw_string()

    def __hash__(self):
        """Return unique hash for this object."""
        return hash(self.id)

    def as_pretty_string(self):
        """Return a string representing this object, fit for display."""
        return self.as_raw_string()

    def set_raw_string(self, text, sha=None):
        """Set the contents of this object from a serialized string."""
        if not isinstance(text, bytes):
            raise TypeError('Expected bytes for text, got %r' % text)
        self.set_raw_chunks([text], sha)

    def set_raw_chunks(self, chunks, sha=None):
        """Set the contents of this object from a list of chunks."""
        self._chunked_text = chunks
        self._deserialize(chunks)
        if sha is None:
            self._sha = None
        else:
            self._sha = FixedSha(sha)
        self._needs_serialization = False

    @staticmethod
    def _parse_object_header(magic, f):
        """Parse a new style object, creating it but not reading the file."""
        num_type = (ord(magic[0:1]) >> 4) & 7
        obj_class = object_class(num_type)
        if not obj_class:
            raise ObjectFormatException("Not a known type %d" % num_type)
        return obj_class()

    def _parse_object(self, map):
        """Parse a new style object, setting self._text."""
        # skip type and size; type must have already been determined, and
        # we trust zlib to fail if it's otherwise corrupted
        byte = ord(map[0:1])
        used = 1
        while (byte & 0x80) != 0:
            byte = ord(map[used:used+1])
            used += 1
        raw = map[used:]
        self.set_raw_string(_decompress(raw))

    @classmethod
    def _is_legacy_object(cls, magic):
        b0 = ord(magic[0:1])
        b1 = ord(magic[1:2])
        word = (b0 << 8) + b1
        return (b0 & 0x8F) == 0x08 and (word % 31) == 0

    @classmethod
    def _parse_file(cls, f):
        map = f.read()
        if cls._is_legacy_object(map):
            obj = cls._parse_legacy_object_header(map, f)
            obj._parse_legacy_object(map)
        else:
            obj = cls._parse_object_header(map, f)
            obj._parse_object(map)
        return obj

    def __init__(self):
        """Don't call this directly"""
        self._sha = None
        self._chunked_text = []
        self._needs_serialization = True

    def _deserialize(self, chunks):
        raise NotImplementedError(self._deserialize)

    def _serialize(self):
        raise NotImplementedError(self._serialize)

    @classmethod
    def from_path(cls, path):
        """Open a SHA file from disk."""
        with GitFile(path, 'rb') as f:
            return cls.from_file(f)

    @classmethod
    def from_file(cls, f):
        """Get the contents of a SHA file on disk."""
        try:
            obj = cls._parse_file(f)
            obj._sha = None
            return obj
        except (IndexError, ValueError):
            raise ObjectFormatException("invalid object header")

    @staticmethod
    def from_raw_string(type_num, string, sha=None):
        """Creates an object of the indicated type from the raw string given.

        :param type_num: The numeric type of the object.
        :param string: The raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_string(string, sha)
        return obj

    @staticmethod
    def from_raw_chunks(type_num, chunks, sha=None):
        """Creates an object of the indicated type from the raw chunks given.

        :param type_num: The numeric type of the object.
        :param chunks: An iterable of the raw uncompressed contents.
        :param sha: Optional known sha for the object
        """
        obj = object_class(type_num)()
        obj.set_raw_chunks(chunks, sha)
        return obj

    @classmethod
    def from_string(cls, string):
        """Create a ShaFile from a string."""
        obj = cls()
        obj.set_raw_string(string)
        return obj

    def _check_has_member(self, member, error_msg):
        """Check that the object has a given member variable.

        :param member: the member variable to check for
        :param error_msg: the message for an error if the member is missing
        :raise ObjectFormatException: with the given error_msg if member is
            missing or is None
        """
        if getattr(self, member, None) is None:
            raise ObjectFormatException(error_msg)

    def check(self):
        """Check this object for internal consistency.

        :raise ObjectFormatException: if the object is malformed in some way
        :raise ChecksumMismatch: if the object was created with a SHA that does
            not match its contents
        """
        # TODO: if we find that error-checking during object parsing is a
        # performance bottleneck, those checks should be moved to the class's
        # check() method during optimization so we can still check the object
        # when necessary.
        old_sha = self.id
        try:
            self._deserialize(self.as_raw_chunks())
            self._sha = None
            new_sha = self.id
        except Exception as e:
            raise ObjectFormatException(e)
        if old_sha != new_sha:
            raise ChecksumMismatch(new_sha, old_sha)

    def _header(self):
        return object_header(self.type, self.raw_length())

    def raw_length(self):
        """Returns the length of the raw string of this object."""
        ret = 0
        for chunk in self.as_raw_chunks():
            ret += len(chunk)
        return ret

    def sha(self):
        """The SHA1 object that is the name of this object."""
        if self._sha is None or self._needs_serialization:
            # this is a local because as_raw_chunks() overwrites self._sha
            new_sha = sha1()
            new_sha.update(self._header())
            for chunk in self.as_raw_chunks():
                new_sha.update(chunk)
            self._sha = new_sha
        return self._sha

    def copy(self):
        """Create a new copy of this SHA1 object from its raw string"""
        obj_class = object_class(self.get_type())
        return obj_class.from_raw_string(
            self.get_type(),
            self.as_raw_string(),
            self.id)

    @property
    def id(self):
        """The hex SHA of this object."""
        return self.sha().hexdigest().encode('ascii')

    def get_type(self):
        """Return the type number for this object class."""
        return self.type_num

    def set_type(self, type):
        """Set the type number for this object class."""
        self.type_num = type

    # DEPRECATED: use type_num or type_name as needed.
    type = property(get_type, set_type)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, self.id)

    def __ne__(self, other):
        return not isinstance(other, ShaFile) or self.id != other.id

    def __eq__(self, other):
        """Return True if the SHAs of the two objects match.

        It doesn't make sense to talk about an order on ShaFiles, so we don't
        override the rich comparison methods (__le__, etc.).
        """
        return isinstance(other, ShaFile) and self.id == other.id

    def __lt__(self, other):
        if not isinstance(other, ShaFile):
            raise TypeError
        return self.id < other.id

    def __le__(self, other):
        if not isinstance(other, ShaFile):
            raise TypeError
        return self.id <= other.id
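
Finally, an illustrative round trip through the ShaFile API. This assumes the Blob subclass, which is defined further down in objects.py beyond the point where this listing is cut off; the content is invented.

from dulwich.objects import Blob

blob = Blob.from_string(b'my file content\n')
print(blob.id)                      # 40-character hex SHA, as bytes

copy = Blob.from_raw_string(blob.type_num, blob.as_raw_string())
assert copy == blob                 # ShaFile.__eq__ compares hex SHAs
blob.check()                        # no exception: object is well formed
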