Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / lru_cache.py @ 959

History | View | Annotate | Download (14.1 KB)

1
# lru_cache.py -- Simple LRU cache for dulwich
2
# Copyright (C) 2006, 2008 Canonical Ltd
3
#
4
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5
# General Public License as public by the Free Software Foundation; version 2.0
6
# or (at your option) any later version. You can redistribute it and/or
7
# modify it under the terms of either of these two licenses.
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
#
15
# You should have received a copy of the licenses; if not, see
16
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18
# License, Version 2.0.
19
#
20

    
21
"""A simple least-recently-used (LRU) cache."""
22

    
23
_null_key = object()
24

    
25

    
26
class _LRUNode(object):
27
    """This maintains the linked-list which is the lru internals."""
28

    
29
    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
30

    
31
    def __init__(self, key, value, cleanup=None):
32
        self.prev = None
33
        self.next_key = _null_key
34
        self.key = key
35
        self.value = value
36
        self.cleanup = cleanup
37
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
38
        #       one pointer from this object, we just need to decide if it
39
        #       actually costs us much of anything in normal usage
40
        self.size = None
41

    
42
    def __repr__(self):
43
        if self.prev is None:
44
            prev_key = None
45
        else:
46
            prev_key = self.prev.key
47
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
48
                                     self.next_key, prev_key)
49

    
50
    def run_cleanup(self):
51
        if self.cleanup is not None:
52
            self.cleanup(self.key, self.value)
53
        self.cleanup = None
54
        # Just make sure to break any refcycles, etc
55
        self.value = None
56

    
57

    
58
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_count=None):
        """Create a new LRUCache.

        :param max_cache: When more than this many entries are held, add()
            triggers a cleanup.
        :param after_cleanup_count: A cleanup shrinks the cache down to this
            many entries; defaults to 80% of max_cache.
        """
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        # Membership testing is not an access: it does not reorder the list.
        return key in self._cache

    def __getitem__(self, key):
        """Return the cached value for key, marking it most recently used.

        :raises KeyError: if key is not cached.
        """
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            node = self._cache[key]
            # Replacing an existing entry: release the old value first.
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        """Return the cached value for key, or default if not cached.

        Unlike __contains__, a hit counts as an access and moves the entry
        to the front of the LRU list.
        """
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            # First entry: it is both head and tail of the list.
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        """Drop node from the cache dict and unlink it from the LRU list."""
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Shrink to 80% of max_cache by default.  Use floor division so
            # the threshold stays an integer under Python 3 (true division
            # would produce a float), consistent with
            # LRUSizeCache._update_max_size which already uses //.
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()
275

    
276

    
277
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        # Also bound the entry count, so the dict itself cannot grow without
        # limit when many tiny values are stored.
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            # Replacing an existing entry: subtract the old size from the
            # running total, run the old entry's cleanup, and actually store
            # the new value.  (Previously only the size was updated, so
            # lookups kept returning the stale value and the old cleanup
            # never ran — see LRUCache.add for the intended behaviour.)
            self._value_size -= node.size
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        # Keep the byte total in sync before the base class unlinks the node.
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size/512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            # Default the cleanup threshold to 80% of max_size.
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)