gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / dulwich / lru_cache.py @ 959
History | View | Annotate | Download (14.1 KB)
1 |
# lru_cache.py -- Simple LRU cache for dulwich
|
---|---|
2 |
# Copyright (C) 2006, 2008 Canonical Ltd
|
3 |
#
|
4 |
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
5 |
# General Public License as public by the Free Software Foundation; version 2.0
|
6 |
# or (at your option) any later version. You can redistribute it and/or
|
7 |
# modify it under the terms of either of these two licenses.
|
8 |
#
|
9 |
# Unless required by applicable law or agreed to in writing, software
|
10 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
#
|
15 |
# You should have received a copy of the licenses; if not, see
|
16 |
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
17 |
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
18 |
# License, Version 2.0.
|
19 |
#
|
20 |
|
21 |
"""A simple least-recently-used (LRU) cache."""
|
22 |
|
23 |
_null_key = object()
|
24 |
|
25 |
|
26 |
class _LRUNode(object): |
27 |
"""This maintains the linked-list which is the lru internals."""
|
28 |
|
29 |
__slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size') |
30 |
|
31 |
def __init__(self, key, value, cleanup=None): |
32 |
self.prev = None |
33 |
self.next_key = _null_key
|
34 |
self.key = key
|
35 |
self.value = value
|
36 |
self.cleanup = cleanup
|
37 |
# TODO: We could compute this 'on-the-fly' like we used to, and remove
|
38 |
# one pointer from this object, we just need to decide if it
|
39 |
# actually costs us much of anything in normal usage
|
40 |
self.size = None |
41 |
|
42 |
def __repr__(self): |
43 |
if self.prev is None: |
44 |
prev_key = None
|
45 |
else:
|
46 |
prev_key = self.prev.key
|
47 |
return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key, |
48 |
self.next_key, prev_key)
|
49 |
|
50 |
def run_cleanup(self): |
51 |
if self.cleanup is not None: |
52 |
self.cleanup(self.key, self.value) |
53 |
self.cleanup = None |
54 |
# Just make sure to break any refcycles, etc
|
55 |
self.value = None |
56 |
|
57 |
|
58 |
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones.

    Entries are kept in a dict for O(1) lookup, threaded onto a doubly linked
    list (via :class:`_LRUNode`) to track recency.  When the cache grows past
    ``max_cache`` entries it is shrunk back to ``after_cleanup_count``.
    """

    def __init__(self, max_cache=100, after_cleanup_count=None):
        """Create a new LRUCache.

        :param max_cache: The maximum number of entries to hold before a
            cleanup is triggered.
        :param after_cleanup_count: After a cleanup, shrink down to this many
            entries; defaults to 80% of max_cache.
        """
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        """Return the value for key, marking it most recently used.

        :raises KeyError: if key is not cached.
        """
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
            node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            # Replacing an existing entry: dispose of the old value first.
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        """Return the value for key (recording the access), or default."""
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peak into the current
        state.

        :return: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            # First entry: it is both head and tail.
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        """Remove node from the cache dict and unlink it from the LRU list."""
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        """Set the size limits, then shrink the cache to fit them."""
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to 80% of max_cache.  Use floor division so the count
            # stays an int under Python 3 true division ('/' produced a float
            # here, inconsistent with LRUSizeCache._update_max_size's '// 10').
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()
275 |
|
276 |
|
277 |
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        # Size limits must exist before LRUCache.__init__, which calls
        # self.cleanup() (overridden below to read _after_cleanup_size).
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            self._value_size -= node.size
            # BUG FIX: the old code only adjusted the accounted size here and
            # never stored the new value, so replacing an existing key kept
            # returning the stale value and never ran its cleanup.  Dispose of
            # the old value and store the replacement, as LRUCache.add does.
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        """Remove node, keeping the byte-size accounting in step."""
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(int(max_size/512), 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        """Set the byte-size limits (after_cleanup defaults to 80%)."""
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)