Skip to content

Instantly share code, notes, and snippets.

@kmsquire
Created August 23, 2012 23:14
Show Gist options
  • Save kmsquire/3443260 to your computer and use it in GitHub Desktop.
Save kmsquire/3443260 to your computer and use it in GitHub Desktop.
Julia LRU with ranked cache item
This was an experiment in trying to improve the performance of julia's
LRU cache datatype. One bottleneck in the current design is the
search for an item in the vector-based queue. By associating a rank
with each CacheItem that is updated every time the item is accessed,
the queue becomes sorted, so that the search for an item changes from
O(N) to O(log N). Unfortunately, this really only has a measurable
effect when the number of items gets large ( > 5,000-10,000 items),
at which point a doubly-linked list version should be used.
Timing comparison (left = original, right= ranked)
LRU (original) LRU (ranked)
UnboundedLRU(), 50 items UnboundedLRU(), 50 items
Assignment: Assignment:
elapsed time: 0.08705282211303711 seconds elapsed time: 0.0904841423034668 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.02248096466064453 seconds elapsed time: 0.029716014862060547 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.0009319782257080078 seconds elapsed time: 0.0009610652923583984 seconds
UnboundedLRU(), 500 items UnboundedLRU(), 500 items
Assignment: Assignment:
elapsed time: 0.0038208961486816406 seconds elapsed time: 0.003871917724609375 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.004545927047729492 seconds elapsed time: 0.004338979721069336 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.004336118698120117 seconds elapsed time: 0.004023075103759766 seconds
UnboundedLRU(), 5000 items UnboundedLRU(), 5000 items
Assignment: Assignment:
elapsed time: 0.047534942626953125 seconds elapsed time: 0.048055171966552734 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.18019700050354004 seconds elapsed time: 0.12550687789916992 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.11444902420043945 seconds elapsed time: 0.0713810920715332 seconds
UnboundedLRU(), 50000 items UnboundedLRU(), 50000 items
Assignment: Assignment:
elapsed time: 1.5170938968658447 seconds elapsed time: 0.8376049995422363 seconds
Lookup, random access: Lookup, random access:
elapsed time: 9.970831871032715 seconds elapsed time: 6.277849912643433 seconds
Random mixed workload: Random mixed workload:
elapsed time: 5.900052070617676 seconds elapsed time: 3.4924769401550293 seconds
BoundedLRU(100), 50 items BoundedLRU(100), 50 items
Assignment: Assignment:
elapsed time: 0.016585111618041992 seconds elapsed time: 0.018676042556762695 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.0048370361328125 seconds elapsed time: 0.00746607780456543 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.0008990764617919922 seconds elapsed time: 0.0009000301361083984 seconds
BoundedLRU(100), 500 items BoundedLRU(100), 500 items
Assignment: Assignment:
elapsed time: 0.018558979034423828 seconds elapsed time: 0.018524885177612305 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.0024650096893310547 seconds elapsed time: 0.0024650096893310547 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.0074939727783203125 seconds elapsed time: 0.007397890090942383 seconds
BoundedLRU(100), 5000 items BoundedLRU(100), 5000 items
Assignment: Assignment:
elapsed time: 0.12354588508605957 seconds elapsed time: 0.12208199501037598 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.022534847259521484 seconds elapsed time: 0.02232503890991211 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.07372307777404785 seconds elapsed time: 0.07299304008483887 seconds
BoundedLRU(100), 50000 items BoundedLRU(100), 50000 items
Assignment: Assignment:
elapsed time: 1.3016860485076904 seconds elapsed time: 1.2879738807678223 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.28099608421325684 seconds elapsed time: 0.34238314628601074 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.8608372211456299 seconds elapsed time: 0.8118419647216797 seconds
BoundedLRU(1000), 50 items BoundedLRU(1000), 50 items
Assignment: Assignment:
elapsed time: 0.002232074737548828 seconds elapsed time: 0.0025310516357421875 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.0003650188446044922 seconds elapsed time: 0.000370025634765625 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.0008628368377685547 seconds elapsed time: 0.0008938312530517578 seconds
BoundedLRU(1000), 500 items BoundedLRU(1000), 500 items
Assignment: Assignment:
elapsed time: 0.013031959533691406 seconds elapsed time: 0.013067007064819336 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.0045490264892578125 seconds elapsed time: 0.004341840744018555 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.00899505615234375 seconds elapsed time: 0.008810997009277344 seconds
BoundedLRU(1000), 5000 items BoundedLRU(1000), 5000 items
Assignment: Assignment:
elapsed time: 0.13265490531921387 seconds elapsed time: 0.13269495964050293 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.03271818161010742 seconds elapsed time: 0.03151297569274902 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.07929801940917969 seconds elapsed time: 0.07927393913269043 seconds
BoundedLRU(1000), 50000 items BoundedLRU(1000), 50000 items
Assignment: Assignment:
elapsed time: 1.3893730640411377 seconds elapsed time: 1.3932721614837646 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.40679407119750977 seconds elapsed time: 0.41121602058410645 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.864138126373291 seconds elapsed time: 0.8650519847869873 seconds
BoundedLRU(10000), 50 items BoundedLRU(10000), 50 items
Assignment: Assignment:
elapsed time: 0.0028390884399414062 seconds elapsed time: 0.0037109851837158203 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.00036597251892089844 seconds elapsed time: 0.00036597251892089844 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.0009210109710693359 seconds elapsed time: 0.0008800029754638672 seconds
BoundedLRU(10000), 500 items BoundedLRU(10000), 500 items
Assignment: Assignment:
elapsed time: 0.013064861297607422 seconds elapsed time: 0.012997865676879883 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.00463414192199707 seconds elapsed time: 0.004346132278442383 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.009072065353393555 seconds elapsed time: 0.008729934692382812 seconds
BoundedLRU(10000), 5000 items BoundedLRU(10000), 5000 items
Assignment: Assignment:
elapsed time: 0.14038586616516113 seconds elapsed time: 0.14023590087890625 seconds
Lookup, random access: Lookup, random access:
elapsed time: 0.1850268840789795 seconds elapsed time: 0.12741684913635254 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.16317105293273926 seconds elapsed time: 0.1193549633026123 seconds
BoundedLRU(10000), 50000 items BoundedLRU(10000), 50000 items
Assignment: Assignment:
elapsed time: 1.5147151947021484 seconds elapsed time: 1.514051914215088 seconds
Lookup, random access: Lookup, random access:
elapsed time: 1.1219160556793213 seconds elapsed time: 0.8908340930938721 seconds
Random mixed workload: Random mixed workload:
elapsed time: 0.9878139495849609 seconds elapsed time: 1.0012459754943848 seconds
# An LRU (Least Recently Used) cache is an associative data structure which
# maintains its contents in an order such that the most recently used item
# is at the beginning of the structure, and the least recently used at the end.
#
# This file specifies two types of LRU caches, both with and without a size
# limit. BoundedLRU has a limit and evicts the LRU item if a new item is added
# after that bound is reached. UnboundedLRU does not have a maximum size, but
# can be used as a basis for more complex LRUs.
#
# LRUs should follow the interfaces for general collections, indexable
# collections, and associative collections.
# The standard implementation of an LRU backs a hash table with a doubly-linked
# list for O(1) operations when reordering on access and eviction. The Julia
# implementation instead backs the table with a Vector. For moderately-sized
# collections, the difference in performance is small, and this implmentation
# is simpler and easier to understand.
# This version is updated slightly to add a rank to each CacheItem, which is
# updated whenever that item is moved to the front of the cache. This maintains
# the queue as a sorted list, which allows sorted_search to be used and changes
# the order of this step from O(N) to O(log N).
abstract LRU{K,V} <: Associative{K,V}
# Default cache size
const __MAXCACHE = 1024
type CacheItem{K,V}
k::K
v::V
rank::Int64
end
isless(a::CacheItem, b::CacheItem) = a.rank > b.rank # Decending order
type UnboundedLRU{K,V} <: LRU{K,V}
ht::Dict
q::Vector{CacheItem}
access_count::Int64
UnboundedLRU() = new(Dict(), similar(Array(CacheItem,1), 0), 0)
end
UnboundedLRU() = UnboundedLRU{Any, Any}()
type BoundedLRU{K,V} <: LRU{K,V}
ht::Dict
q::Vector{CacheItem}
maxsize::Int
access_count::Int64
BoundedLRU(m) = new(Dict(), similar(Array(CacheItem,1), 0), m, 0)
BoundedLRU() = BoundedLRU(__MAXCACHE)
end
BoundedLRU(m) = BoundedLRU{Any, Any}(m)
BoundedLRU() = BoundedLRU{Any, Any}()
## collections ##
isempty(lru::LRU) = isempty(lru.q)
numel(lru::LRU) = numel(lru.q)
length(lru::LRU) = length(lru.q)
has(lru::LRU, key) = has(lru.ht, key)
## associative ##
# Should this check count as an access?
has(lru::LRU, key) = has(lru.ht, key)
get(lru::LRU, key, default) = has(lru, key) ? lru[key] : default
function del_all(lru::LRU)
del_all(lru.ht)
del_all(lru.q)
end
show(lru::BoundedLRU) = print("BoundedLRU($(lru.maxsize))")
## indexable ##
# Method to do the second, slow lookup in the list with early return.
# function locate(q, x)
# for i = 1:length(q)
# if q[i] == x
# return i
# end
# end
# error("Item not found.")
# end
locate(q, x) = search_sorted(q, x)
function ref(lru::LRU, key)
lru.access_count += 1
item = lru.ht[key]
idx = locate(lru.q, item)
del(lru.q, idx)
item.rank = lru.access_count
enqueue(lru.q, item)
item.v
end
function assign(lru::LRU, v, key)
lru.access_count += 1
if has(lru, key)
item = lru.ht[key]
idx = locate(lru.q, item)
item.v = v
item.rank = lru.access_count
del(lru.q, idx)
else
item = CacheItem(key, v, lru.access_count)
lru.ht[key] = item
end
enqueue(lru.q, item)
end
# Eviction
function assign{V,K}(lru::BoundedLRU, v::V, key::K)
invoke(assign, (LRU, V, K), lru, v, key)
nrm = length(lru) - lru.maxsize
for i in 1:nrm
rm = pop(lru.q)
del(lru.ht, rm.k)
end
end
## associative ##
function del(lru::LRU, key)
item = lru.ht[key]
idx = locate(lru.q, item)
del(lru.ht, key)
del(lru.q, idx)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment