Last active
August 29, 2015 14:20
-
-
Save boydgreenfield/9c742c3f9c0ca6eff31c to your computer and use it in GitHub Desktop.
CountTable broken in Nim v0.11
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# | |
# Nim's Runtime Library | |
# (c) Copyright 2015 Andreas Rumpf | |
# | |
# See the file "copying.txt", included in this | |
# distribution, for details about the copyright. | |
# | |
## The ``tables`` module implements variants of an efficient `hash table`:idx: | |
## (also often named `dictionary`:idx: in other programming languages) that is | |
## a mapping from keys to values. ``Table`` is the usual hash table, | |
## ``OrderedTable`` is like ``Table`` but remembers insertion order | |
## and ``CountTable`` is a mapping from a key to its number of occurrences. | |
## For consistency with every other data type in Nim these have **value** | |
## semantics, this means that ``=`` performs a copy of the hash table. | |
## For **reference** semantics use the ``Ref`` variant: ``TableRef``, | |
## ``OrderedTableRef``, ``CountTableRef``. | |
## | |
## If you are using simple standard types like ``int`` or ``string`` for the | |
## keys of the table you won't have any problems, but as soon as you try to use | |
## a more complex object as a key you will be greeted by a strange compiler | |
## error:: | |
## | |
## Error: type mismatch: got (Person) | |
## but expected one of: | |
## hashes.hash(x: openarray[A]): THash | |
## hashes.hash(x: int): THash | |
## hashes.hash(x: float): THash | |
## … | |
## | |
## What is happening here is that the types used for table keys require to have | |
## a ``hash()`` proc which will convert them to a `THash <hashes.html#THash>`_ | |
## value, and the compiler is listing all the hash functions it knows. | |
## Additionally there has to be a ``==`` operator that provides the same | |
## semantics as its corresponding ``hash`` proc. | |
## | |
## After you add ``hash`` and ``==`` for your custom type everything will work. | |
## Currently however ``hash`` for objects is not defined, whereas | |
## ``system.==`` for objects does exist and performs a "deep" comparison (every | |
## field is compared) which is usually what you want. So in the following | |
## example implementing only ``hash`` suffices: | |
## | |
## .. code-block:: | |
## type | |
## Person = object | |
## firstName, lastName: string | |
## | |
## proc hash(x: Person): THash = | |
## ## Piggyback on the already available string hash proc. | |
## ## | |
## ## Without this proc nothing works! | |
## result = x.firstName.hash !& x.lastName.hash | |
## result = !$result | |
## | |
## var | |
## salaries = initTable[Person, int]() | |
## p1, p2: Person | |
## | |
## p1.firstName = "Jon" | |
## p1.lastName = "Ross" | |
## salaries[p1] = 30_000 | |
## | |
## p2.firstName = "소진" | |
## p2.lastName = "박" | |
## salaries[p2] = 45_000 | |
import | |
hashes, math | |
{.pragma: myShallow.} | |
type | |
KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B] | |
KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] | |
Table* {.myShallow.}[A, B] = object ## generic hash table | |
data: KeyValuePairSeq[A, B] | |
counter: int | |
TableRef*[A,B] = ref Table[A, B] | |
{.deprecated: [TTable: Table, PTable: TableRef].} | |
when not defined(nimhygiene): | |
{.pragma: dirty.} | |
# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These | |
# two procs retain clarity of that encoding without the space cost of an enum. | |
proc isEmpty(hcode: THash): bool {.inline.} = | |
result = hcode == 0 | |
proc isFilled(hcode: THash): bool {.inline.} = | |
result = hcode != 0 | |
proc len*[A, B](t: Table[A, B]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A, B](t: Table[A, B]): (A, B) = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = | |
## iterates over any (key, value) pair in the table `t`. The values | |
## can be modified. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: Table[A, B]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].key | |
iterator values*[A, B](t: Table[A, B]): B = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].val | |
iterator mvalues*[A, B](t: var Table[A, B]): var B = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].val | |
const | |
growthFactor = 2 | |
proc mustRehash(length, counter: int): bool {.inline.} = | |
assert(length > counter) | |
result = (length * 2 < counter * 3) or (length - counter < 4) | |
proc rightSize*(count: Natural): int {.inline.} = | |
## Return the value of `initialSize` to support `count` items. | |
## | |
## If more items are expected to be added, simply add that | |
## expected extra amount to the parameter before calling this. | |
## | |
## Internally, we want mustRehash(rightSize(x), x) == false. | |
result = nextPowerOfTwo(count * 3 div 2 + 4) | |
proc nextTry(h, maxHash: THash): THash {.inline.} = | |
result = (h + 1) and maxHash | |
template rawGetKnownHCImpl() {.dirty.} = | |
var h: THash = hc and high(t.data) # start with real hash value | |
while isFilled(t.data[h].hcode): | |
# Compare hc THEN key with boolean short circuit. This makes the common case | |
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present. | |
# It does slow down succeeding lookups by one extra THash cmp&and..usually | |
# just a few clock cycles, generally worth it for any non-integer-like A. | |
if t.data[h].hcode == hc and t.data[h].key == key: | |
return h | |
h = nextTry(h, high(t.data)) | |
result = -1 - h # < 0 => MISSING; insert idx = -1 - result | |
template rawGetImpl() {.dirty.} = | |
hc = hash(key) | |
if hc == 0: # This almost never taken branch should be very predictable. | |
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. | |
rawGetKnownHCImpl() | |
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add | |
hc = hash(key) | |
if hc == 0: | |
hc = 314159265 | |
var h: THash = hc and high(t.data) | |
while isFilled(t.data[h].hcode): | |
h = nextTry(h, high(t.data)) | |
result = h | |
template rawInsertImpl() {.dirty.} = | |
data[h].key = key | |
data[h].val = val | |
data[h].hcode = hc | |
proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} = | |
rawGetKnownHCImpl() | |
proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = | |
rawGetDeepImpl() | |
proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = | |
rawGetImpl() | |
proc `[]`*[A, B](t: Table[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A, B](t: var Table[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``KeyError`` exception is raised. | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index >= 0: result = t.data[index].val | |
else: | |
when compiles($key): | |
raise newException(KeyError, "key not found: " & $key) | |
else: | |
raise newException(KeyError, "key not found") | |
iterator allValues*[A, B](t: Table[A, B]; key: A): B = | |
## iterates over any value in the table `t` that belongs to the given `key`. | |
var h: THash = hash(key) and high(t.data) | |
while isFilled(t.data[h].hcode): | |
if t.data[h].key == key: | |
yield t.data[h].val | |
h = nextTry(h, high(t.data)) | |
proc hasKey*[A, B](t: Table[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
var hc: THash | |
result = rawGet(t, key, hc) >= 0 | |
proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B], | |
key: A, val: B, hc: THash, h: THash) = | |
rawInsertImpl() | |
proc enlarge[A, B](t: var Table[A, B]) = | |
var n: KeyValuePairSeq[A, B] | |
newSeq(n, len(t.data) * growthFactor) | |
swap(t.data, n) | |
for i in countup(0, high(n)): | |
if isFilled(n[i].hcode): | |
var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode) | |
rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j) | |
template addImpl() {.dirty.} = | |
if mustRehash(len(t.data), t.counter): enlarge(t) | |
var hc: THash | |
var j = rawGetDeep(t, key, hc) | |
rawInsert(t, t.data, key, val, hc, j) | |
inc(t.counter) | |
template maybeRehashPutImpl() {.dirty.} = | |
if mustRehash(len(t.data), t.counter): | |
enlarge(t) | |
index = rawGetKnownHC(t, key, hc) | |
index = -1 - index # important to transform for mgetOrPutImpl | |
rawInsert(t, t.data, key, val, hc, index) | |
inc(t.counter) | |
template putImpl() {.dirty.} = | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index >= 0: t.data[index].val = val | |
else: maybeRehashPutImpl() | |
template mgetOrPutImpl() {.dirty.} = | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index < 0: maybeRehashPutImpl() # not present: insert (flipping index) | |
result = t.data[index].val # either way return modifiable val | |
template hasKeyOrPutImpl() {.dirty.} = | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index < 0: | |
result = false | |
maybeRehashPutImpl() | |
else: result = true | |
proc mgetOrPut*[A, B](t: var Table[A, B], key: A, val: B): var B = | |
## retrieves value at ``t[key]`` or puts ``val`` if not present, either way | |
## returning a value which can be modified. | |
mgetOrPutImpl() | |
proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = | |
## returns true iff `key` is in the table, otherwise inserts `value`. | |
hasKeyOrPutImpl() | |
proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
putImpl() | |
proc add*[A, B](t: var Table[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
addImpl() | |
template doWhile(a: expr, b: stmt): stmt = | |
while true: | |
b | |
if not a: break | |
proc del*[A, B](t: var Table[A, B], key: A) = | |
## deletes `key` from hash table `t`. | |
var hc: THash | |
var i = rawGet(t, key, hc) | |
let msk = high(t.data) | |
if i >= 0: | |
t.data[i].hcode = 0 | |
dec(t.counter) | |
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 | |
var j = i # The correctness of this depends on (h+1) in nextTry, | |
var r = j # though may be adaptable to other simple sequences. | |
t.data[i].hcode = 0 # mark current EMPTY | |
doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): | |
i = (i + 1) and msk # increment mod table size | |
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done | |
return | |
r = t.data[i].hcode and msk # "home" location of key@i | |
shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop | |
proc initTable*[A, B](initialSize=64): Table[A, B] = | |
## creates a new hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module or the ``rightSize`` proc from this module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
newSeq(result.data, initialSize) | |
proc toTable*[A, B](pairs: openArray[(A, | |
B)]): Table[A, B] = | |
## creates a new hash table that contains the given `pairs`. | |
result = initTable[A, B](rightSize(pairs.len)) | |
for key, val in items(pairs): result[key] = val | |
template dollarImpl(): stmt {.dirty.} = | |
if t.len == 0: | |
result = "{:}" | |
else: | |
result = "{" | |
for key, val in pairs(t): | |
if result.len > 1: result.add(", ") | |
result.add($key) | |
result.add(": ") | |
result.add($val) | |
result.add("}") | |
proc `$`*[A, B](t: Table[A, B]): string = | |
## The `$` operator for hash tables. | |
dollarImpl() | |
template equalsImpl() = | |
if s.counter == t.counter: | |
# different insertion orders mean different 'data' seqs, so we have | |
# to use the slow route here: | |
for key, val in s: | |
# prefix notation leads to automatic dereference in case of PTable | |
if not t.hasKey(key): return false | |
if t[key] != val: return false | |
return true | |
proc `==`*[A, B](s, t: Table[A, B]): bool = | |
equalsImpl() | |
proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = | |
## Index the collection with the proc provided. | |
# TODO: As soon as supported, change collection: A to collection: A[B] | |
result = initTable[C, B]() | |
for item in collection: | |
result[index(item)] = item | |
proc len*[A, B](t: TableRef[A, B]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = | |
## iterates over any (key, value) pair in the table `t`. The values | |
## can be modified. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: TableRef[A, B]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].key | |
iterator values*[A, B](t: TableRef[A, B]): B = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].val | |
iterator mvalues*[A, B](t: TableRef[A, B]): var B = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if isFilled(t.data[h].hcode): yield t.data[h].val | |
proc `[]`*[A, B](t: TableRef[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A, B](t: TableRef[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
t[].mget(key) | |
proc mgetOrPut*[A, B](t: TableRef[A, B], key: A, val: B): var B = | |
## retrieves value at ``t[key]`` or puts ``val`` if not present, either way | |
## returning a value which can be modified. | |
t[].mgetOrPut(key, val) | |
proc hasKeyOrPut*[A, B](t: var TableRef[A, B], key: A, val: B): bool = | |
## returns true iff `key` is in the table, otherwise inserts `value`. | |
t[].hasKeyOrPut(key, val) | |
proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
t[][key] = val | |
proc add*[A, B](t: TableRef[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
t[].add(key, val) | |
proc del*[A, B](t: TableRef[A, B], key: A) = | |
## deletes `key` from hash table `t`. | |
t[].del(key) | |
proc newTable*[A, B](initialSize=64): TableRef[A, B] = | |
new(result) | |
result[] = initTable[A, B](initialSize) | |
proc newTable*[A, B](pairs: openArray[(A, B)]): TableRef[A, B] = | |
## creates a new hash table that contains the given `pairs`. | |
new(result) | |
result[] = toTable[A, B](pairs) | |
proc `$`*[A, B](t: TableRef[A, B]): string = | |
## The `$` operator for hash tables. | |
dollarImpl() | |
proc `==`*[A, B](s, t: TableRef[A, B]): bool = | |
if isNil(s): result = isNil(t) | |
elif isNil(t): result = false | |
else: equalsImpl() | |
proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = | |
## Index the collection with the proc provided. | |
# TODO: As soon as supported, change collection: A to collection: A[B] | |
result = newTable[C, B]() | |
for item in collection: | |
result[index(item)] = item | |
# ------------------------------ ordered table ------------------------------ | |
type | |
OrderedKeyValuePair[A, B] = tuple[ | |
hcode: THash, next: int, key: A, val: B] | |
OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] | |
OrderedTable* {. | |
myShallow.}[A, B] = object ## table that remembers insertion order | |
data: OrderedKeyValuePairSeq[A, B] | |
counter, first, last: int | |
OrderedTableRef*[A, B] = ref OrderedTable[A, B] | |
{.deprecated: [TOrderedTable: OrderedTable, POrderedTable: OrderedTableRef].} | |
proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = | |
## returns the number of keys in `t`. | |
result = t.counter | |
template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = | |
var h = t.first | |
while h >= 0: | |
var nxt = t.data[h].next | |
if isFilled(t.data[h].hcode): yieldStmt | |
h = nxt | |
iterator pairs*[A, B](t: OrderedTable[A, B]): (A, B) = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: var OrderedTable[A, B]): (A, var B) = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. The values can be modified. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: OrderedTable[A, B]): A = | |
## iterates over any key in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].key | |
iterator values*[A, B](t: OrderedTable[A, B]): B = | |
## iterates over any value in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = | |
## iterates over any value in the table `t` in insertion order. The values | |
## can be modified. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: THash): int = | |
rawGetKnownHCImpl() | |
proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int {.inline.} = | |
rawGetDeepImpl() | |
proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int = | |
rawGetImpl() | |
proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A, B](t: var OrderedTable[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
var hc: THash | |
var index = rawGet(t, key, hc) | |
if index >= 0: result = t.data[index].val | |
else: raise newException(KeyError, "key not found: " & $key) | |
proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
var hc: THash | |
result = rawGet(t, key, hc) >= 0 | |
proc rawInsert[A, B](t: var OrderedTable[A, B], | |
data: var OrderedKeyValuePairSeq[A, B], | |
key: A, val: B, hc: THash, h: THash) = | |
rawInsertImpl() | |
data[h].next = -1 | |
if t.first < 0: t.first = h | |
if t.last >= 0: data[t.last].next = h | |
t.last = h | |
proc enlarge[A, B](t: var OrderedTable[A, B]) = | |
var n: OrderedKeyValuePairSeq[A, B] | |
newSeq(n, len(t.data) * growthFactor) | |
var h = t.first | |
t.first = -1 | |
t.last = -1 | |
swap(t.data, n) | |
while h >= 0: | |
var nxt = n[h].next | |
if isFilled(n[h].hcode): | |
var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode) | |
rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j) | |
h = nxt | |
proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
putImpl() | |
proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
addImpl() | |
proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): var B = | |
## retrieves value at ``t[key]`` or puts ``value`` if not present, either way | |
## returning a value which can be modified. | |
mgetOrPutImpl() | |
proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = | |
## returns true iff `key` is in the table, otherwise inserts `value`. | |
hasKeyOrPutImpl() | |
proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] = | |
## creates a new ordered hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module or the ``rightSize`` proc from this module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
result.first = -1 | |
result.last = -1 | |
newSeq(result.data, initialSize) | |
proc toOrderedTable*[A, B](pairs: openArray[(A, | |
B)]): OrderedTable[A, B] = | |
## creates a new ordered hash table that contains the given `pairs`. | |
result = initOrderedTable[A, B](rightSize(pairs.len)) | |
for key, val in items(pairs): result[key] = val | |
proc `$`*[A, B](t: OrderedTable[A, B]): string = | |
## The `$` operator for ordered hash tables. | |
dollarImpl() | |
proc sort*[A, B](t: var OrderedTable[A, B], | |
cmp: proc (x,y: (A, B)): int) = | |
## sorts `t` according to `cmp`. This modifies the internal list | |
## that kept the insertion order, so insertion order is lost after this | |
## call but key lookup and insertions remain possible after `sort` (in | |
## contrast to the `sort` for count tables). | |
var list = t.first | |
var | |
p, q, e, tail, oldhead: int | |
nmerges, psize, qsize, i: int | |
if t.counter == 0: return | |
var insize = 1 | |
while true: | |
p = list; oldhead = list | |
list = -1; tail = -1; nmerges = 0 | |
while p >= 0: | |
inc(nmerges) | |
q = p | |
psize = 0 | |
i = 0 | |
while i < insize: | |
inc(psize) | |
q = t.data[q].next | |
if q < 0: break | |
inc(i) | |
qsize = insize | |
while psize > 0 or (qsize > 0 and q >= 0): | |
if psize == 0: | |
e = q; q = t.data[q].next; dec(qsize) | |
elif qsize == 0 or q < 0: | |
e = p; p = t.data[p].next; dec(psize) | |
elif cmp((t.data[p].key, t.data[p].val), | |
(t.data[q].key, t.data[q].val)) <= 0: | |
e = p; p = t.data[p].next; dec(psize) | |
else: | |
e = q; q = t.data[q].next; dec(qsize) | |
if tail >= 0: t.data[tail].next = e | |
else: list = e | |
tail = e | |
p = q | |
t.data[tail].next = -1 | |
if nmerges <= 1: break | |
insize = insize * 2 | |
t.first = list | |
t.last = tail | |
proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = | |
## returns the number of keys in `t`. | |
result = t.counter | |
template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = | |
var h = t.first | |
while h >= 0: | |
var nxt = t.data[h].next | |
if isFilled(t.data[h].hcode): yieldStmt | |
h = nxt | |
iterator pairs*[A, B](t: OrderedTableRef[A, B]): (A, B) = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: OrderedTableRef[A, B]): (A, var B) = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. The values can be modified. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: OrderedTableRef[A, B]): A = | |
## iterates over any key in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].key | |
iterator values*[A, B](t: OrderedTableRef[A, B]): B = | |
## iterates over any value in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = | |
## iterates over any value in the table `t` in insertion order. The values | |
## can be modified. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A, B](t: OrderedTableRef[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
result = t[].mget(key) | |
proc mgetOrPut*[A, B](t: OrderedTableRef[A, B], key: A, val: B): var B = | |
## retrieves value at ``t[key]`` or puts ``val`` if not present, either way | |
## returning a value which can be modified. | |
result = t[].mgetOrPut(key, val) | |
proc hasKeyOrPut*[A, B](t: var OrderedTableRef[A, B], key: A, val: B): bool = | |
## returns true iff `key` is in the table, otherwise inserts `val`. | |
result = t[].hasKeyOrPut(key, val) | |
proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
t[][key] = val | |
proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
t[].add(key, val) | |
proc newOrderedTable*[A, B](initialSize=64): OrderedTableRef[A, B] = | |
## creates a new ordered hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module or the ``rightSize`` proc from this module. | |
new(result) | |
result[] = initOrderedTable[A, B]() | |
proc newOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTableRef[A, B] = | |
## creates a new ordered hash table that contains the given `pairs`. | |
result = newOrderedTable[A, B](rightSize(pairs.len)) | |
for key, val in items(pairs): result[key] = val | |
proc `$`*[A, B](t: OrderedTableRef[A, B]): string = | |
## The `$` operator for ordered hash tables. | |
dollarImpl() | |
proc sort*[A, B](t: OrderedTableRef[A, B], | |
cmp: proc (x,y: (A, B)): int) = | |
## sorts `t` according to `cmp`. This modifies the internal list | |
## that kept the insertion order, so insertion order is lost after this | |
## call but key lookup and insertions remain possible after `sort` (in | |
## contrast to the `sort` for count tables). | |
t[].sort(cmp) | |
# ------------------------------ count tables ------------------------------- | |
type | |
CountTable* {.myShallow.}[ | |
A] = object ## table that counts the number of each key | |
data: seq[tuple[key: A, val: int]] | |
counter: int | |
CountTableRef*[A] = ref CountTable[A] | |
{.deprecated: [TCountTable: CountTable, PCountTable: CountTableRef].} | |
proc len*[A](t: CountTable[A]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A](t: CountTable[A]): (A, int) = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A](t: var CountTable[A]): (A, var int) = | |
## iterates over any (key, value) pair in the table `t`. The values can | |
## be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A](t: CountTable[A]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].key | |
iterator values*[A](t: CountTable[A]): int = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
iterator mvalues*[A](t: CountTable[A]): var int = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
proc rawGet[A](t: CountTable[A], key: A): int = | |
var h: THash = hash(key) and high(t.data) # start with real hash value | |
while t.data[h].val != 0: | |
if t.data[h].key == key: return h | |
h = nextTry(h, high(t.data)) | |
result = -1 - h # < 0 => MISSING; insert idx = -1 - result | |
proc `[]`*[A](t: CountTable[A], key: A): int = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## 0 is returned. One can check with ``hasKey`` whether the key | |
## exists. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A](t: var CountTable[A], key: A): var int = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
else: raise newException(KeyError, "key not found: " & $key) | |
proc hasKey*[A](t: CountTable[A], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = rawGet(t, key) >= 0 | |
proc rawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], | |
key: A, val: int) = | |
var h: THash = hash(key) and high(data) | |
while data[h].val != 0: h = nextTry(h, high(data)) | |
data[h].key = key | |
data[h].val = val | |
proc enlarge[A](t: var CountTable[A]) = | |
var n: seq[tuple[key: A, val: int]] | |
newSeq(n, len(t.data) * growthFactor) | |
for i in countup(0, high(t.data)): | |
if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val) | |
swap(t.data, n) | |
proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = | |
## puts a (key, value)-pair into `t`. `val` has to be positive. | |
assert val > 0 | |
var h = rawGet(t, key) | |
if h >= 0: | |
t.data[h].val = val | |
else: | |
h = -1 - h | |
t.data[h].key = key | |
t.data[h].val = val | |
proc initCountTable*[A](initialSize=64): CountTable[A] = | |
## creates a new count table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module or the ``rightSize`` proc in this module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
newSeq(result.data, initialSize) | |
proc toCountTable*[A](keys: openArray[A]): CountTable[A] = | |
## creates a new count table with every key in `keys` having a count of 1. | |
result = initCountTable[A](rightSize(keys.len)) | |
for key in items(keys): result[key] = 1 | |
proc `$`*[A](t: CountTable[A]): string = | |
## The `$` operator for count tables. | |
dollarImpl() | |
proc inc*[A](t: var CountTable[A], key: A, val = 1) = | |
## increments `t[key]` by `val`. | |
var index = rawGet(t, key) | |
if index >= 0: | |
inc(t.data[index].val, val) | |
else: | |
if mustRehash(len(t.data), t.counter): enlarge(t) | |
rawInsert(t, t.data, key, val) | |
inc(t.counter) | |
proc smallest*[A](t: CountTable[A]): tuple[key: A, val: int] = | |
## returns the largest (key,val)-pair. Efficiency: O(n) | |
assert t.len > 0 | |
var minIdx = 0 | |
for h in 1..high(t.data): | |
if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h | |
result.key = t.data[minIdx].key | |
result.val = t.data[minIdx].val | |
proc largest*[A](t: CountTable[A]): tuple[key: A, val: int] = | |
## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) | |
assert t.len > 0 | |
var maxIdx = 0 | |
for h in 1..high(t.data): | |
if t.data[maxIdx].val < t.data[h].val: maxIdx = h | |
result.key = t.data[maxIdx].key | |
result.val = t.data[maxIdx].val | |
proc sort*[A](t: var CountTable[A]) = | |
## sorts the count table so that the entry with the highest counter comes | |
## first. This is destructive! You must not modify `t` afterwards! | |
## You can use the iterators `pairs`, `keys`, and `values` to iterate over | |
## `t` in the sorted order. | |
# we use shellsort here; fast enough and simple | |
var h = 1 | |
while true: | |
h = 3 * h + 1 | |
if h >= high(t.data): break | |
while true: | |
h = h div 3 | |
for i in countup(h, high(t.data)): | |
var j = i | |
while t.data[j-h].val <= t.data[j].val: | |
swap(t.data[j], t.data[j-h]) | |
j = j-h | |
if j < h: break | |
if h == 1: break | |
proc len*[A](t: CountTableRef[A]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A](t: CountTableRef[A]): (A, int) = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A](t: CountTableRef[A]): (A, var int) = | |
## iterates over any (key, value) pair in the table `t`. The values can | |
## be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A](t: CountTableRef[A]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].key | |
iterator values*[A](t: CountTableRef[A]): int = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
iterator mvalues*[A](t: CountTableRef[A]): var int = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
proc `[]`*[A](t: CountTableRef[A], key: A): int = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## 0 is returned. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A](t: CountTableRef[A], key: A): var int = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
result = t[].mget(key) | |
proc hasKey*[A](t: CountTableRef[A], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = | |
## puts a (key, value)-pair into `t`. `val` has to be positive. | |
assert val > 0 | |
t[][key] = val | |
proc newCountTable*[A](initialSize=64): CountTableRef[A] = | |
## creates a new count table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module or the ``rightSize`` method in this module. | |
new(result) | |
result[] = initCountTable[A](initialSize) | |
proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = | |
## creates a new count table with every key in `keys` having a count of 1. | |
result = newCountTable[A](rightSize(keys.len)) | |
for key in items(keys): result[key] = 1 | |
proc `$`*[A](t: CountTableRef[A]): string = | |
## The `$` operator for count tables. | |
dollarImpl() | |
proc inc*[A](t: CountTableRef[A], key: A, val = 1) = | |
## increments `t[key]` by `val`. | |
t[].inc(key, val) | |
proc smallest*[A](t: CountTableRef[A]): (A, int) = | |
## returns the largest (key,val)-pair. Efficiency: O(n) | |
t[].smallest | |
proc largest*[A](t: CountTableRef[A]): (A, int) = | |
## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) | |
t[].largest | |
proc sort*[A](t: CountTableRef[A]) = | |
## sorts the count table so that the entry with the highest counter comes | |
## first. This is destructive! You must not modify `t` afterwards! | |
## You can use the iterators `pairs`, `keys`, and `values` to iterate over | |
## `t` in the sorted order. | |
t[].sort | |
proc merge*[A](s: var CountTable[A], t: CountTable[A]) = | |
## merges the second table into the first one | |
for key, value in t: | |
s.inc(key, value) | |
proc merge*[A](s, t: CountTable[A]): CountTable[A] = | |
## merges the two tables into a new one | |
result = initCountTable[A](nextPowerOfTwo(max(s.len, t.len))) | |
for table in @[s, t]: | |
for key, value in table: | |
result.inc(key, value) | |
proc merge*[A](s, t: CountTableRef[A]) = | |
## merges the second table into the first one | |
s[].merge(t[]) | |
when isMainModule: | |
type | |
Person = object | |
firstName, lastName: string | |
proc hash(x: Person): THash = | |
## Piggyback on the already available string hash proc. | |
## | |
## Without this proc nothing works! | |
result = x.firstName.hash !& x.lastName.hash | |
result = !$result | |
var | |
salaries = initTable[Person, int]() | |
p1, p2: Person | |
p1.firstName = "Jon" | |
p1.lastName = "Ross" | |
salaries[p1] = 30_000 | |
p2.firstName = "소진" | |
p2.lastName = "박" | |
salaries[p2] = 45_000 | |
var | |
s2 = initOrderedTable[Person, int]() | |
s3 = initCountTable[Person]() | |
s2[p1] = 30_000 | |
s2[p2] = 45_000 | |
s3[p1] = 30_000 | |
s3[p2] = 45_000 | |
var | |
t1 = initCountTable[string]() | |
t2 = initCountTable[string]() | |
t1.inc("foo") | |
t1.inc("bar", 2) | |
t1.inc("baz", 3) | |
t2.inc("foo", 4) | |
t2.inc("bar") | |
t2.inc("baz", 11) | |
merge(t1, t2) | |
assert(t1["foo"] == 5) | |
assert(t1["bar"] == 3) | |
assert(t1["baz"] == 14) | |
let | |
t1r = newCountTable[string]() | |
t2r = newCountTable[string]() | |
t1r.inc("foo") | |
t1r.inc("bar", 2) | |
t1r.inc("baz", 3) | |
t2r.inc("foo", 4) | |
t2r.inc("bar") | |
t2r.inc("baz", 11) | |
merge(t1r, t2r) | |
assert(t1r["foo"] == 5) | |
assert(t1r["bar"] == 3) | |
assert(t1r["baz"] == 14) | |
var | |
t1l = initCountTable[string]() | |
t2l = initCountTable[string]() | |
t1l.inc("foo") | |
t1l.inc("bar", 2) | |
t1l.inc("baz", 3) | |
t2l.inc("foo", 4) | |
t2l.inc("bar") | |
t2l.inc("baz", 11) | |
let | |
t1merging = t1l | |
t2merging = t2l | |
let merged = merge(t1merging, t2merging) | |
assert(merged["foo"] == 5) | |
assert(merged["bar"] == 3) | |
assert(merged["baz"] == 14) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# | |
# Nim's Runtime Library | |
# (c) Copyright 2015 Andreas Rumpf | |
# | |
# See the file "copying.txt", included in this | |
# distribution, for details about the copyright. | |
# | |
## The ``tables`` module implements variants of an efficient `hash table`:idx: | |
## (also often named `dictionary`:idx: in other programming languages) that is | |
## a mapping from keys to values. ``Table`` is the usual hash table, | |
## ``OrderedTable`` is like ``Table`` but remembers insertion order | |
## and ``CountTable`` is a mapping from a key to its number of occurances. | |
## For consistency with every other data type in Nim these have **value** | |
## semantics, this means that ``=`` performs a copy of the hash table. | |
## For **reference** semantics use the ``Ref`` variant: ``TableRef``, | |
## ``OrderedTableRef``, ``CountTableRef``. | |
## | |
## If you are using simple standard types like ``int`` or ``string`` for the | |
## keys of the table you won't have any problems, but as soon as you try to use | |
## a more complex object as a key you will be greeted by a strange compiler | |
## error:: | |
## | |
## Error: type mismatch: got (Person) | |
## but expected one of: | |
## hashes.hash(x: openarray[A]): THash | |
## hashes.hash(x: int): THash | |
## hashes.hash(x: float): THash | |
## … | |
## | |
## What is happening here is that the types used for table keys require to have | |
## a ``hash()`` proc which will convert them to a `THash <hashes.html#THash>`_ | |
## value, and the compiler is listing all the hash functions it knows. | |
## Additionally there has to be a ``==`` operator that provides the same | |
## semantics as its corresponding ``hash`` proc. | |
## | |
## After you add ``hash`` and ``==`` for your custom type everything will work. | |
## Currently however ``hash`` for objects is not defined, whereas | |
## ``system.==`` for objects does exist and performs a "deep" comparison (every | |
## field is compared) which is usually what you want. So in the following | |
## example implementing only ``hash`` suffices: | |
## | |
## .. code-block:: | |
## type | |
## Person = object | |
## firstName, lastName: string | |
## | |
## proc hash(x: Person): THash = | |
## ## Piggyback on the already available string hash proc. | |
## ## | |
## ## Without this proc nothing works! | |
## result = x.firstName.hash !& x.lastName.hash | |
## result = !$result | |
## | |
## var | |
## salaries = initTable[Person, int]() | |
## p1, p2: Person | |
## | |
## p1.firstName = "Jon" | |
## p1.lastName = "Ross" | |
## salaries[p1] = 30_000 | |
## | |
## p2.firstName = "소진" | |
## p2.lastName = "박" | |
## salaries[p2] = 45_000 | |
import | |
hashes, math | |
{.pragma: myShallow.} | |
type | |
SlotEnum = enum seEmpty, seFilled, seDeleted | |
KeyValuePair[A, B] = tuple[slot: SlotEnum, key: A, val: B] | |
KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] | |
Table* {.myShallow.}[A, B] = object ## generic hash table | |
data: KeyValuePairSeq[A, B] | |
counter: int | |
TableRef*[A,B] = ref Table[A, B] | |
{.deprecated: [TTable: Table, PTable: TableRef].} | |
when not defined(nimhygiene): | |
{.pragma: dirty.} | |
proc len*[A, B](t: Table[A, B]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A, B](t: Table[A, B]): tuple[key: A, val: B] = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: var Table[A, B]): tuple[key: A, val: var B] = | |
## iterates over any (key, value) pair in the table `t`. The values | |
## can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: Table[A, B]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].key | |
iterator values*[A, B](t: Table[A, B]): B = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].val | |
iterator mvalues*[A, B](t: var Table[A, B]): var B = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].val | |
const | |
growthFactor = 2 | |
proc mustRehash(length, counter: int): bool {.inline.} = | |
assert(length > counter) | |
result = (length * 2 < counter * 3) or (length - counter < 4) | |
proc nextTry(h, maxHash: THash): THash {.inline.} = | |
result = ((5 * h) + 1) and maxHash | |
template rawGetImpl() {.dirty.} = | |
var h: THash = hash(key) and high(t.data) # start with real hash value | |
while t.data[h].slot != seEmpty: | |
if t.data[h].key == key and t.data[h].slot == seFilled: | |
return h | |
h = nextTry(h, high(t.data)) | |
result = -1 | |
template rawInsertImpl() {.dirty.} = | |
var h: THash = hash(key) and high(data) | |
while data[h].slot == seFilled: | |
h = nextTry(h, high(data)) | |
data[h].key = key | |
data[h].val = val | |
data[h].slot = seFilled | |
proc rawGet[A, B](t: Table[A, B], key: A): int = | |
rawGetImpl() | |
proc `[]`*[A, B](t: Table[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A, B](t: var Table[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
else: raise newException(KeyError, "key not found: " & $key) | |
iterator allValues*[A, B](t: Table[A, B]; key: A): B = | |
## iterates over any value in the table `t` that belongs to the given `key`. | |
var h: THash = hash(key) and high(t.data) | |
while t.data[h].slot != seEmpty: | |
if t.data[h].key == key and t.data[h].slot == seFilled: | |
yield t.data[h].val | |
h = nextTry(h, high(t.data)) | |
proc hasKey*[A, B](t: Table[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = rawGet(t, key) >= 0 | |
proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B], | |
key: A, val: B) = | |
rawInsertImpl() | |
proc enlarge[A, B](t: var Table[A, B]) = | |
var n: KeyValuePairSeq[A, B] | |
newSeq(n, len(t.data) * growthFactor) | |
for i in countup(0, high(t.data)): | |
if t.data[i].slot == seFilled: rawInsert(t, n, t.data[i].key, t.data[i].val) | |
swap(t.data, n) | |
template addImpl() {.dirty.} = | |
if mustRehash(len(t.data), t.counter): enlarge(t) | |
rawInsert(t, t.data, key, val) | |
inc(t.counter) | |
template putImpl() {.dirty.} = | |
var index = rawGet(t, key) | |
if index >= 0: | |
t.data[index].val = val | |
else: | |
addImpl() | |
when false: | |
# not yet used: | |
template hasKeyOrPutImpl() {.dirty.} = | |
var index = rawGet(t, key) | |
if index >= 0: | |
t.data[index].val = val | |
result = true | |
else: | |
if mustRehash(len(t.data), t.counter): enlarge(t) | |
rawInsert(t, t.data, key, val) | |
inc(t.counter) | |
result = false | |
proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
putImpl() | |
proc add*[A, B](t: var Table[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
addImpl() | |
proc del*[A, B](t: var Table[A, B], key: A) = | |
## deletes `key` from hash table `t`. | |
let index = rawGet(t, key) | |
if index >= 0: | |
t.data[index].slot = seDeleted | |
dec(t.counter) | |
proc initTable*[A, B](initialSize=64): Table[A, B] = | |
## creates a new hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
newSeq(result.data, initialSize) | |
proc toTable*[A, B](pairs: openArray[tuple[key: A, | |
val: B]]): Table[A, B] = | |
## creates a new hash table that contains the given `pairs`. | |
result = initTable[A, B](nextPowerOfTwo(pairs.len+10)) | |
for key, val in items(pairs): result[key] = val | |
template dollarImpl(): stmt {.dirty.} = | |
if t.len == 0: | |
result = "{:}" | |
else: | |
result = "{" | |
for key, val in pairs(t): | |
if result.len > 1: result.add(", ") | |
result.add($key) | |
result.add(": ") | |
result.add($val) | |
result.add("}") | |
proc `$`*[A, B](t: Table[A, B]): string = | |
## The `$` operator for hash tables. | |
dollarImpl() | |
template equalsImpl() = | |
if s.counter == t.counter: | |
# different insertion orders mean different 'data' seqs, so we have | |
# to use the slow route here: | |
for key, val in s: | |
# prefix notation leads to automatic dereference in case of PTable | |
if not t.hasKey(key): return false | |
if t[key] != val: return false | |
return true | |
proc `==`*[A, B](s, t: Table[A, B]): bool = | |
equalsImpl() | |
proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = | |
## Index the collection with the proc provided. | |
# TODO: As soon as supported, change collection: A to collection: A[B] | |
result = initTable[C, B]() | |
for item in collection: | |
result[index(item)] = item | |
proc len*[A, B](t: TableRef[A, B]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: B] = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: TableRef[A, B]): tuple[key: A, val: var B] = | |
## iterates over any (key, value) pair in the table `t`. The values | |
## can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: TableRef[A, B]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].key | |
iterator values*[A, B](t: TableRef[A, B]): B = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].val | |
iterator mvalues*[A, B](t: TableRef[A, B]): var B = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].slot == seFilled: yield t.data[h].val | |
proc `[]`*[A, B](t: TableRef[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A, B](t: TableRef[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
t[].mget(key) | |
proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
t[][key] = val | |
proc add*[A, B](t: TableRef[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
t[].add(key, val) | |
proc del*[A, B](t: TableRef[A, B], key: A) = | |
## deletes `key` from hash table `t`. | |
t[].del(key) | |
proc newTable*[A, B](initialSize=64): TableRef[A, B] = | |
new(result) | |
result[] = initTable[A, B](initialSize) | |
proc newTable*[A, B](pairs: openArray[tuple[key: A, val: B]]): TableRef[A, B] = | |
## creates a new hash table that contains the given `pairs`. | |
new(result) | |
result[] = toTable[A, B](pairs) | |
proc `$`*[A, B](t: TableRef[A, B]): string = | |
## The `$` operator for hash tables. | |
dollarImpl() | |
proc `==`*[A, B](s, t: TableRef[A, B]): bool = | |
if isNil(s): result = isNil(t) | |
elif isNil(t): result = false | |
else: result = equalsImpl() | |
proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = | |
## Index the collection with the proc provided. | |
# TODO: As soon as supported, change collection: A to collection: A[B] | |
result = newTable[C, B]() | |
for item in collection: | |
result[index(item)] = item | |
# ------------------------------ ordered table ------------------------------ | |
type | |
OrderedKeyValuePair[A, B] = tuple[ | |
slot: SlotEnum, next: int, key: A, val: B] | |
OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] | |
OrderedTable* {. | |
myShallow.}[A, B] = object ## table that remembers insertion order | |
data: OrderedKeyValuePairSeq[A, B] | |
counter, first, last: int | |
OrderedTableRef*[A, B] = ref OrderedTable[A, B] | |
{.deprecated: [TOrderedTable: OrderedTable, POrderedTable: OrderedTableRef].} | |
proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = | |
## returns the number of keys in `t`. | |
result = t.counter | |
template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = | |
var h = t.first | |
while h >= 0: | |
var nxt = t.data[h].next | |
if t.data[h].slot == seFilled: yieldStmt | |
h = nxt | |
iterator pairs*[A, B](t: OrderedTable[A, B]): tuple[key: A, val: B] = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: var OrderedTable[A, B]): tuple[key: A, val: var B] = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. The values can be modified. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: OrderedTable[A, B]): A = | |
## iterates over any key in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].key | |
iterator values*[A, B](t: OrderedTable[A, B]): B = | |
## iterates over any value in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = | |
## iterates over any value in the table `t` in insertion order. The values | |
## can be modified. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
proc rawGet[A, B](t: OrderedTable[A, B], key: A): int = | |
rawGetImpl() | |
proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A, B](t: var OrderedTable[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
else: raise newException(KeyError, "key not found: " & $key) | |
proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = rawGet(t, key) >= 0 | |
proc rawInsert[A, B](t: var OrderedTable[A, B], | |
data: var OrderedKeyValuePairSeq[A, B], | |
key: A, val: B) = | |
rawInsertImpl() | |
data[h].next = -1 | |
if t.first < 0: t.first = h | |
if t.last >= 0: data[t.last].next = h | |
t.last = h | |
proc enlarge[A, B](t: var OrderedTable[A, B]) = | |
var n: OrderedKeyValuePairSeq[A, B] | |
newSeq(n, len(t.data) * growthFactor) | |
var h = t.first | |
t.first = -1 | |
t.last = -1 | |
while h >= 0: | |
var nxt = t.data[h].next | |
if t.data[h].slot == seFilled: | |
rawInsert(t, n, t.data[h].key, t.data[h].val) | |
h = nxt | |
swap(t.data, n) | |
proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
putImpl() | |
proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
addImpl() | |
proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] = | |
## creates a new ordered hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
result.first = -1 | |
result.last = -1 | |
newSeq(result.data, initialSize) | |
proc toOrderedTable*[A, B](pairs: openArray[tuple[key: A, | |
val: B]]): OrderedTable[A, B] = | |
## creates a new ordered hash table that contains the given `pairs`. | |
result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) | |
for key, val in items(pairs): result[key] = val | |
proc `$`*[A, B](t: OrderedTable[A, B]): string = | |
## The `$` operator for ordered hash tables. | |
dollarImpl() | |
proc sort*[A, B](t: var OrderedTable[A, B], | |
cmp: proc (x,y: tuple[key: A, val: B]): int) = | |
## sorts `t` according to `cmp`. This modifies the internal list | |
## that kept the insertion order, so insertion order is lost after this | |
## call but key lookup and insertions remain possible after `sort` (in | |
## contrast to the `sort` for count tables). | |
var list = t.first | |
var | |
p, q, e, tail, oldhead: int | |
nmerges, psize, qsize, i: int | |
if t.counter == 0: return | |
var insize = 1 | |
while true: | |
p = list; oldhead = list | |
list = -1; tail = -1; nmerges = 0 | |
while p >= 0: | |
inc(nmerges) | |
q = p | |
psize = 0 | |
i = 0 | |
while i < insize: | |
inc(psize) | |
q = t.data[q].next | |
if q < 0: break | |
inc(i) | |
qsize = insize | |
while psize > 0 or (qsize > 0 and q >= 0): | |
if psize == 0: | |
e = q; q = t.data[q].next; dec(qsize) | |
elif qsize == 0 or q < 0: | |
e = p; p = t.data[p].next; dec(psize) | |
elif cmp((t.data[p].key, t.data[p].val), | |
(t.data[q].key, t.data[q].val)) <= 0: | |
e = p; p = t.data[p].next; dec(psize) | |
else: | |
e = q; q = t.data[q].next; dec(qsize) | |
if tail >= 0: t.data[tail].next = e | |
else: list = e | |
tail = e | |
p = q | |
t.data[tail].next = -1 | |
if nmerges <= 1: break | |
insize = insize * 2 | |
t.first = list | |
t.last = tail | |
proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = | |
## returns the number of keys in `t`. | |
result = t.counter | |
template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = | |
var h = t.first | |
while h >= 0: | |
var nxt = t.data[h].next | |
if t.data[h].slot == seFilled: yieldStmt | |
h = nxt | |
iterator pairs*[A, B](t: OrderedTableRef[A, B]): tuple[key: A, val: B] = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A, B](t: OrderedTableRef[A, B]): tuple[key: A, val: var B] = | |
## iterates over any (key, value) pair in the table `t` in insertion | |
## order. The values can be modified. | |
forAllOrderedPairs: | |
yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A, B](t: OrderedTableRef[A, B]): A = | |
## iterates over any key in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].key | |
iterator values*[A, B](t: OrderedTableRef[A, B]): B = | |
## iterates over any value in the table `t` in insertion order. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = | |
## iterates over any value in the table `t` in insertion order. The values | |
## can be modified. | |
forAllOrderedPairs: | |
yield t.data[h].val | |
proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): B = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## default empty value for the type `B` is returned | |
## and no exception is raised. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A, B](t: OrderedTableRef[A, B], key: A): var B = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
result = t[].mget(key) | |
proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = | |
## puts a (key, value)-pair into `t`. | |
t[][key] = val | |
proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = | |
## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. | |
t[].add(key, val) | |
proc newOrderedTable*[A, B](initialSize=64): OrderedTableRef[A, B] = | |
## creates a new ordered hash table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module. | |
new(result) | |
result[] = initOrderedTable[A, B]() | |
proc newOrderedTable*[A, B](pairs: openArray[tuple[key: A, | |
val: B]]): OrderedTableRef[A, B] = | |
## creates a new ordered hash table that contains the given `pairs`. | |
result = newOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) | |
for key, val in items(pairs): result[key] = val | |
proc `$`*[A, B](t: OrderedTableRef[A, B]): string = | |
## The `$` operator for ordered hash tables. | |
dollarImpl() | |
proc sort*[A, B](t: OrderedTableRef[A, B], | |
cmp: proc (x,y: tuple[key: A, val: B]): int) = | |
## sorts `t` according to `cmp`. This modifies the internal list | |
## that kept the insertion order, so insertion order is lost after this | |
## call but key lookup and insertions remain possible after `sort` (in | |
## contrast to the `sort` for count tables). | |
t[].sort(cmp) | |
# ------------------------------ count tables ------------------------------- | |
type | |
CountTable* {.myShallow.}[ | |
A] = object ## table that counts the number of each key | |
data: seq[tuple[key: A, val: int]] | |
counter: int | |
CountTableRef*[A] = ref CountTable[A] | |
{.deprecated: [TCountTable: CountTable, PCountTable: CountTableRef].} | |
proc len*[A](t: CountTable[A]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A](t: CountTable[A]): tuple[key: A, val: int] = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A](t: var CountTable[A]): tuple[key: A, val: var int] = | |
## iterates over any (key, value) pair in the table `t`. The values can | |
## be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A](t: CountTable[A]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].key | |
iterator values*[A](t: CountTable[A]): int = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
iterator mvalues*[A](t: CountTable[A]): var int = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
proc rawGet[A](t: CountTable[A], key: A): int = | |
var h: THash = hash(key) and high(t.data) # start with real hash value | |
while t.data[h].val != 0: | |
if t.data[h].key == key: return h | |
h = nextTry(h, high(t.data)) | |
result = -1 | |
proc `[]`*[A](t: CountTable[A], key: A): int = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## 0 is returned. One can check with ``hasKey`` whether the key | |
## exists. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
proc mget*[A](t: var CountTable[A], key: A): var int = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
var index = rawGet(t, key) | |
if index >= 0: result = t.data[index].val | |
else: raise newException(KeyError, "key not found: " & $key) | |
proc hasKey*[A](t: CountTable[A], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = rawGet(t, key) >= 0 | |
proc rawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], | |
key: A, val: int) = | |
var h: THash = hash(key) and high(data) | |
while data[h].val != 0: h = nextTry(h, high(data)) | |
data[h].key = key | |
data[h].val = val | |
proc enlarge[A](t: var CountTable[A]) = | |
var n: seq[tuple[key: A, val: int]] | |
newSeq(n, len(t.data) * growthFactor) | |
for i in countup(0, high(t.data)): | |
if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val) | |
swap(t.data, n) | |
proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = | |
## puts a (key, value)-pair into `t`. `val` has to be positive. | |
assert val > 0 | |
putImpl() | |
proc initCountTable*[A](initialSize=64): CountTable[A] = | |
## creates a new count table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module. | |
assert isPowerOfTwo(initialSize) | |
result.counter = 0 | |
newSeq(result.data, initialSize) | |
proc toCountTable*[A](keys: openArray[A]): CountTable[A] = | |
## creates a new count table with every key in `keys` having a count of 1. | |
result = initCountTable[A](nextPowerOfTwo(keys.len+10)) | |
for key in items(keys): result[key] = 1 | |
proc `$`*[A](t: CountTable[A]): string = | |
## The `$` operator for count tables. | |
dollarImpl() | |
proc inc*[A](t: var CountTable[A], key: A, val = 1) = | |
## increments `t[key]` by `val`. | |
var index = rawGet(t, key) | |
if index >= 0: | |
inc(t.data[index].val, val) | |
else: | |
if mustRehash(len(t.data), t.counter): enlarge(t) | |
rawInsert(t, t.data, key, val) | |
inc(t.counter) | |
proc smallest*[A](t: CountTable[A]): tuple[key: A, val: int] = | |
## returns the largest (key,val)-pair. Efficiency: O(n) | |
assert t.len > 0 | |
var minIdx = 0 | |
for h in 1..high(t.data): | |
if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h | |
result.key = t.data[minIdx].key | |
result.val = t.data[minIdx].val | |
proc largest*[A](t: CountTable[A]): tuple[key: A, val: int] = | |
## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) | |
assert t.len > 0 | |
var maxIdx = 0 | |
for h in 1..high(t.data): | |
if t.data[maxIdx].val < t.data[h].val: maxIdx = h | |
result.key = t.data[maxIdx].key | |
result.val = t.data[maxIdx].val | |
proc sort*[A](t: var CountTable[A]) = | |
## sorts the count table so that the entry with the highest counter comes | |
## first. This is destructive! You must not modify `t` afterwards! | |
## You can use the iterators `pairs`, `keys`, and `values` to iterate over | |
## `t` in the sorted order. | |
# we use shellsort here; fast enough and simple | |
var h = 1 | |
while true: | |
h = 3 * h + 1 | |
if h >= high(t.data): break | |
while true: | |
h = h div 3 | |
for i in countup(h, high(t.data)): | |
var j = i | |
while t.data[j-h].val <= t.data[j].val: | |
swap(t.data[j], t.data[j-h]) | |
j = j-h | |
if j < h: break | |
if h == 1: break | |
proc len*[A](t: CountTableRef[A]): int = | |
## returns the number of keys in `t`. | |
result = t.counter | |
iterator pairs*[A](t: CountTableRef[A]): tuple[key: A, val: int] = | |
## iterates over any (key, value) pair in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator mpairs*[A](t: CountTableRef[A]): tuple[key: A, val: var int] = | |
## iterates over any (key, value) pair in the table `t`. The values can | |
## be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) | |
iterator keys*[A](t: CountTableRef[A]): A = | |
## iterates over any key in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].key | |
iterator values*[A](t: CountTableRef[A]): int = | |
## iterates over any value in the table `t`. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
iterator mvalues*[A](t: CountTableRef[A]): var int = | |
## iterates over any value in the table `t`. The values can be modified. | |
for h in 0..high(t.data): | |
if t.data[h].val != 0: yield t.data[h].val | |
proc `[]`*[A](t: CountTableRef[A], key: A): int = | |
## retrieves the value at ``t[key]``. If `key` is not in `t`, | |
## 0 is returned. One can check with ``hasKey`` whether the key | |
## exists. | |
result = t[][key] | |
proc mget*[A](t: CountTableRef[A], key: A): var int = | |
## retrieves the value at ``t[key]``. The value can be modified. | |
## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. | |
result = t[].mget(key) | |
proc hasKey*[A](t: CountTableRef[A], key: A): bool = | |
## returns true iff `key` is in the table `t`. | |
result = t[].hasKey(key) | |
proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = | |
## puts a (key, value)-pair into `t`. `val` has to be positive. | |
assert val > 0 | |
t[][key] = val | |
proc newCountTable*[A](initialSize=64): CountTableRef[A] = | |
## creates a new count table that is empty. | |
## | |
## `initialSize` needs to be a power of two. If you need to accept runtime | |
## values for this you could use the ``nextPowerOfTwo`` proc from the | |
## `math <math.html>`_ module. | |
new(result) | |
result[] = initCountTable[A](initialSize) | |
proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = | |
## creates a new count table with every key in `keys` having a count of 1. | |
result = newCountTable[A](nextPowerOfTwo(keys.len+10)) | |
for key in items(keys): result[key] = 1 | |
proc `$`*[A](t: CountTableRef[A]): string = | |
## The `$` operator for count tables. | |
dollarImpl() | |
proc inc*[A](t: CountTableRef[A], key: A, val = 1) = | |
## increments `t[key]` by `val`. | |
t[].inc(key, val) | |
proc smallest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = | |
## returns the largest (key,val)-pair. Efficiency: O(n) | |
t[].smallest | |
proc largest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = | |
## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) | |
t[].largest | |
proc sort*[A](t: CountTableRef[A]) = | |
## sorts the count table so that the entry with the highest counter comes | |
## first. This is destructive! You must not modify `t` afterwards! | |
## You can use the iterators `pairs`, `keys`, and `values` to iterate over | |
## `t` in the sorted order. | |
t[].sort | |
when isMainModule: | |
type | |
Person = object | |
firstName, lastName: string | |
proc hash(x: Person): THash = | |
## Piggyback on the already available string hash proc. | |
## | |
## Without this proc nothing works! | |
result = x.firstName.hash !& x.lastName.hash | |
result = !$result | |
var | |
salaries = initTable[Person, int]() | |
p1, p2: Person | |
p1.firstName = "Jon" | |
p1.lastName = "Ross" | |
salaries[p1] = 30_000 | |
p2.firstName = "소진" | |
p2.lastName = "박" | |
salaries[p2] = 45_000 | |
var | |
s2 = initOrderedTable[Person, int]() | |
s3 = initCountTable[Person]() | |
s2[p1] = 30_000 | |
s2[p2] = 45_000 | |
s3[p1] = 30_000 | |
s3[p2] = 45_000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import current_tables | |
var substr_counts: CountTable[string] = initCountTable[string]() | |
var my_string = "Hello, this is sadly broken for strings over 64 characters. Note that it *does* appear to work for short strings." | |
# var my_string = "Like, this would work" | |
let s_len = 32 | |
for i in 0..(my_string.len - s_len): | |
let s = my_string[i..i+s_len-1] | |
substr_counts[s] = 1 | |
# substr_counts.inc(s) # This works | |
echo "Iteration ", i | |
echo "Done." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import old_tables | |
var substr_counts: CountTable[string] = initCountTable[string]() | |
var my_string = "Hello, this is sadly broken for strings over 64 characters. Note that it *does* appear to work for short strings." | |
# var my_string = "Like, this would work" | |
let s_len = 32 | |
for i in 0..(my_string.len - s_len): | |
let s = my_string[i..i+s_len-1] | |
substr_counts[s] = 1 | |
# substr_counts.inc(s) # This works | |
echo "Iteration ", i | |
echo "Done." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment