Created
May 10, 2019 09:21
-
-
Save mratsim/e5a2d1d74adc2763ab7b080a7c40ef1e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashes, sets, random, sequtils, times | |
const IntSize = sizeof(int) | |
proc randomString(len: Natural): string = | |
result = newString(len) | |
for c in result.mitems: | |
c = rand('a'..'z') | |
proc sum[T](x: seq[T]): T = | |
foldr(x, a+b) | |
template multibyteHashImpl(result: Hash, x: typed, start, stop: int) = | |
var i = start | |
while i <= stop+1 - IntSize: | |
let n = cast[ptr int](unsafeAddr x[i])[] | |
result = result !& n | |
i += IntSize | |
while i <= stop: | |
result = result !& ord(x[i]) | |
inc i | |
result = !$result | |
proc newHash*(x: string): Hash = | |
## Efficient hashing of strings. | |
## | |
## See also: | |
## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_ | |
## * `hashIgnoreCase <#hashIgnoreCase,string>`_ | |
runnableExamples: | |
doAssert hash("abracadabra") != hash("AbracadabrA") | |
multibyteHashImpl(result, x, 0, high(x)) | |
const | |
Amount = 20_000 | |
Repetitions = 48 | |
StringSize = 9 | |
proc hashTesting(wordlist: seq[string], f: proc (x: string): Hash, name: string): float = | |
var | |
collisions = newSeq[int](Repetitions) | |
highest = newSeq[int](Repetitions) | |
times = newSeq[float](Repetitions) | |
for i in 0 ..< Repetitions: | |
let words = wordList[Amount*i ..< Amount*i+Amount] | |
var s = initHashSet[int](sets.rightSize(Amount)) | |
var t = cpuTime() | |
for w in words: | |
s.incl f(w) | |
t = cpuTime() - t | |
times[i] = t | |
collisions[i] = Amount - s.len | |
highest[i] = s.toSeq.max | |
echo "--" | |
echo name | |
echo highest.min | |
echo collisions.sum / collisions.len | |
echo "max: ", times.max * 1000.0 | |
echo "min: ", times.min * 1000.0 | |
return times.sum / times.len.float * 1000.0 | |
when isMainModule: | |
let wordlist = newSeqWith(Amount*Repetitions, randomString(StringSize)) | |
echo hashTesting(wordlist, hash, "Old") | |
echo hashTesting(wordlist, newHash, "New") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment