Skip to content

Instantly share code, notes, and snippets.

@josch
Last active July 12, 2021 07:54
Show Gist options
  • Save josch/5714060 to your computer and use it in GitHub Desktop.
Save josch/5714060 to your computer and use it in GitHub Desktop.
pearson hash and dolch word list experiments
from random import shuffle
from itertools import permutations
# dolch word list excluding "Santa Claus" (94 words)
words = [ "apple", "baby", "back", "ball", "bear", "bed", "bell", "bird",
"birthday", "boat", "box", "boy", "bread", "brother", "cake", "car",
"cat", "chair", "chicken", "children", "christmas", "coat", "corn",
"cow", "day", "dog", "doll", "door", "duck", "egg", "eye", "farm",
"farmer", "father", "feet", "fire", "fish", "floor", "flower",
"game", "garden", "girl", "good-bye", "grass", "ground", "hand",
"head", "hill", "home", "horse", "house", "kitty", "leg", "letter",
"man", "men", "milk", "money", "morning", "mother", "name", "nest",
"night", "paper", "party", "picture", "pig", "rabbit", "rain",
"ring", "robin", "school", "seed", "sheep", "shoe", "sister", "snow",
"song", "squirrel", "stick", "street", "sun", "table", "thing",
"time", "top", "toy", "tree", "watch", "water", "way", "wind",
"window", "wood"]
# permutation of range(256) that maps all words of the dolch word list above to
# a UNIQUE hash using the pearson hash function
lookup = [ 235, 1, 182, 92, 164, 183, 229, 88, 157, 44, 207, 28, 168, 51, 2,
132, 31, 11, 162, 251, 201, 158, 134, 23, 96, 85, 184, 143, 141, 98,
197, 12, 214, 111, 127, 131, 186, 243, 255, 60, 82, 215, 17, 179,
57, 73, 39, 78, 189, 198, 5, 84, 83, 63, 178, 7, 95, 194, 105, 32,
191, 38, 155, 55, 219, 218, 205, 54, 234, 169, 226, 249, 204, 159,
21, 112, 254, 145, 170, 163, 48, 34, 172, 102, 140, 122, 248, 181,
52, 239, 37, 115, 236, 153, 101, 180, 68, 116, 138, 53, 165, 14, 71,
65, 22, 10, 177, 135, 192, 252, 208, 133, 223, 230, 25, 46, 188,
113, 90, 43, 142, 9, 61, 47, 70, 137, 245, 253, 233, 244, 0, 247,
62, 16, 110, 126, 79, 76, 13, 58, 124, 15, 149, 190, 220, 45, 87,
228, 222, 59, 147, 108, 206, 160, 185, 77, 19, 200, 144, 210, 150,
136, 212, 104, 97, 250, 154, 139, 106, 199, 217, 176, 30, 3, 117,
225, 20, 125, 227, 81, 161, 128, 123, 121, 195, 231, 224, 50, 146,
67, 86, 203, 246, 166, 187, 167, 119, 114, 99, 173, 240, 130, 148,
40, 109, 64, 24, 242, 129, 4, 8, 72, 213, 35, 18, 152, 237, 29, 211,
94, 238, 69, 221, 80, 209, 91, 156, 193, 100, 33, 232, 118, 27, 171,
41, 174, 107, 6, 49, 66, 36, 74, 120, 241, 216, 89, 93, 56, 103, 42,
202, 151, 26, 196, 175, 75]
# python snippet that found above permutation
"""
lookup = range(256)
while 1:
shuffle(lookup)
hlist = list()
valid = True
for w in words:
h = 0
for c in w:
h = lookup[h ^ ord(c)]
if h in hlist:
valid = False
break
hlist.append(h)
if valid:
print lookup
break
"""
# 162 words combined from the dolch word list that will fill the remaining gaps
# in the hash values generated by the 94 original words to in the end have 256
# words mapping uniquely to the numbers from 0 to 255 using the pearson hash
# algorithm
words_add = ['man-snow', 'morning-sheep', 'hand-boy', 'wood-garden',
'school-window', 'ground-man', 'duck-morning', 'squirrel-box',
'corn-boat', 'night-rabbit', 'corn-fire', 'man-top', 'bird-duck',
'nest-box', 'ground-fish', 'mother-squirrel', 'rabbit-morning',
'time-pig', 'girl-corn', 'coat-mother', 'garden-name', 'sun-cake',
'hand-nest', 'kitty-brother', 'shoe-hill', 'kitty-car',
'christmas-robin', 'flower-farmer', 'cow-fish', 'ground-tree',
'street-morning', 'bird-leg', 'ring-kitty', 'pig-bear',
'horse-duck', 'house-bed', 'snow-boat', 'sun-birthday',
'way-flower', 'house-night', 'stick-home', 'back-paper',
'cat-apple', 'mother-farmer', 'house-robin', 'party-men',
'paper-rabbit', 'grass-morning', 'sun-chicken', 'apple-rabbit',
'dog-money', 'flower-sun', 'nest-dog', 'paper-chicken',
'paper-watch', 'farm-apple', 'box-tree', 'corn-chicken',
'boat-letter', 'good-bye-song', 'bell-home', 'apple-bed',
'leg-boat', 'box-robin', 'head-table', 'cat-bread', 'head-rain',
'dog-door', 'tree-cow', 'back-cat', 'party-feet', 'bed-flower',
'house-horse', 'snow-garden', 'duck-hill', 'birthday-money',
'sun-men', 'seed-home', 'tree-feet', 'table-time', 'feet-robin',
'robin-fire', 'letter-bird', 'bird-ball', 'seed-leg', 'snow-men',
'farm-bell', 'chicken-street', 'stick-day', 'garden-rain',
'garden-seed', 'feet-duck', 'back-mother', 'ring-sister',
'game-ring', 'leg-man', 'bread-sheep', 'pig-apple', 'bread-horse',
'corn-feet', 'chair-stick', 'farm-fire', 'bird-christmas',
'fish-bell', 'cat-hand', 'rain-hill', 'toy-snow', 'party-wood',
'eye-bear', 'farm-picture', 'leg-duck', 'rabbit-window',
'garden-party', 'rabbit-baby', 'shoe-robin', 'coat-bed',
'tree-game', 'kitty-father', 'robin-men', 'day-milk',
'cake-morning', 'chair-rain', 'seed-car', 'flower-chair',
'cow-chair', 'bell-boy', 'floor-school', 'door-doll',
'letter-man', 'thing-feet', 'christmas-nest', 'window-squirrel',
'seed-snow', 'school-night', 'eye-money', 'kitty-house',
'shoe-children', 'egg-boat', 'flower-farm', 'apple-leg',
'good-bye-sun', 'ground-robin', 'good-bye-tree', 'chair-fish',
'night-wood', 'boat-farmer', 'pig-nest', 'party-kitty',
'sheep-coat', 'song-night', 'cow-head', 'back-girl', 'shoe-head',
'ball-song', 'wind-paper', 'children-home', 'bell-time',
'stick-nest', 'sister-kitty', 'bell-picture', 'street-school',
'song-dog']
# python snippet that was used to generate above list
"""
keys = list()
for w in words:
h = 0
for c in w:
h = lookup[h ^ ord(c)]
keys.append(h)
add_keys = list()
for k in range(256):
if k not in keys:
shuffle(words)
for perm in permutations(words, 2):
perm = "-".join(perm)
h = 0
for c in perm:
h = lookup[h ^ ord(c)]
if h == k:
add_keys.append(perm)
break
print add_keys
"""
# print out the mapping from hash value to word of the aqcuiered wordlists
combined = [0]*256
for w in words+words_add:
h = 0
for c in w:
h = lookup[h ^ ord(c)]
combined[h] = w
for h, w in enumerate(combined):
print("%d: %s"%(h, w))
@josch
Copy link
Author

josch commented Jul 12, 2021

This is an oldie, and I am using it modified, but the following line needs correction.

Indeed this was so old that it wasn't ready for python3 yet -- I wonder why people find this still useful?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment