Skip to content

Instantly share code, notes, and snippets.

@ykdojo
Created October 29, 2020 03:16
Show Gist options
  • Save ykdojo/a8619959bfb175c6168c9d83239b4ae6 to your computer and use it in GitHub Desktop.
Save ykdojo/a8619959bfb175c6168c9d83239b4ae6 to your computer and use it in GitHub Desktop.
hash_table.py
# My video where I talk about this data structure: https://youtu.be/sfWyugl4JWA
class Hashtable:
# Assumption: table_length is a prime number (for example, 5, 701, or 30011)
def __init__(self, table_length):
self.table = [None] * table_length
## An internal search function.
# If it finds the given key in the table, it will return (True, index)
# If not, it will return (False, the index where it would be inserted)
# If the table is full, it will return (False, -1)
# If test_mode is true, it will also add a third return value that shows
# the number of elements that have been checked (in the case where
# the key is not found and the table is not full).
# Assumption: key is a string.
def _search(self, key, test_mode = False):
hash1 = hash(key)
m = len(self.table)
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive)
count = 1 # the number of elements that have been checked - only used when test_mode = True.
if not self.table[initial_i]:
if test_mode:
return (False, initial_i, count)
return (False, initial_i)
elif self.table[initial_i][0] == key:
return (True, initial_i)
## We have a collision!
# We're going to deal with it with double-hashing.
# First, create a new hashed value by slightly modifying the input key.
hash2 = hash(key + 'd')
# hash2 = hash1 # I tried this method and it worked as well as the above method.
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method.
# Our constraint here: 1 <= c < m
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive
c = hash2 % (m - 1) + 1
i = (initial_i + c) % m
while i != initial_i:
count += 1
if not self.table[i]:
if test_mode:
return (False, i, count)
return (False, i)
elif self.table[i][0] == key:
return (True, i)
else:
i = (i + c) % m
return (False, -1) # The table is full.
## Inserts a key value pair. If the key exists, it updates the value.
# If it doesn't exit, it inserts it.
# If the table is full, it returns without doing anything.
# Assumption: key is a string.
# Returns: nothing.
def insert(self, key, value):
result = self._search(key)
if result[1] == -1:
return # The table is full.
# If the key already exists, update the value.
if result[0]:
i = result[1]
self.table[i][1] = value
return
# At this point, we'll know that the given key doesn't exist
# in the table yet, and that result[1] is the index
# where the new key-value pair should be inserted.
i = result[1]
self.table[i] = [key, value]
## Returns the value if the key is found.
# If not, it will return False.
# Assumption: key is a string.
def search(self, key):
result = self._search(key)
if result[1] == -1:
return False # The table is full.
if not result[0]:
return False
i = result[1]
return self.table[i][1]
## I haven't implemented this yet.
# To implement this one with open-addressing (double-hashing),
# you should replace the deleted entry with a dummy value instead of deleting it.
def delete(key):
pass
## The following is for testing the Hashtable class.
if __name__ == "__main__":
ht = Hashtable(5)
ht.insert('key1', 9)
ht.insert('key2', 2)
ht.insert('key3', 10)
ht.insert('key4', 100)
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False.
ht.insert('key5', 10)
assert ht.search('key1') == 9
assert ht.search('key2') == 2
assert ht.search('key3') == 10
assert ht.search('key4') == 100
assert ht.search('key5') == 10
assert not ht.search('key6') # Since this key doesn't exist, it should return False.
# You should be able to update existing values, too.
ht.insert('key3', -1)
ht.insert('key5', 42)
assert ht.search('key3') == -1
assert ht.search('key5') == 42
## The following part is for checking the number of
# elements being checked for un unsuccessful search.
ht2 = Hashtable(30011)
for i in range(1, 20004): # We're going to fill in about two thirds of the table.
ht2.insert('key' + str(i), 1)
# Try searching for a bunch of new keys.
# Then, find the average number of elements that were checked.
total = 0
num_trials = 10**5
max_num = 0
for i in range(0, num_trials):
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2]
total += num_checked
if num_checked > max_num:
max_num = num_checked
average = total / num_trials
print('Average number of elements checked:', average)
print('Max number of elements checked:', max_num)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment