Created
October 29, 2020 03:16
-
-
Save ykdojo/a8619959bfb175c6168c9d83239b4ae6 to your computer and use it in GitHub Desktop.
hash_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# My video where I talk about this data structure: https://youtu.be/sfWyugl4JWA | |
class Hashtable: | |
# Assumption: table_length is a prime number (for example, 5, 701, or 30011) | |
def __init__(self, table_length): | |
self.table = [None] * table_length | |
## An internal search function. | |
# If it finds the given key in the table, it will return (True, index) | |
# If not, it will return (False, the index where it would be inserted) | |
# If the table is full, it will return (False, -1) | |
# If test_mode is true, it will also add a third return value that shows | |
# the number of elements that have been checked (in the case where | |
# the key is not found and the table is not full). | |
# Assumption: key is a string. | |
def _search(self, key, test_mode = False): | |
hash1 = hash(key) | |
m = len(self.table) | |
initial_i = hash1 % m # initial_i's range: [0, m - 1] (inclusive) | |
count = 1 # the number of elements that have been checked - only used when test_mode = True. | |
if not self.table[initial_i]: | |
if test_mode: | |
return (False, initial_i, count) | |
return (False, initial_i) | |
elif self.table[initial_i][0] == key: | |
return (True, initial_i) | |
## We have a collision! | |
# We're going to deal with it with double-hashing. | |
# First, create a new hashed value by slightly modifying the input key. | |
hash2 = hash(key + 'd') | |
# hash2 = hash1 # I tried this method and it worked as well as the above method. | |
# hash2 = 0 # This would be linear probing. It did NOT perform as well as the above method. | |
# Our constraint here: 1 <= c < m | |
# Note that hash2 % (m - 1) produces a number in the range [0, m - 2] inclusive | |
c = hash2 % (m - 1) + 1 | |
i = (initial_i + c) % m | |
while i != initial_i: | |
count += 1 | |
if not self.table[i]: | |
if test_mode: | |
return (False, i, count) | |
return (False, i) | |
elif self.table[i][0] == key: | |
return (True, i) | |
else: | |
i = (i + c) % m | |
return (False, -1) # The table is full. | |
## Inserts a key value pair. If the key exists, it updates the value. | |
# If it doesn't exit, it inserts it. | |
# If the table is full, it returns without doing anything. | |
# Assumption: key is a string. | |
# Returns: nothing. | |
def insert(self, key, value): | |
result = self._search(key) | |
if result[1] == -1: | |
return # The table is full. | |
# If the key already exists, update the value. | |
if result[0]: | |
i = result[1] | |
self.table[i][1] = value | |
return | |
# At this point, we'll know that the given key doesn't exist | |
# in the table yet, and that result[1] is the index | |
# where the new key-value pair should be inserted. | |
i = result[1] | |
self.table[i] = [key, value] | |
## Returns the value if the key is found. | |
# If not, it will return False. | |
# Assumption: key is a string. | |
def search(self, key): | |
result = self._search(key) | |
if result[1] == -1: | |
return False # The table is full. | |
if not result[0]: | |
return False | |
i = result[1] | |
return self.table[i][1] | |
## I haven't implemented this yet. | |
# To implement this one with open-addressing (double-hashing), | |
# you should replace the deleted entry with a dummy value instead of deleting it. | |
def delete(key): | |
pass | |
## The following is for testing the Hashtable class. | |
if __name__ == "__main__": | |
ht = Hashtable(5) | |
ht.insert('key1', 9) | |
ht.insert('key2', 2) | |
ht.insert('key3', 10) | |
ht.insert('key4', 100) | |
assert not ht.search('key5') # Since this key doesn't exist yet, it should return False. | |
ht.insert('key5', 10) | |
assert ht.search('key1') == 9 | |
assert ht.search('key2') == 2 | |
assert ht.search('key3') == 10 | |
assert ht.search('key4') == 100 | |
assert ht.search('key5') == 10 | |
assert not ht.search('key6') # Since this key doesn't exist, it should return False. | |
# You should be able to update existing values, too. | |
ht.insert('key3', -1) | |
ht.insert('key5', 42) | |
assert ht.search('key3') == -1 | |
assert ht.search('key5') == 42 | |
## The following part is for checking the number of | |
# elements being checked for un unsuccessful search. | |
ht2 = Hashtable(30011) | |
for i in range(1, 20004): # We're going to fill in about two thirds of the table. | |
ht2.insert('key' + str(i), 1) | |
# Try searching for a bunch of new keys. | |
# Then, find the average number of elements that were checked. | |
total = 0 | |
num_trials = 10**5 | |
max_num = 0 | |
for i in range(0, num_trials): | |
num_checked = ht2._search('new_key_' + str(i), test_mode=True)[2] | |
total += num_checked | |
if num_checked > max_num: | |
max_num = num_checked | |
average = total / num_trials | |
print('Average number of elements checked:', average) | |
print('Max number of elements checked:', max_num) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment