Skip to content

Instantly share code, notes, and snippets.

@270ajay
Created March 5, 2020 06:49
Show Gist options
  • Save 270ajay/54f54725a9e78a74aa29515de2be4961 to your computer and use it in GitHub Desktop.
Save 270ajay/54f54725a9e78a74aa29515de2be4961 to your computer and use it in GitHub Desktop.
hash tables (how python dict and set works)
import random
'''Course: https://www.coursera.org/learn/data-structures#about'''
def isPrime(num):
'''src: https://codereview.stackexchange.com/questions/71212/find-smallest-prime-number-greater-than-given-n'''
if num%2 == 0 or num%3 == 0:
return False
rootNum = int(num ** 0.5)
for i in range(5, rootNum+1, 6):
if num%i == 0:
return False
for i in range(7, rootNum+1, 6):
if num%i == 0:
return False
return True
def getPrimeGreaterThan(num):
num+=1
while True:
if isPrime(num):
return num
num+=1
class HashMap:
'''dict in python; HashMap in java; unordered_map in c++'''
loadFactor = 0.9
defaultMaxIntegerKey = int(1e+10)
def __init__(self, maxIntegerKey=None):
self.size = 2
self.array = []
for i in range(self.size):
self.array.append([])
self.maxIntegerKey = maxIntegerKey if maxIntegerKey != None else HashMap.defaultMaxIntegerKey
self.primeNum = getPrimeGreaterThan(self.maxIntegerKey)
self.hashFuncCoeffA = random.randrange(1, self.primeNum - 1)
self.hasFuncCoeffB = random.randrange(0, self.primeNum - 1)
self.numOfKeys = 0
def polyHash(self, stringObj):
hashValue = 0
for i in range(len(stringObj) - 1, -1, -1):
hashValue = (((hashValue * self.hashFuncCoeffA) + ord(stringObj[i])) % self.primeNum)
return hashValue
def hashFunction(self, obj):
if isinstance(obj, int):
return ((((self.hashFuncCoeffA * obj) + self.hasFuncCoeffB) % self.primeNum) % self.size)
elif isinstance(obj, str):
hashValueOfStr = self.polyHash(obj)
return ((((self.hashFuncCoeffA * hashValueOfStr) + self.hasFuncCoeffB) % self.primeNum) % self.size)
else:
raise Exception("Only takes integer or string as keys")
def hasKey(self, obj):
array = self.array[self.hashFunction(obj)]
for objValueTuple in array:
if objValueTuple[0] == obj:
return True
return False
def get(self, obj):
array = self.array[self.hashFunction(obj)]
for objValueList in array:
if objValueList[0] == obj:
return objValueList[1]
raise Exception("Object not present")
def set(self, obj, value):
if self.numOfKeys/self.size > HashMap.loadFactor:
self.rehash()
array = self.array[self.hashFunction(obj)]
for objValueList in array:
if objValueList[0] == obj:
objValueList[1] = value
return
array.append([obj, value])
self.numOfKeys += 1
def rehash(self):
new2DArray = []
for i in range(self.size * 2):
new2DArray.append([])
self.getNewRandomHashFuncCoeff()
self.size *= 2
for array in self.array:
if array:
for objValueList in array:
newArray = new2DArray[self.hashFunction(objValueList[0])]
newArray.append(objValueList)
self.array = new2DArray
def getNewRandomHashFuncCoeff(self):
self.hashFuncCoeffA = random.randrange(1, self.primeNum - 1)
self.hasFuncCoeffB = random.randrange(0, self.primeNum - 1)
def __str__(self):
string = "{"
for array in self.array:
if array:
for objValueList in array:
string += " "+ str(objValueList[0]) + ": "+ str(objValueList[1])+","
string+= ' }'
return string
class HashSet:
'''set in python, HashSet in java, unordered_set in c++'''
loadFactor = 0.9
defaultMaxIntegerKey = int(1e+10)
def __init__(self, maxIntegerKey=None):
self.size = 2
self.array = []
for i in range(self.size):
self.array.append([])
self.maxIntegerKey = maxIntegerKey if maxIntegerKey != None else HashSet.defaultMaxIntegerKey
self.primeNum = getPrimeGreaterThan(self.maxIntegerKey)
self.hashFuncCoeffA = random.randrange(1, self.primeNum - 1)
self.hasFuncCoeffB = random.randrange(0, self.primeNum - 1)
self.numOfObjects = 0
def polyHash(self, stringObj):
hashValue = 0
for i in range(len(stringObj) - 1, -1, -1):
hashValue = (((hashValue * self.hashFuncCoeffA) + ord(stringObj[i])) % self.primeNum)
return hashValue
def hashFunction(self, obj):
if isinstance(obj, int):
return ((((self.hashFuncCoeffA * obj) + self.hasFuncCoeffB) % self.primeNum) % self.size)
elif isinstance(obj, str):
hashValueOfStr = self.polyHash(obj)
return ((((self.hashFuncCoeffA * hashValueOfStr) + self.hasFuncCoeffB) % self.primeNum) % self.size)
else:
raise Exception("Only takes integer or string as keys")
def find(self, obj):
array = self.array[self.hashFunction(obj)]
for obj1 in array:
if obj1 == obj:
return True
return False
def remove(self, obj):
if not self.find(obj):
return
array = self.array[self.hashFunction(obj)]
array.remove(obj)
def add(self, obj):
if self.numOfObjects/self.size > HashSet.loadFactor:
self.rehash()
array = self.array[self.hashFunction(obj)]
for obj2 in array:
if obj2 == obj:
return
array.append(obj)
self.numOfObjects += 1
def rehash(self):
new2DArray = []
for i in range(self.size * 2):
new2DArray.append([])
self.getNewRandomHashFuncCoeff()
self.size *= 2
for array in self.array:
if array:
for obj in array:
newArray = new2DArray[self.hashFunction(obj)]
newArray.append(obj)
self.array = new2DArray
def getNewRandomHashFuncCoeff(self):
self.hashFuncCoeffA = random.randrange(1, self.primeNum - 1)
self.hasFuncCoeffB = random.randrange(0, self.primeNum - 1)
def __str__(self):
string = "{"
for array in self.array:
if array:
for obj in array:
string += " "+ str(obj) +","
string+= ' }'
return string
def areEqualStrings(string1, string2):
lengthOfString1 = len(string1)
if lengthOfString1 != len(string2):
return False
for i in range(lengthOfString1):
if string1[i] != string2[i]:
return False
return True
def findPatternNaive(text, pattern):
'''returns the index/indices of the pattern in text'''
result = []
lengthOfPattern = len(pattern)
for i in range(len(text) - lengthOfPattern + 1):
if areEqualStrings(text[i:i+lengthOfPattern], pattern):
result.append(i)
return result
def polyHash(string, polyHashCoeff, primeNum):
'''returns the hash value of string'''
hashValue = 0
for i in range(len(string) - 1, -1, -1):
hashValue = (((hashValue * polyHashCoeff) + ord(string[i])) % primeNum)
return hashValue
def rabinKarp(text, pattern):
'''returns the index/indices of the pattern in text. It is not faster than findPatternNaive'''
lengthOfPattern = len(pattern)
lengthOfText = len(text)
primeNum = getPrimeGreaterThan(10 * lengthOfText * lengthOfPattern)
polyHashCoeff = random.randrange(1, primeNum - 1)
result = []
patternHash = polyHash(pattern, polyHashCoeff, primeNum)
for i in range(lengthOfText - lengthOfPattern + 1):
textHash = polyHash(text[i:i+lengthOfPattern], polyHashCoeff, primeNum)
if patternHash != textHash:
continue
if areEqualStrings(text[i:i+lengthOfPattern], pattern):
result.append(i)
return result
def precomputeHashes(text, lengthOfPattern, polyHashCoeff, primeNum):
'''this function is used in rabinKarpImproved function below'''
lengthOfText = len(text)
hashArray = [None] * (lengthOfText - lengthOfPattern + 1)
lastSubStringOfText = text[lengthOfText - lengthOfPattern : lengthOfText]
hashArray[lengthOfText - lengthOfPattern] = polyHash(lastSubStringOfText, polyHashCoeff, primeNum)
y = 1
for _ in range(lengthOfPattern):
y = (y * polyHashCoeff) % primeNum
for i in range(lengthOfText - lengthOfPattern - 1, -1, -1):
hashArray[i] = ((polyHashCoeff * hashArray[i + 1]) + ord(text[i]) - (y * ord(text[i+lengthOfPattern]))) % primeNum
return hashArray
def rabinKarpImproved(text, pattern):
'''returns the index/indices of the pattern in text. It is faster than findPatternNaive'''
lengthOfPattern = len(pattern)
lengthOfText = len(text)
primeNum = getPrimeGreaterThan(10 * lengthOfText * lengthOfPattern)
polyHashCoeff = random.randrange(1, primeNum - 1)
result = []
patternHash = polyHash(pattern, polyHashCoeff, primeNum)
hashArray = precomputeHashes(text, lengthOfPattern, polyHashCoeff, primeNum)
for i in range(lengthOfText - lengthOfPattern + 1):
if patternHash != hashArray[i]:
continue
if areEqualStrings(text[i:i+lengthOfPattern], pattern):
result.append(i)
return result
if __name__ == '__main__':
hashMap = HashMap(maxIntegerKey=100)
hashMap.set(10, 120)
hashMap.set(2, 30)
hashMap.set(14, 300)
hashMap.set(25, 10)
hashMap.set('ajay', 10)
print(hashMap)
print(hashMap.get('ajay'))
hashSet = HashSet(maxIntegerKey=100)
hashSet.add('ajay')
hashSet.add('ajay')
hashSet.add(1)
hashSet.add(3)
print(hashSet)
print("\nNaive approach: thisisanexampletocheck contains word example from indices:", findPatternNaive('thisisanexampletocheck', 'example'))
print("Rabin karp algorithm: thisisanexampletocheck contains word example from indices:", rabinKarp('thisisanexampletocheck', 'example'))
print("Rabin karp improved: thisisanexampletocheck contains word example from indices:", rabinKarpImproved('thisisanexampletocheck', 'example'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment