Skip to content

Instantly share code, notes, and snippets.

@innateessence
Last active August 22, 2024 23:42
Show Gist options
  • Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
hashmap.py - custom python hashmap
#!/usr/bin/env python3
from typing import Any
# Consider using the same binary lookup table approach a `switch` statement in C/C++ uses?
# NOTE: I know this is inefficient, I wrote this for fun / out of curiosity and did not allow myself to use any real hashing
# simply trying to leverage `hash()` felt like cheating, looking up hashing algo's also felt like cheating.
# Please don't assume I know nothing about hashing algo's from reading this
class VoidType:
"""
This is designed to mimic the `None` type as closely as possible
but with one key difference
we want `Void is None` to return False
This way we can fill an array with a None-like value
while preserving any real use-cases for `None`
"""
_instance = None
_memory_address = 0
def __new__(cls):
if cls._instance is None:
cls._instance = super(VoidType, cls).__new__(cls)
cls._memory_address = id(cls._instance)
return cls._instance
def __str__(self):
return "Void"
def __repr__(self):
return "Void"
def __bool__(self):
return False
def __hash__(self):
return hash(self._memory_address)
def __eq__(self, other):
return (
hasattr(other, "_memory_address")
and self._memory_address == other._memory_address
)
Void = VoidType()
class PrimeGenerator:
@staticmethod
def _is_prime(num):
if num < 2:
return False
for i in range(2, int(num**0.5) + 1):
if num % i == 0:
return False
return True
@classmethod
def generate_primes(cls):
num = 2
while True:
if cls._is_prime(num):
yield num
num += 1
@classmethod
def primes(cls, n=10) -> set[int]:
retval = set()
gen = cls.generate_primes()
while len(retval) < n:
retval.add(next(gen))
return retval
class HashMap:
def __init__(self, size=20480):
self.__malloc_size = size
self.__chars = "0123456789abcdefghijklmnopqrstuvwxyz"
self.__primes = [i for i in PrimeGenerator.primes(len(self.__chars))]
self._keys = set()
self._values = [Void] * size
self._uuid_nums = size // 32
def add(self, key: str | int, value: Any) -> None:
key = str(key).lower()
idx = self.__hash(key)
while idx >= len(self._values):
self.__malloc()
self._keys.add(key)
self._values[idx] = value
self.__free()
def delete(self, key: str | int) -> None:
key = str(key)
idx = self.__hash(key)
self._keys.remove(key)
self._values[idx] = Void
def values(self) -> list[Any]:
retval = set(self._values)
retval.remove(Void)
return list(retval)
def keys(self) -> list[str]:
return list(self._keys)
def __hash(self, key: str | int) -> int:
'''
This isn't really a hash function, it's more of a unique index generator...
'''
key = str(key).lower()
retval = 1
for char in key:
idx = self.__chars.index(char)
prime = self.__primes[idx]
retval *= prime
return retval
def __malloc(self) -> None:
self._values += [Void] * self.__malloc_size
def __free(self) -> None:
"""
Free any over-allocated memory via removing trailing Void
"""
should_stop = False
for i in range(len(self._values) - 1, -1, -1):
if self._values[i] == Void and not should_stop:
self._values.pop(i)
else:
should_stop = True
def __setitem__(self, key: str | int, value: Any) -> None:
key = str(key)
self.add(key, value)
def __getitem__(self, key: str | int) -> Any:
key = str(key)
lookup_idx = self.__hash(key)
return self._values[lookup_idx]
h1 = HashMap()
h2 = HashMap()
h2["foo"] = "bar"
h1["foo"] = h2
print(h1["foo"])
print(h1["foo"]["foo"])
@innateessence
Copy link
Author

innateessence commented Jul 3, 2024

This is very inefficient. I'm aware I should use something that already exists but I wanted to challenge myself to not rely on any hashing function someone else has already made :)

This was just for fun anyways and should never be used in any kind of production system, because while this in theory will never produce an unexpected collision, this consumes a massive amount of memory for hashmap insertions because of the approach I used to avoid collisions :)

Now I have no reservations against reading the source code of the dict in CPython

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment