Last active
August 22, 2024 23:42
-
-
Save innateessence/c626cf7d980c1344ec9c301a28e36768 to your computer and use it in GitHub Desktop.
hashmap.py - custom python hashmap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from typing import Any | |
# Consider using the same binary lookup table approach a `switch` statement in C/C++ uses? | |
# NOTE: I know this is inefficient, I wrote this for fun / out of curiosity and did not allow myself to use any real hashing | |
# simply trying to leverage `hash()` felt like cheating, looking up hashing algo's also felt like cheating. | |
# Please don't assume I know nothing about hashing algo's from reading this | |
class VoidType: | |
""" | |
This is designed to mimic the `None` type as closely as possible | |
but with one key difference | |
we want `Void is None` to return False | |
This way we can fill an array with a None-like value | |
while preserving any real use-cases for `None` | |
""" | |
_instance = None | |
_memory_address = 0 | |
def __new__(cls): | |
if cls._instance is None: | |
cls._instance = super(VoidType, cls).__new__(cls) | |
cls._memory_address = id(cls._instance) | |
return cls._instance | |
def __str__(self): | |
return "Void" | |
def __repr__(self): | |
return "Void" | |
def __bool__(self): | |
return False | |
def __hash__(self): | |
return hash(self._memory_address) | |
def __eq__(self, other): | |
return ( | |
hasattr(other, "_memory_address") | |
and self._memory_address == other._memory_address | |
) | |
Void = VoidType() | |
class PrimeGenerator: | |
@staticmethod | |
def _is_prime(num): | |
if num < 2: | |
return False | |
for i in range(2, int(num**0.5) + 1): | |
if num % i == 0: | |
return False | |
return True | |
@classmethod | |
def generate_primes(cls): | |
num = 2 | |
while True: | |
if cls._is_prime(num): | |
yield num | |
num += 1 | |
@classmethod | |
def primes(cls, n=10) -> set[int]: | |
retval = set() | |
gen = cls.generate_primes() | |
while len(retval) < n: | |
retval.add(next(gen)) | |
return retval | |
class HashMap: | |
def __init__(self, size=20480): | |
self.__malloc_size = size | |
self.__chars = "0123456789abcdefghijklmnopqrstuvwxyz" | |
self.__primes = [i for i in PrimeGenerator.primes(len(self.__chars))] | |
self._keys = set() | |
self._values = [Void] * size | |
self._uuid_nums = size // 32 | |
def add(self, key: str | int, value: Any) -> None: | |
key = str(key).lower() | |
idx = self.__hash(key) | |
while idx >= len(self._values): | |
self.__malloc() | |
self._keys.add(key) | |
self._values[idx] = value | |
self.__free() | |
def delete(self, key: str | int) -> None: | |
key = str(key) | |
idx = self.__hash(key) | |
self._keys.remove(key) | |
self._values[idx] = Void | |
def values(self) -> list[Any]: | |
retval = set(self._values) | |
retval.remove(Void) | |
return list(retval) | |
def keys(self) -> list[str]: | |
return list(self._keys) | |
def __hash(self, key: str | int) -> int: | |
''' | |
This isn't really a hash function, it's more of a unique index generator... | |
''' | |
key = str(key).lower() | |
retval = 1 | |
for char in key: | |
idx = self.__chars.index(char) | |
prime = self.__primes[idx] | |
retval *= prime | |
return retval | |
def __malloc(self) -> None: | |
self._values += [Void] * self.__malloc_size | |
def __free(self) -> None: | |
""" | |
Free any over-allocated memory via removing trailing Void | |
""" | |
should_stop = False | |
for i in range(len(self._values) - 1, -1, -1): | |
if self._values[i] == Void and not should_stop: | |
self._values.pop(i) | |
else: | |
should_stop = True | |
def __setitem__(self, key: str | int, value: Any) -> None: | |
key = str(key) | |
self.add(key, value) | |
def __getitem__(self, key: str | int) -> Any: | |
key = str(key) | |
lookup_idx = self.__hash(key) | |
return self._values[lookup_idx] | |
h1 = HashMap() | |
h2 = HashMap() | |
h2["foo"] = "bar" | |
h1["foo"] = h2 | |
print(h1["foo"]) | |
print(h1["foo"]["foo"]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is very inefficient. I'm aware I should use something that already exists but I wanted to challenge myself to not rely on any hashing function someone else has already made :)
This was just for fun anyways and should never be used in any kind of production system, because while this in theory will never produce an unexpected collision, this consumes a massive amount of memory for hashmap insertions because of the approach I used to avoid collisions :)
Now I have no reservations against reading the source code of the
dict
in CPython