Skip to content

Instantly share code, notes, and snippets.

@laytan
Last active November 13, 2024 18:41
Show Gist options
  • Save laytan/39335104fd4ffd2e7a7c2912cc00458d to your computer and use it in GitHub Desktop.
Save laytan/39335104fd4ffd2e7a7c2912cc00458d to your computer and use it in GitHub Desktop.
Case insensitive map in Odin (very basic)
package main
import "base:intrinsics"
import "base:runtime"
import "core:hash"
import "core:mem"
import "core:strings"
import "core:unicode"
import "core:unicode/utf8"
MIN_SIZE :: 8
MAX_OCCUPIED :: 60
Spot :: struct($T: typeid) {
key: string,
value: T,
hash: uint,
}
Case_Map :: struct($T: typeid) {
allocator: runtime.Allocator,
spots: []Spot(T),
len: int,
}
case_map_make :: proc($T: typeid, cap := MIN_SIZE, allocator := context.allocator) -> Case_Map(T) {
length := (max(cap, MIN_SIZE) * 100 / MAX_OCCUPIED) + 1
return {
allocator = allocator,
spots = make([]Spot(T), length, allocator),
}
}
case_map_grow :: proc(m: ^Case_Map($T)) {
nm: Case_Map(T)
nm.allocator = m.allocator.procedure == nil ? context.allocator : m.allocator
err: mem.Allocator_Error
nm.spots, err = make([]Spot(T), max(len(m.spots)*2, MIN_SIZE), m.allocator)
assert(err == nil, "case_map: resize error")
iter: int
for k, v in case_map_iter(m, &iter) {
case_map_insert(&nm, k, v)
}
delete(m.spots, m.allocator)
m^ = nm
return
}
case_map_iter :: proc(m: ^Case_Map($T), state: ^int) -> (string, T, bool) {
if state^ >= len(m.spots) {
return {}, {}, false
}
for spot, i in m.spots[state^:] {
if spot.hash != 0 {
state^ += i + 1
return spot.key, spot.value, true
}
}
return {}, {}, false
}
case_map_insert :: proc(m: ^Case_Map($T), key: string, value: T) #no_bounds_check {
if m.len >= len(m.spots) * MAX_OCCUPIED / 100 {
case_map_grow(m)
}
hashed := hash_key(m, key)
idx := hashed % len(m.spots)
for ; idx < len(m.spots); idx += 1 {
entry := &m.spots[idx]
if entry.hash != 0 && !eq(key, entry.key) {
continue
}
if entry.hash == 0 {
m.len += 1
}
entry^ = Spot(T){
key = key,
value = value,
hash = hashed,
}
return
}
case_map_grow(m)
case_map_insert(m, key, value)
}
case_map_get :: proc(m: ^Case_Map($T), key: string) -> (value: T, ok: bool) #optional_ok #no_bounds_check {
hashed := hash_key(m, key)
for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
entry := m.spots[idx]
if entry.hash == 0 {
return
}
if hashed != entry.hash || !eq(key, entry.key) {
continue
}
return entry.value, true
}
return
}
case_map_delete :: proc(m: ^Case_Map($T), key: string) #no_bounds_check {
hashed := hash_key(m, key)
for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
entry := &m.sports[idx]
if entry.hash == 0 {
return
}
if hashed != entry.hash {
continue
}
if hashed != entry.hash || !eq(key, entry.key) {
continue
}
entry.hash = 0
}
}
hash_key :: proc(m: ^Case_Map($T), k: string) -> (res: uint) #no_bounds_check {
ptr := raw_data(m.spots)
ptr_slice := mem.ptr_to_bytes(&ptr)
when size_of(uint) == 4 {
res = uint(#force_inline hash.fnv32a(ptr_slice))
} else when size_of(uint) == 8 {
res = uint(#force_inline hash.fnv64a(ptr_slice))
} else {
#panic("unsupported int size")
}
k := k
for len(k) > 0 {
r, sz := utf8.decode_rune(k)
k = k[sz:]
lr := unicode.to_lower(r)
bytes := #force_inline mem.ptr_to_bytes(&lr)
when size_of(uint) == 4 {
res = uint(#force_inline hash.fnv32a(bytes, u32(res)))
} else {
res = uint(#force_inline hash.fnv64a(bytes, u64(res)))
}
}
assert(res != 0)
return
}
eq :: proc(k1, k2: string) -> bool #no_bounds_check {
(len(k1) == len(k2)) or_return
k1, k2 := k1, k2
for len(k1) > 0 {
assert(len(k2) > 0)
r1, sz1 := utf8.decode_rune(k1)
k1 = k1[sz1:]
r2, sz2 := utf8.decode_rune(k2)
k2 = k2[sz2:]
(unicode.to_lower(r1) == unicode.to_lower(r2)) or_return
}
return true
}
main :: proc() {
m := case_map_make(int, 10_000)
WORDS := #load("google-10000-english.txt", string)
i: int
for word in strings.split_lines_iterator(&WORDS) {
case_map_insert(&m, word, i)
i += 1
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment