laytan · November 13, 2024 18:41
diff --git a/case_map.odin b/case_map.odin
 package main

 import "base:intrinsics"
 import "base:runtime"

 import "core:hash"
 import "core:mem"
 import "core:strings"
 import "core:unicode"
 import "core:unicode/utf8"

 MIN_SIZE     :: 8
 MAX_OCCUPIED :: 60

 Spot :: struct($T: typeid) {
 	key:   string,
 	value: T,
 	hash:  uint,
 }

 Case_Map :: struct($T: typeid) {
 	allocator: runtime.Allocator,
 	spots:     []Spot(T),
 	len:       int,
 }

 case_map_make :: proc($T: typeid, cap := MIN_SIZE, allocator := context.allocator) -> Case_Map(T) {
 	length := (max(cap, MIN_SIZE) * 100 / MAX_OCCUPIED) + 1

 	return {
 		allocator = allocator,
 		spots     = make([]Spot(T), length, allocator),
 	}
 }

 case_map_grow :: proc(m: ^Case_Map($T)) {
 	nm: Case_Map(T)
 	nm.allocator = m.allocator.procedure == nil ? context.allocator : m.allocator

 	err: mem.Allocator_Error
 	nm.spots, err = make([]Spot(T), max(len(m.spots)*2, MIN_SIZE), m.allocator)
 	assert(err == nil, "case_map: resize error")

 	iter: int
 	for k, v in case_map_iter(m, &iter) {
 		case_map_insert(&nm, k, v)
 	}

 	delete(m.spots, m.allocator)

 	m^ = nm
 	return
 }

 case_map_iter :: proc(m: ^Case_Map($T), state: ^int) -> (string, T, bool) {
 	if state^ >= len(m.spots) {
 		return {}, {}, false
 	}

 	for spot, i in m.spots[state^:] {
 		if spot.hash != 0 {
 			state^ += i + 1
 			return spot.key, spot.value, true
 		}
 	}

 	return {}, {}, false
 }

 case_map_insert :: proc(m: ^Case_Map($T), key: string, value: T) #no_bounds_check {
 	if m.len >= len(m.spots) * MAX_OCCUPIED / 100 {
 		case_map_grow(m)
 	}

 	hashed := hash_key(m, key)
 	idx    := hashed % len(m.spots)

 	for ; idx < len(m.spots); idx += 1 {
 		entry := &m.spots[idx]

 		if entry.hash != 0 && !eq(key, entry.key) {
 			continue
 		}

 		if entry.hash == 0 {
 			m.len += 1
 		}

 		entry^ = Spot(T){
 			key   = key,
 			value = value,
 			hash  = hashed,
 		}

 		return
 	}

 	case_map_grow(m)
 	case_map_insert(m, key, value)
 }

 case_map_get :: proc(m: ^Case_Map($T), key: string) -> (value: T, ok: bool) #optional_ok #no_bounds_check {
 	hashed := hash_key(m, key)
 	for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
 		entry := m.spots[idx]

 		if entry.hash == 0 {
 			return
 		}

 		if hashed != entry.hash || !eq(key, entry.key) {
 			continue
 		}

 		return entry.value, true
 	}

 	return
 }

 case_map_delete :: proc(m: ^Case_Map($T), key: string) #no_bounds_check {
 	hashed := hash_key(m, key)
 	for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
 		entry := &m.sports[idx]

 		if entry.hash == 0 {
 			return
 		}

 		if hashed != entry.hash {
 			continue
 		}

 		if hashed != entry.hash || !eq(key, entry.key) {
 			continue
 		}

 		entry.hash = 0
 	}
 }

 hash_key :: proc(m: ^Case_Map($T), k: string) -> (res: uint) #no_bounds_check {
 	ptr       := raw_data(m.spots)
 	ptr_slice := mem.ptr_to_bytes(&ptr)

 	when size_of(uint) == 4 {
 		res = uint(#force_inline hash.fnv32a(ptr_slice))
 	} else when size_of(uint) == 8 {
 		res = uint(#force_inline hash.fnv64a(ptr_slice))
 	} else {
 		#panic("unsupported int size")
 	}

 	k := k
 	for len(k) > 0 {
 		r, sz := utf8.decode_rune(k)
 		k = k[sz:]

 		lr    := unicode.to_lower(r)
 		bytes := #force_inline mem.ptr_to_bytes(&lr)

 		when size_of(uint) == 4 {
 			res = uint(#force_inline hash.fnv32a(bytes, u32(res)))
 		} else {
 			res = uint(#force_inline hash.fnv64a(bytes, u64(res)))
 		}
 	}

 	assert(res != 0)
 	return
 }

 eq :: proc(k1, k2: string) -> bool #no_bounds_check {
 	(len(k1) == len(k2)) or_return

 	k1, k2 := k1, k2
 	for len(k1) > 0 {
 		assert(len(k2) > 0)

 		r1, sz1 := utf8.decode_rune(k1)
 		k1 = k1[sz1:]

 		r2, sz2 := utf8.decode_rune(k2)
 		k2 = k2[sz2:]

 		(unicode.to_lower(r1) == unicode.to_lower(r2)) or_return
 	}

 	return true
 }

 main :: proc() {
 	m := case_map_make(int, 10_000)

 	WORDS := #load("google-10000-english.txt", string)
 	i: int
 	for word in strings.split_lines_iterator(&WORDS) {
 		case_map_insert(&m, word, i)
 		i += 1
 	}
 }
	package main

	import "base:intrinsics"
	import "base:runtime"

	import "core:hash"
	import "core:mem"
	import "core:strings"
	import "core:unicode"
	import "core:unicode/utf8"

	MIN_SIZE :: 8
	MAX_OCCUPIED :: 60

	Spot :: struct($T: typeid) {
	key: string,
	value: T,
	hash: uint,
	}

	Case_Map :: struct($T: typeid) {
	allocator: runtime.Allocator,
	spots: []Spot(T),
	len: int,
	}

	case_map_make :: proc($T: typeid, cap := MIN_SIZE, allocator := context.allocator) -> Case_Map(T) {
	length := (max(cap, MIN_SIZE) * 100 / MAX_OCCUPIED) + 1

	return {
	allocator = allocator,
	spots = make([]Spot(T), length, allocator),
	}
	}

	case_map_grow :: proc(m: ^Case_Map($T)) {
	nm: Case_Map(T)
	nm.allocator = m.allocator.procedure == nil ? context.allocator : m.allocator

	err: mem.Allocator_Error
	nm.spots, err = make([]Spot(T), max(len(m.spots)*2, MIN_SIZE), m.allocator)
	assert(err == nil, "case_map: resize error")

	iter: int
	for k, v in case_map_iter(m, &iter) {
	case_map_insert(&nm, k, v)
	}

	delete(m.spots, m.allocator)

	m^ = nm
	return
	}

	case_map_iter :: proc(m: ^Case_Map($T), state: ^int) -> (string, T, bool) {
	if state^ >= len(m.spots) {
	return {}, {}, false
	}

	for spot, i in m.spots[state^:] {
	if spot.hash != 0 {
	state^ += i + 1
	return spot.key, spot.value, true
	}
	}

	return {}, {}, false
	}

	case_map_insert :: proc(m: ^Case_Map($T), key: string, value: T) #no_bounds_check {
	if m.len >= len(m.spots) * MAX_OCCUPIED / 100 {
	case_map_grow(m)
	}

	hashed := hash_key(m, key)
	idx := hashed % len(m.spots)

	for ; idx < len(m.spots); idx += 1 {
	entry := &m.spots[idx]

	if entry.hash != 0 && !eq(key, entry.key) {
	continue
	}

	if entry.hash == 0 {
	m.len += 1
	}

	entry^ = Spot(T){
	key = key,
	value = value,
	hash = hashed,
	}

	return
	}

	case_map_grow(m)
	case_map_insert(m, key, value)
	}

	case_map_get :: proc(m: ^Case_Map($T), key: string) -> (value: T, ok: bool) #optional_ok #no_bounds_check {
	hashed := hash_key(m, key)
	for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
	entry := m.spots[idx]

	if entry.hash == 0 {
	return
	}

	if hashed != entry.hash \|\| !eq(key, entry.key) {
	continue
	}

	return entry.value, true
	}

	return
	}

	case_map_delete :: proc(m: ^Case_Map($T), key: string) #no_bounds_check {
	hashed := hash_key(m, key)
	for idx := hashed % len(m.spots); idx < len(m.spots); idx += 1 {
	entry := &m.sports[idx]

	if entry.hash == 0 {
	return
	}

	if hashed != entry.hash {
	continue
	}

	if hashed != entry.hash \|\| !eq(key, entry.key) {
	continue
	}

	entry.hash = 0
	}
	}

	hash_key :: proc(m: ^Case_Map($T), k: string) -> (res: uint) #no_bounds_check {
	ptr := raw_data(m.spots)
	ptr_slice := mem.ptr_to_bytes(&ptr)

	when size_of(uint) == 4 {
	res = uint(#force_inline hash.fnv32a(ptr_slice))
	} else when size_of(uint) == 8 {
	res = uint(#force_inline hash.fnv64a(ptr_slice))
	} else {
	#panic("unsupported int size")
	}

	k := k
	for len(k) > 0 {
	r, sz := utf8.decode_rune(k)
	k = k[sz:]

	lr := unicode.to_lower(r)
	bytes := #force_inline mem.ptr_to_bytes(&lr)

	when size_of(uint) == 4 {
	res = uint(#force_inline hash.fnv32a(bytes, u32(res)))
	} else {
	res = uint(#force_inline hash.fnv64a(bytes, u64(res)))
	}
	}

	assert(res != 0)
	return
	}

	eq :: proc(k1, k2: string) -> bool #no_bounds_check {
	(len(k1) == len(k2)) or_return

	k1, k2 := k1, k2
	for len(k1) > 0 {
	assert(len(k2) > 0)

	r1, sz1 := utf8.decode_rune(k1)
	k1 = k1[sz1:]

	r2, sz2 := utf8.decode_rune(k2)
	k2 = k2[sz2:]

	(unicode.to_lower(r1) == unicode.to_lower(r2)) or_return
	}

	return true
	}

	main :: proc() {
	m := case_map_make(int, 10_000)

	WORDS := #load("google-10000-english.txt", string)
	i: int
	for word in strings.split_lines_iterator(&WORDS) {
	case_map_insert(&m, word, i)
	i += 1
	}
	}