Created
August 31, 2022 11:06
-
-
Save lithdew/34dbfa9a16e5753aa5507d6cbd669ca4 to your computer and use it in GitHub Desktop.
zig: lexicographic hash map
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
pub fn LexicographicHashMap(comptime V: type, comptime capacity: u32) type { | |
const shift = @bitSizeOf(u64) - 1 - std.math.log2_int(u64, capacity) + 1; | |
const overflow = capacity / 10 + (@bitSizeOf(u64) - 1 - shift + 1) << 1; | |
const num_entries = capacity + overflow; | |
return struct { | |
pub const Entry = packed struct { | |
key: u32 = 0, | |
value: V = undefined, | |
}; | |
const Self = @This(); | |
entries: [num_entries]Entry = [_]Entry{.{}} ** num_entries, | |
len: u32 = 0, | |
fn idx(ctx: anytype, a: u32) u32 { | |
return @intCast(u32, ctx.getHash(a) >> shift); | |
} | |
pub fn get(self: *const Self, ctx: anytype, key: u32) ?V { | |
var index = idx(ctx, key); | |
while (true) : (index += 1) { | |
const entry = self.entries[index]; | |
const result = ctx.cmp(entry.key, key); | |
if (result.compare(.gte)) { | |
if (result != .eq) { | |
return null; | |
} | |
return entry.value; | |
} | |
} | |
} | |
pub fn put(self: *Self, ctx: anytype, key: u32, value: V) !void { | |
const result = try self.getOrPut(ctx, key); | |
result.value_ptr.* = value; | |
} | |
pub fn delete(self: *Self, ctx: anytype, key: u32) ?V { | |
var index = idx(ctx, key); | |
while (true) : (index += 1) { | |
const entry = self.entries[index]; | |
const result = ctx.cmp(entry.key, key); | |
if (result.compare(.gte)) { | |
if (result != .eq) { | |
return null; | |
} | |
break; | |
} | |
} | |
const value = self.entries[index].value; | |
index += 1; | |
while (index - 1 >= idx(ctx, self.entries[index].key) and !ctx.isEmpty(self.entries[index].key)) : (index += 1) { | |
self.entries[index - 1] = self.entries[index]; | |
} | |
self.entries[index - 1] = .{}; | |
self.len -= 1; | |
return value; | |
} | |
pub const GetOrPutResult = struct { | |
found_existing: bool, | |
value_ptr: *align(1) V, | |
}; | |
pub fn getOrPut(self: *Self, ctx: anytype, key: u32) !GetOrPutResult { | |
if (self.len == capacity) { | |
return error.OutOfMemory; | |
} | |
var it: Self.Entry = .{ .key = key, .value = undefined }; | |
var index = idx(ctx, key); | |
while (true) : (index += 1) { | |
const entry = self.entries[index]; | |
const result = ctx.cmp(entry.key, key); | |
if (result.compare(.gte)) { | |
if (result == .eq) { | |
return GetOrPutResult{ .found_existing = true, .value_ptr = &self.entries[index].value }; | |
} | |
self.entries[index] = it; | |
if (ctx.isEmpty(entry.key)) { | |
self.len += 1; | |
return GetOrPutResult{ .found_existing = false, .value_ptr = &self.entries[index].value }; | |
} | |
it = entry; | |
break; | |
} | |
} | |
const inserted_at = index; | |
index += 1; | |
while (true) : (index += 1) { | |
const entry = self.entries[index]; | |
self.entries[index] = it; | |
if (ctx.isEmpty(entry.key)) { | |
self.len += 1; | |
return GetOrPutResult{ .found_existing = false, .value_ptr = &self.entries[inserted_at].value }; | |
} | |
it = entry; | |
} | |
} | |
}; | |
} | |
test "LexicographicHashMap" { | |
var map: LexicographicHashMap(u32, 16) = .{}; | |
var strings: std.ArrayListUnmanaged([]const u8) = .{}; | |
defer { | |
for (strings.items) |string| { | |
std.testing.allocator.free(string); | |
} | |
strings.deinit(std.testing.allocator); | |
} | |
const empty_string = try std.testing.allocator.dupe(u8, ""); | |
errdefer std.testing.allocator.free(empty_string); | |
try strings.append(std.testing.allocator, empty_string); | |
var ctx: struct { | |
strings: *const std.ArrayListUnmanaged([]const u8), | |
pub fn isEmpty(_: @This(), key: u32) bool { | |
return key == 0; | |
} | |
pub fn cmp(self: @This(), a: u32, b: u32) std.math.Order { | |
if (a == 0) return .gt; | |
if (b == 0) return .lt; | |
if (a == 0 and b == 0) return .eq; | |
return std.mem.order(u8, self.strings.items[a], self.strings.items[b]); | |
} | |
pub fn getHash(self: @This(), key: u32) u64 { | |
var hash = std.mem.zeroes([8]u8); | |
std.mem.copy(u8, &hash, self.strings.items[key][0..@minimum(hash.len, self.strings.items[key].len)]); | |
return std.mem.readIntBig(u64, &hash); | |
} | |
} = .{ .strings = &strings }; | |
var rng = std.rand.DefaultPrng.init(0); | |
comptime var i: usize = 0; | |
inline while (i < 15) : (i += 1) { | |
const string = try std.testing.allocator.alloc(u8, 10 + rng.random().uintLessThan(usize, 40)); | |
errdefer std.testing.allocator.free(string); | |
rng.random().bytes(string); | |
try strings.append(std.testing.allocator, string); | |
} | |
comptime var j: usize = 0; | |
inline while (j < 15) : (j += 1) { | |
try map.put(ctx, 1 + j, 100 * (1 + j)); | |
} | |
// 0: 07d580c33d6fda61fc7b9aec91df0f5c1a5ebe3b8cbf -> 100 | |
// 1: 154eed69ba7ec560cb39dd77e95cc622bf6a7c5a0cced1a56832e1 -> 700 | |
// 2: 20e7fa109f130418fa10acb8e790 -> 400 | |
// 3: 246478ae0beb0f4050bf80 -> 800 | |
// 6: 6899c10632848450fc4daae9 -> 300 | |
// 7: 73be085468b8635802c36b0671a58c0e376a9538d6 -> 1100 | |
// 8: 8216fe5731d82f9614e0ec8c08cc6d85bd4cb21a351787ca848101 -> 1300 | |
// 9: 91bec19c6596e64e46169269775dcba3ed7d688aaa2a009ce29a3d5616 -> 1500 | |
// 10: 9a8df05777c343056e02b55ac79074db59c94b46e64373d8fff08923a0 -> 200 | |
// 11: 9e9d6e690d00da3644108c54609b832aca -> 1200 | |
// 12: a804fcd0d32046a0c39caf301288501d98f635ed7d28be53e108 -> 500 | |
// 13: c057accb24156c21df443a0608eb530a97817748b9 -> 600 | |
// 14: c1e593e1a7894fc57c7039 -> 1400 | |
// 15: cde8d25c8b5475dc13816466f7769ddf41dec0aeb7f82b34 -> 900 | |
// 16: dbd772182a2be12949ec0a648b2d36b6a9a4bbb58e69782eb9af87824b49 -> 1000 | |
for (map.entries) |*entry, k| { | |
if (ctx.isEmpty(entry.key)) { | |
continue; | |
} | |
std.debug.print("{}: {} -> {}\n", .{ k, std.fmt.fmtSliceHexLower(strings.items[entry.key]), entry.value }); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment