Last active
January 19, 2025 03:44
-
-
Save archaeologistdev/91309cf64bf714881371c2c1cc608854 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
// Although this function looks imperative, note that its job is to | |
// declaratively construct a build graph that will be executed by an external | |
// runner. | |
pub fn build(b: *std.Build) void { | |
// Standard target options allows the person running `zig build` to choose | |
// what target to build for. Here we do not override the defaults, which | |
// means any target is allowed, and the default is native. Other options | |
// for restricting supported target set are available. | |
const target = b.standardTargetOptions(.{}); | |
// Standard optimization options allow the person running `zig build` to select | |
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not | |
// set a preferred release mode, allowing the user to decide how to optimize. | |
const optimize = b.standardOptimizeOption(.{}); | |
const lib = b.addStaticLibrary(.{ | |
.name = "safehashmap", | |
// In this case the main source file is merely a path, however, in more | |
// complicated build scripts, this could be a generated file. | |
.root_source_file = b.path("root.zig"), | |
.target = target, | |
.optimize = optimize, | |
}); | |
// This declares intent for the library to be installed into the standard | |
// location when the user invokes the "install" step (the default step when | |
// running `zig build`). | |
b.installArtifact(lib); | |
// Creates a step for unit testing. This only builds the test executable | |
// but does not run it. | |
const lib_unit_tests = b.addTest(.{ | |
.root_source_file = b.path("root.zig"), | |
.target = target, | |
.optimize = optimize, | |
}); | |
const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); | |
// Similar to creating the run step earlier, this exposes a `test` step to | |
// the `zig build --help` menu, providing a way for the user to request | |
// running the unit tests. | |
const test_step = b.step("test", "Run unit tests"); | |
test_step.dependOn(&run_lib_unit_tests.step); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.{ | |
// This is the default name used by packages depending on this one. For | |
// example, when a user runs `zig fetch --save <url>`, this field is used | |
// as the key in the `dependencies` table. Although the user can choose a | |
// different name, most users will stick with this provided value. | |
// | |
// It is redundant to include "zig" in this name because it is already | |
// within the Zig package namespace. | |
.name = "safehashmap", | |
// This is a [Semantic Version](https://semver.org/). | |
// In a future version of Zig it will be used for package deduplication. | |
.version = "0.0.0", | |
// This field is optional. | |
// This is currently advisory only; Zig does not yet do anything | |
// with this value. | |
.minimum_zig_version = "0.13.0", | |
// This field is optional. | |
// Each dependency must either provide a `url` and `hash`, or a `path`. | |
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively. | |
// Once all dependencies are fetched, `zig build` no longer requires | |
// internet connectivity. | |
.dependencies = .{ | |
// See `zig fetch --save <url>` for a command-line interface for adding dependencies. | |
//.example = .{ | |
// // When updating this field to a new URL, be sure to delete the corresponding | |
// // `hash`, otherwise you are communicating that you expect to find the old hash at | |
// // the new URL. | |
// .url = "https://example.com/foo.tar.gz", | |
// | |
// // This is computed from the file contents of the directory of files that is | |
// // obtained after fetching `url` and applying the inclusion rules given by | |
// // `paths`. | |
// // | |
// // This field is the source of truth; packages do not come from a `url`; they | |
// // come from a `hash`. `url` is just one of many possible mirrors for how to | |
// // obtain a package matching this `hash`. | |
// // | |
// // Uses the [multihash](https://multiformats.io/multihash/) format. | |
// .hash = "...", | |
// | |
// // When this is provided, the package is found in a directory relative to the | |
// // build root. In this case the package's hash is irrelevant and therefore not | |
// // computed. This field and `url` are mutually exclusive. | |
// .path = "foo", | |
// // When this is set to `true`, a package is declared to be lazily | |
// // fetched. This makes the dependency only get fetched if it is | |
// // actually used. | |
// .lazy = false, | |
//}, | |
}, | |
// Specifies the set of files and directories that are included in this package. | |
// Only files and directories listed here are included in the `hash` that | |
// is computed for this package. Only files listed here will remain on disk | |
// when using the zig package manager. As a rule of thumb, one should list | |
// files required for compilation plus any license(s). | |
// Paths are relative to the build root. Use the empty string (`""`) to refer to | |
// the build root itself. | |
// A directory listed here means that all files within, recursively, are included. | |
.paths = .{ | |
"build.zig", | |
"build.zig.zon", | |
"src", | |
// For example... | |
//"LICENSE", | |
//"README.md", | |
}, | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const SafeHashMap = @import("./safe_hash_map.zig").SafeHashMap; | |
const testing = std.testing; | |
/// Proof of concept illustrating incorrect use of std.HashMap | |
fn poc(iterations: u64, alloc: std.mem.Allocator) !void { | |
var map = std.AutoHashMap(u64, u64).init(alloc); | |
defer map.deinit(); | |
for (0..iterations) |n| { | |
// may reallocate map's storage | |
const a = try map.getOrPutValue(n, 0); | |
// may reallocate map's storage, invalidating a.value_ptr | |
const b = try map.getOrPutValue(iterations + n, 0); | |
// BUG! a.value_ptr may have been invalidated by second call to getOrPutValue | |
a.value_ptr.* += 1; | |
// in contrast, b.value_ptr is okay to dereference | |
b.value_ptr.* += 1; | |
} | |
// verify that the map contains the expected values | |
var it = map.iterator(); | |
while (it.next()) |n| { | |
// this assertion can fail if the hashmap was resized | |
try std.testing.expect(n.value_ptr.* == 1); | |
} | |
} | |
// Possible fix #1: leverage lexical scopes to limit pointer lifetimes | |
// Problem: caller needs to remember to do this | |
fn poc_blocks(iterations: u64, alloc: std.mem.Allocator) !void { | |
var map = std.AutoHashMap(u64, u64).init(alloc); | |
defer map.deinit(); | |
for (0..iterations) |n| { | |
{ | |
// may reallocate map's storage | |
const a = try map.getOrPutValue(n, 0); | |
// before we access pointers, guard against invalidating them | |
map.lockPointers(); | |
defer map.unlockPointers(); | |
// a is guaranteed to be a valid pointer | |
a.value_ptr.* += 1; | |
} | |
{ | |
// may reallocate map's storage, this invalidates a.value_ptr but that's okay as a is already out of scope here | |
const b = try map.getOrPutValue(iterations + n, 0); | |
// before we access pointers, guard against invalidating them | |
map.lockPointers(); | |
defer map.unlockPointers(); | |
// b is guaranteed to be a valid pointer | |
b.value_ptr.* += 1; | |
} | |
} | |
// verify that the map contains the expected values | |
var it = map.iterator(); | |
while (it.next()) |n| { | |
// this assertion should never fail | |
std.debug.assert(n.value_ptr.* == 1); | |
} | |
} | |
// Possible fix #2: can we create a safer abstraction that developers to the "pit of success"? | |
fn poc_safe(iterations: u64, alloc: std.mem.Allocator) !void { | |
var map = SafeHashMap(u64, u64).init(alloc); | |
defer map.deinit(); | |
for (0..iterations) |n| { | |
// inc is a function that takes pointers to the key and value | |
// locks pointers before calling inc, and unlocks them after | |
// therefore inc can rely on pointers being valid | |
// inc is comptime, so no function pointer overhead? (unverified) | |
try map.getOrPutValue(n, 0, inc); | |
// although it's quite verbose, we can also specify the function directly at the callsite | |
// as shown in the following example, this pattern works for getOrPut too | |
// locks pointers before calling valueFn, and unlocks them after | |
// therefore valueFn can rely on pointers being valid | |
// valueFn is comptime, so no function pointer overhead (unverified) | |
try map.getOrPut(iterations + n, struct { | |
fn valueFn(found_existing: bool, _: *u64, value_ptr: *u64) anyerror!void { | |
if (!found_existing) value_ptr.* = 0; | |
value_ptr.* += 1; | |
} | |
}.valueFn); | |
} | |
// verify that the map contains the expected values | |
var it = map.iterator(); | |
while (it.next()) |n| { | |
// this assertion should never fail | |
std.debug.assert(n.value_ptr.* == 1); | |
} | |
} | |
fn inc(_: *u64, value_ptr: *u64) anyerror!void { | |
value_ptr.* += 1; | |
} | |
test "works fine if hashmap is not resized" { | |
try poc(12, std.testing.allocator); | |
} | |
test "segfaults with testing allocator when hashmap is resized" { | |
// uncomment to observe segfault | |
// try poc(13, std.testing.allocator); | |
} | |
test "gives incorrect result with arena allocator" { | |
var arena = std.heap.ArenaAllocator.init(std.testing.allocator); | |
const aa = arena.allocator(); | |
defer arena.deinit(); | |
try std.testing.expectError(error.TestUnexpectedResult, poc(13, aa)); | |
} | |
test "using lexical scope and lockPointers" { | |
try poc_blocks(13, std.testing.allocator); | |
} | |
test "SafeHashMap" { | |
try poc_safe(13, std.testing.allocator); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
/// An experimental wrapper around std.AutoHashMap(K, V) that makes it harder to shoot onself in the foot. | |
pub fn SafeHashMap(comptime K: type, comptime V: type) type { | |
return struct { | |
const Self = @This(); | |
map: std.AutoHashMap(K, V), | |
pub fn init(allocator: std.mem.Allocator) Self { | |
return .{ .map = std.AutoHashMap(K, V).init(allocator) }; | |
} | |
pub fn deinit(self: *Self) void { | |
self.map.deinit(); | |
self.* = undefined; | |
} | |
// If `key` does not exist in the map, a new entry mapping it to `value` is inserted. | |
// In any case, valueFn is invoked with pointers to the key and value and may mutate them. | |
pub fn getOrPutValue(self: *Self, key: K, value: V, comptime valueFn: fn (*K, *V) anyerror!void) !void { | |
const entry = try self.map.getOrPutValue(key, value); | |
self.map.lockPointers(); | |
defer self.map.unlockPointers(); | |
try valueFn(entry.key_ptr, entry.value_ptr); | |
} | |
// If `key` does not exist in the map, a new entry mapping it to `undefined` is inserted. | |
// In any case, `valueFn` is invoked with 3 arguments: | |
// 1. `found_existing`, a boolean indicating whether an existing entry was found. | |
// 2. `key_ptr`, a pointer to the key within the map. | |
// 3. `value_ptr`, a pointer to the value within the map, possibly undefined. | |
// If `found_existing` is `false, `value` is `undefined` and must be initialized by `valueFn`. | |
pub fn getOrPut(self: *Self, key: K, comptime valueFn: fn (bool, *K, *V) anyerror!void) !void { | |
const result = try self.map.getOrPut(key); | |
self.map.lockPointers(); | |
defer self.map.unlockPointers(); | |
try valueFn(result.found_existing, result.key_ptr, result.value_ptr); | |
} | |
pub fn iterator(self: *Self) std.AutoHashMap(K, V).Iterator { | |
return self.map.iterator(); | |
} | |
}; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment