Skip to content

Instantly share code, notes, and snippets.

@archaeologistdev
Last active January 19, 2025 03:44
Show Gist options
  • Save archaeologistdev/91309cf64bf714881371c2c1cc608854 to your computer and use it in GitHub Desktop.
Save archaeologistdev/91309cf64bf714881371c2c1cc608854 to your computer and use it in GitHub Desktop.
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
// Standard target options allows the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const lib = b.addStaticLibrary(.{
.name = "safehashmap",
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = b.path("root.zig"),
.target = target,
.optimize = optimize,
});
// This declares intent for the library to be installed into the standard
// location when the user invokes the "install" step (the default step when
// running `zig build`).
b.installArtifact(lib);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const lib_unit_tests = b.addTest(.{
.root_source_file = b.path("root.zig"),
.target = target,
.optimize = optimize,
});
const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
// Similar to creating the run step earlier, this exposes a `test` step to
// the `zig build --help` menu, providing a way for the user to request
// running the unit tests.
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_lib_unit_tests.step);
}
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = "safehashmap",
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
.minimum_zig_version = "0.13.0",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}
const std = @import("std");
const SafeHashMap = @import("./safe_hash_map.zig").SafeHashMap;
const testing = std.testing;
/// Proof of concept illustrating incorrect use of std.HashMap
fn poc(iterations: u64, alloc: std.mem.Allocator) !void {
var map = std.AutoHashMap(u64, u64).init(alloc);
defer map.deinit();
for (0..iterations) |n| {
// may reallocate map's storage
const a = try map.getOrPutValue(n, 0);
// may reallocate map's storage, invalidating a.value_ptr
const b = try map.getOrPutValue(iterations + n, 0);
// BUG! a.value_ptr may have been invalidated by second call to getOrPutValue
a.value_ptr.* += 1;
// in contrast, b.value_ptr is okay to dereference
b.value_ptr.* += 1;
}
// verify that the map contains the expected values
var it = map.iterator();
while (it.next()) |n| {
// this assertion can fail if the hashmap was resized
try std.testing.expect(n.value_ptr.* == 1);
}
}
// Possible fix #1: leverage lexical scopes to limit pointer lifetimes
// Problem: caller needs to remember to do this
fn poc_blocks(iterations: u64, alloc: std.mem.Allocator) !void {
var map = std.AutoHashMap(u64, u64).init(alloc);
defer map.deinit();
for (0..iterations) |n| {
{
// may reallocate map's storage
const a = try map.getOrPutValue(n, 0);
// before we access pointers, guard against invalidating them
map.lockPointers();
defer map.unlockPointers();
// a is guaranteed to be a valid pointer
a.value_ptr.* += 1;
}
{
// may reallocate map's storage, this invalidates a.value_ptr but that's okay as a is already out of scope here
const b = try map.getOrPutValue(iterations + n, 0);
// before we access pointers, guard against invalidating them
map.lockPointers();
defer map.unlockPointers();
// b is guaranteed to be a valid pointer
b.value_ptr.* += 1;
}
}
// verify that the map contains the expected values
var it = map.iterator();
while (it.next()) |n| {
// this assertion should never fail
std.debug.assert(n.value_ptr.* == 1);
}
}
// Possible fix #2: can we create a safer abstraction that developers to the "pit of success"?
fn poc_safe(iterations: u64, alloc: std.mem.Allocator) !void {
var map = SafeHashMap(u64, u64).init(alloc);
defer map.deinit();
for (0..iterations) |n| {
// inc is a function that takes pointers to the key and value
// locks pointers before calling inc, and unlocks them after
// therefore inc can rely on pointers being valid
// inc is comptime, so no function pointer overhead? (unverified)
try map.getOrPutValue(n, 0, inc);
// although it's quite verbose, we can also specify the function directly at the callsite
// as shown in the following example, this pattern works for getOrPut too
// locks pointers before calling valueFn, and unlocks them after
// therefore valueFn can rely on pointers being valid
// valueFn is comptime, so no function pointer overhead (unverified)
try map.getOrPut(iterations + n, struct {
fn valueFn(found_existing: bool, _: *u64, value_ptr: *u64) anyerror!void {
if (!found_existing) value_ptr.* = 0;
value_ptr.* += 1;
}
}.valueFn);
}
// verify that the map contains the expected values
var it = map.iterator();
while (it.next()) |n| {
// this assertion should never fail
std.debug.assert(n.value_ptr.* == 1);
}
}
fn inc(_: *u64, value_ptr: *u64) anyerror!void {
value_ptr.* += 1;
}
test "works fine if hashmap is not resized" {
try poc(12, std.testing.allocator);
}
test "segfaults with testing allocator when hashmap is resized" {
// uncomment to observe segfault
// try poc(13, std.testing.allocator);
}
test "gives incorrect result with arena allocator" {
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
const aa = arena.allocator();
defer arena.deinit();
try std.testing.expectError(error.TestUnexpectedResult, poc(13, aa));
}
test "using lexical scope and lockPointers" {
try poc_blocks(13, std.testing.allocator);
}
test "SafeHashMap" {
try poc_safe(13, std.testing.allocator);
}
const std = @import("std");
/// An experimental wrapper around std.AutoHashMap(K, V) that makes it harder to shoot onself in the foot.
pub fn SafeHashMap(comptime K: type, comptime V: type) type {
return struct {
const Self = @This();
map: std.AutoHashMap(K, V),
pub fn init(allocator: std.mem.Allocator) Self {
return .{ .map = std.AutoHashMap(K, V).init(allocator) };
}
pub fn deinit(self: *Self) void {
self.map.deinit();
self.* = undefined;
}
// If `key` does not exist in the map, a new entry mapping it to `value` is inserted.
// In any case, valueFn is invoked with pointers to the key and value and may mutate them.
pub fn getOrPutValue(self: *Self, key: K, value: V, comptime valueFn: fn (*K, *V) anyerror!void) !void {
const entry = try self.map.getOrPutValue(key, value);
self.map.lockPointers();
defer self.map.unlockPointers();
try valueFn(entry.key_ptr, entry.value_ptr);
}
// If `key` does not exist in the map, a new entry mapping it to `undefined` is inserted.
// In any case, `valueFn` is invoked with 3 arguments:
// 1. `found_existing`, a boolean indicating whether an existing entry was found.
// 2. `key_ptr`, a pointer to the key within the map.
// 3. `value_ptr`, a pointer to the value within the map, possibly undefined.
// If `found_existing` is `false, `value` is `undefined` and must be initialized by `valueFn`.
pub fn getOrPut(self: *Self, key: K, comptime valueFn: fn (bool, *K, *V) anyerror!void) !void {
const result = try self.map.getOrPut(key);
self.map.lockPointers();
defer self.map.unlockPointers();
try valueFn(result.found_existing, result.key_ptr, result.value_ptr);
}
pub fn iterator(self: *Self) std.AutoHashMap(K, V).Iterator {
return self.map.iterator();
}
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment