Skip to content

Instantly share code, notes, and snippets.

@AndreyArthur
Created May 11, 2024 20:03
Show Gist options
  • Save AndreyArthur/1faac27e88af0175080553e7354c1b41 to your computer and use it in GitHub Desktop.
Save AndreyArthur/1faac27e88af0175080553e7354c1b41 to your computer and use it in GitHub Desktop.
Zig strings beginner cheat-sheet.
// When I was starting to learn Zig, strings were a real pain, so I made this
// little guide to help you understand what each kind of string type does and
// why some of them seem to be "wrong".
//
// I will assume that you already know how pointers, const, var, comptime and
// the stack work. You don't need to be an expert, but I will not spend time
// detailing these concepts.
//
// Sorry for the English mistakes, I'm far from being a fluent English speaker,
// but what counts is the information.
const std = @import("std");
// Intentionally empty entry point: everything this file demonstrates lives in
// the `test` blocks below.
pub fn main() void {}
test "lists" {
// Before jumping to strings we must know what each kind of list does in
// Zig.
//
// Zig does not have a string type; every type that can represent a string
// in Zig is a kind of list type. Three of them matter in particular.
// Arrays
//
// Arrays in Zig are values: an array variable holds all the items of the
// array, not a pointer to the first element as it would in C.
//
// Arrays must have a fixed length known at compile time. The compiler can
// also infer the length if an underscore is given.
var array = [4]i32{ -1, 0, 1, 2 };
try std.testing.expect(@TypeOf(array) == [4]i32);
const array_infered = [_]i32{ 1, 2 };
try std.testing.expect(@TypeOf(array_infered) == [2]i32);
// Many-item pointers
//
// Often we don't know exactly how many items are in an array. Because Zig
// arrays need a length known at compile time, we use another type for that
// case, called a many-item pointer.
//
// A many-item pointer is a single pointer to the first element of a
// contiguous region of memory of unknown length.
//
// A simple way to understand a many-item pointer is to think of it as a C
// array. Like:
//
// int items[] = {1, 2, 3, 4, 5};
//
// The `items` variable is just a pointer to `items[0]` and we are unable to
// get the length of `items` unless we know this information at compile
// time.
//
// The main difference between a C array and a Zig many-item pointer is that
// you cannot dereference it directly or with an offset, like you can in C.
// For example:
//
// | ----------------------------|-------------------------------------|
// | C | Zig |
// | ----------------------------|-------------------------------------|
// | items[0] == 1 : Ok | items[0] == 1 : Ok |
// | *(items) == 1 : Ok | items.* == 1 : Illegal |
// | *(items + 1) == 2 : Ok | (items + 1).* == 2 : Illegal |
// | 0[items] == 1 : Ok | 0[items] == 1 : Illegal |
// | items++; items[0] == 2 : Ok | items += 1; items[0] == 2 : Ok |
// | ----------------------------|-------------------------------------|
//
// Many-item pointers are very often used to integrate with C, given that
// their nature is basically the same. You almost never want to declare a
// many-item pointer in your own Zig code.
// In Zig, the value of a literal is automatically comptime and is then
// enforced to be a runtime constant. So, this time, we declare the array as
// a var first and convert it to a many-item pointer afterwards.
var manyitem_array = [_]i32{ 1, 2, 3, 4, 5 };
const manyitem = (&manyitem_array).ptr;
try std.testing.expect(@TypeOf(manyitem) == [*]i32);
// Taking the pointer straight from a literal yields a pointer to constants.
const manyitem_comptime_literal = (&[_]i32{ 1, 2, 3, 4, 5 }).ptr;
try std.testing.expect(@TypeOf(manyitem_comptime_literal) == [*]const i32);
// Slices
//
// Finally we reach the cool part. A slice is just a pointer to memory that
// already exists, attached to a length. You can think of a slice as a
// struct. Like:
//
// fn Slice(comptime T: type) type {
//     return struct {
//         ptr: [*]T,
//         len: usize,
//     };
// }
//
// But instead of being a struct, it's a type managed by the Zig internals.
// There are many ways to coerce many types into slices, but the simplest is
// to just declare the type and pass a range.
// Note that this only works because the `array` variable is a var. If it
// were a const, it would have to be coerced into a []const i32 (slice of
// constants).
const slice_from_array: []i32 = array[0..4];
try std.testing.expect(@TypeOf(slice_from_array) == []i32);
try std.testing.expect(slice_from_array.len == 4);
// The slice points at the array's own memory; nothing was copied.
try std.testing.expect(
@intFromPtr(slice_from_array.ptr) == @intFromPtr(&array),
);
// However, we can still coerce a var into a []const i32 (slice of
// constants).
const slice_of_constants_from_array: []const i32 = array[0..4];
try std.testing.expect(
@TypeOf(slice_of_constants_from_array) == []const i32,
);
try std.testing.expect(slice_of_constants_from_array.len == 4);
try std.testing.expect(
@intFromPtr(slice_of_constants_from_array.ptr) == @intFromPtr(&array),
);
// Zig always lets you be "more safe" and never "less safe." - 40, Ziglings.
// Wait, what is this? We are getting a non-const value from a const
// many-item pointer? Yes! The many-item pointer does not hold the value;
// the value is held by the array itself, which is a var, not a const!
const slice_of_manyitem: []i32 = manyitem[0..5];
try std.testing.expect(@TypeOf(slice_of_manyitem) == []i32);
try std.testing.expect(slice_of_manyitem.len == 5);
try std.testing.expect(
@intFromPtr(slice_of_manyitem.ptr) == @intFromPtr(manyitem),
);
// Other ways to coerce slices
const manual_full_range_slice: []i32 = array[0..4];
const auto_full_range_slice: []i32 = array[0..];
const pointer_syntax_slice: []i32 = &array;
const manual_restricted_range_slice: []i32 = array[1..3];
// All three full-range forms describe the same four items.
try std.testing.expectEqualSlices(
i32,
manual_full_range_slice,
auto_full_range_slice,
);
try std.testing.expectEqualSlices(
i32,
auto_full_range_slice,
pointer_syntax_slice,
);
// The range 1..3 covers array[1] and array[2]; the end index is exclusive.
try std.testing.expect(manual_restricted_range_slice[0] == 0);
try std.testing.expect(manual_restricted_range_slice[1] == 1);
try std.testing.expect(manual_restricted_range_slice.len == 2);
// So, to make it clear.
//
// When you see a [<num>]<type> it's an array.
// When you see a [*]<type> it's a many-item pointer.
// When you see a []<type> it's a slice.
}
test "sentinels" {
// A sentinel is a value that marks the end of a list. Arrays, many-item
// pointers and slices can all have sentinels.
//
// It's very common for lists representing strings to have a `0` sentinel.
//
// Note that the sentinel does not affect the length of the list.
const literal = "Hello, World!";
try std.testing.expect(@TypeOf(literal) == *const [13:0]u8);
try std.testing.expect(literal.len == 13);
// Dereferencing the literal gives a sentinel-terminated array value.
var array = literal.*;
try std.testing.expect(@TypeOf(array) == [13:0]u8);
try std.testing.expect(array.len == 13);
// The sentinel is carried over into the many-item pointer type.
const manyitem = (&array).ptr;
try std.testing.expect(@TypeOf(manyitem) == [*:0]u8);
// Sentinels are especially useful with many-item pointers, because with the
// sentinel, format functions (and we, in our programs) can "detect" the
// end of a many-item pointer list. Otherwise we would need to know the
// length at compile time or write more complex code to detect when we have
// reached the end of a many-item pointer list.
}
test "literals" {
// We have finally reached strings. Now we will talk about string literals
// and some strategies to deal with them.
//
// String literals in Zig are COMPTIME, NOT STACK ALLOCATED values.
//
// And that's why they are pointers to constant u8 arrays terminated with a
// `0` sentinel (*const [<len>:0]u8).
// You declare a literal like this.
const literal = "Hello, World!";
try std.testing.expect(@TypeOf(literal) == *const [13:0]u8);
// `literal.*` yields the array value itself, i.e. a copy of the bytes
// rather than another reference to the literal.
_ = literal.*;
// Let's coerce (or copy) it to all kinds of lists.
// Directly
const array_sentinel = literal.*;
try std.testing.expect(@TypeOf(array_sentinel) == [13:0]u8);
const array: [13]u8 = literal.*;
try std.testing.expect(@TypeOf(array) == [13]u8);
const manyitem_to_const_sentinel: [*:0]const u8 = literal;
try std.testing.expect(
@TypeOf(manyitem_to_const_sentinel) == [*:0]const u8,
);
const manyitem_to_const: [*]const u8 = literal;
try std.testing.expect(@TypeOf(manyitem_to_const) == [*]const u8);
const slice_to_const_sentinel: [:0]const u8 = literal;
try std.testing.expect(
@TypeOf(slice_to_const_sentinel) == [:0]const u8,
);
const slice_to_const: []const u8 = literal;
try std.testing.expect(
@TypeOf(slice_to_const) == []const u8,
);
// The non-const types need a var array to point into.
// Yes, things like `&(literal.*)` won't work.
var var_array = literal.*;
const manyitem_sentinel: [*:0]u8 = &var_array;
try std.testing.expect(@TypeOf(manyitem_sentinel) == [*:0]u8);
const manyitem: [*]u8 = &var_array;
try std.testing.expect(@TypeOf(manyitem) == [*]u8);
const slice: []u8 = &var_array;
try std.testing.expect(@TypeOf(slice) == []u8);
const slice_sentinel: [:0]u8 = &var_array;
try std.testing.expect(@TypeOf(slice_sentinel) == [:0]u8);
// Ensure all of them are equal.
// Many-item pointers carry no length, so they are sliced to the 13 known
// bytes before comparing.
try std.testing.expectEqualStrings(literal, &array_sentinel);
try std.testing.expectEqualStrings(literal, &array);
try std.testing.expectEqualStrings(
literal,
manyitem_to_const_sentinel[0..13],
);
try std.testing.expectEqualStrings(literal, manyitem_to_const[0..13]);
try std.testing.expectEqualStrings(literal, slice_to_const_sentinel);
try std.testing.expectEqualStrings(literal, slice_to_const);
try std.testing.expectEqualStrings(literal, manyitem_sentinel[0..13]);
try std.testing.expectEqualStrings(literal, manyitem[0..13]);
try std.testing.expectEqualStrings(literal, slice);
try std.testing.expectEqualStrings(literal, slice_sentinel);
}
test "function return mistakes" {
const returns = struct {
// We have already seen that a slice does not hold any value from an
// array; it just points to a certain item and has a length.
//
// So, if you return a slice of an array that lives on the function's own
// stack, you will simply end up with garbage, because the function call
// stack will be cleaned up.
fn slice() []u8 {
var slice_array = [_]u8{ 'h', 'e', 'l', 'l', 'o' };
return &slice_array;
}
// Many-item pointers also don't hold memory, they're just pointers.
//
// A many-item pointer to a stack allocated array also has the value it
// points to thrown away when the function ends.
fn manyitem() [*]u8 {
var manyitem_array = [_]u8{ 'h', 'e', 'l', 'l', 'o' };
return (&manyitem_array).ptr;
}
// So this will not work either. Yes? No! I've already told you that Zig
// string literals are comptime and not stack allocated. So, they will
// not go away when the function ends!
fn literal() []const u8 {
const my_literal = "hello";
return my_literal;
}
// So will it work? Yes! But this time it is because we're returning an
// array. That IS a value itself. It does not point to a value, it is a
// real value.
fn array() [5]u8 {
return [_]u8{ 'h', 'e', 'l', 'l', 'o' };
}
};
// NOTE(review): the next two assertions read memory of a stack frame that
// has already returned. They expect the bytes to differ from "hello", but
// what dead stack memory contains is not something the code itself
// controls — treat these as a demonstration, not a guarantee.
try std.testing.expect(
std.mem.eql(u8, "hello", returns.slice()) == false,
);
try std.testing.expect(
std.mem.eql(u8, "hello", returns.manyitem()[0..5]) == false,
);
// The literal and the by-value array are both safe to return.
try std.testing.expect(
std.mem.eql(u8, "hello", returns.literal()) == true,
);
try std.testing.expect(
std.mem.eql(u8, "hello", &returns.array()) == true,
);
}
test "passing around: buffers" {
    const returns = struct {
        // If the caller passes a buffer and you change it, you CAN return a
        // slice of THAT buffer, because it's not part of this function's
        // stack. The given buffer belongs to the "upper" block and will only
        // be thrown away when the "upper" block ends.
        //
        // Writes "earth" into the first five bytes of `buffer` (which must be
        // at least five bytes long) and returns the same slice.
        fn slice(buffer: []u8) []u8 {
            @memcpy(buffer[0..5], "earth");
            return buffer;
        }

        // Same thing: if we receive a buffer, we can return a many-item
        // pointer, because it just points to the caller's buffer.
        //
        // A slice already exposes its pointer as `.ptr`, so there is no need
        // to take the address of the parameter first.
        fn manyitem(buffer: []u8) [*]u8 {
            return buffer.ptr;
        }

        // Where's the literal? It's a little obvious: literals are not stack
        // allocated and they are comptime, so we cannot assign any runtime
        // value into a literal.

        // Returns a COPY of the first five bytes of the buffer, not a
        // reference to it. Cool, no?
        fn array(buffer: []u8) [5]u8 {
            return buffer[0..5].*;
        }
    };

    var buffer: [5]u8 = undefined;
    // expectEqualStrings takes (expected, actual), in that order.
    try std.testing.expectEqualStrings("earth", returns.slice(&buffer));
    try std.testing.expectEqualStrings(
        "earth",
        returns.manyitem(&buffer)[0..5],
    );

    var array = returns.array(&buffer);
    try std.testing.expectEqualStrings("earth", &array);

    // Overwriting the copy must leave the original buffer untouched.
    @memcpy(&array, "hello");
    try std.testing.expectEqualStrings("earth", &buffer);
    try std.testing.expectEqualStrings("hello", &array);
}
test "passing around: allocators" {
    // With allocators there's no secret: just alloc, then return. You simply
    // don't need to stress about the stack thing anymore.
    //
    // Remember to free! And remember that allocation can fail, so the error
    // is propagated to the caller instead of being asserted away.
    const returns = struct {
        // Allocates five bytes, fills them with "hello" and returns the
        // slice. The caller owns the returned memory and must free it.
        fn slice(allocator: std.mem.Allocator) ![]u8 {
            const string = try allocator.alloc(u8, 5);
            @memcpy(string, "hello");
            return string;
        }

        // Same as `slice`, but hands back only the pointer. The caller must
        // remember the length (5) in order to use and free the memory.
        fn manyitem(allocator: std.mem.Allocator) ![*]u8 {
            const string = try allocator.alloc(u8, 5);
            @memcpy(string, "hello");
            return string.ptr;
        }
    };

    const allocator = std.testing.allocator;

    const slice = try returns.slice(allocator);
    defer allocator.free(slice);

    const manyitem = try returns.manyitem(allocator);
    // A many-item pointer has no length, so re-slice it before freeing.
    defer allocator.free(manyitem[0..5]);

    // expectEqualStrings takes (expected, actual), in that order.
    try std.testing.expectEqualStrings("hello", slice);
    try std.testing.expectEqualStrings("hello", manyitem[0..5]);
}
test "some tips" {
    // These are the functions that I use most often. Check std.mem and see if
    // it has something that can help you before trying to reinvent the string
    // wheel.
    {
        // Copy into a buffer. Useful when you have a literal and an array.
        var buffer: [5]u8 = undefined;
        std.mem.copyForwards(u8, &buffer, "hello");
        try std.testing.expectEqualStrings("hello", &buffer);
    }
    {
        // Copy with an allocator. The copy is owned by us, so free it.
        const slice = try std.testing.allocator.dupe(u8, "hello");
        defer std.testing.allocator.free(slice);
        try std.testing.expectEqualStrings("hello", slice);
    }
    {
        // Initialize with zeroes, if you don't want memory "garbage".
        var buffer = std.mem.zeroes([64]u8);
        std.mem.copyForwards(u8, &buffer, "hello");

        // Take everything up to the first zero byte. Unlike a hand-written
        // search loop that stores the index in an `undefined` variable, this
        // stays well-defined even if the buffer contains no zero at all.
        const text = std.mem.sliceTo(&buffer, 0);
        try std.testing.expectEqualStrings("hello", text);
    }
    {
        // Are they equal? Slices cannot be compared with `==`.
        try std.testing.expect(!std.mem.eql(u8, "mars", "earth"));
        try std.testing.expect(std.mem.eql(u8, "mars", "mars"));
    }
}
@kartikynwa
Copy link

Thank you very much for this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment