Created
May 11, 2024 20:03
-
-
Save AndreyArthur/1faac27e88af0175080553e7354c1b41 to your computer and use it in GitHub Desktop.
Zig strings beginner cheat-sheet.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// When I was starting to learn Zig, strings were a real pain, so I made this
// little guide to help you understand what each kind of string type does and
// why some of them seem to be "wrong".
//
// I will assume that you already know how pointers, const, var, comptime and
// the stack work. You don't need to be an expert, but I will not spend time
// detailing these concepts.
//
// Sorry for any English mistakes; I'm far from being a fluent English speaker,
// but what counts is the information.
const std = @import("std"); | |
/// Entry point that intentionally does nothing; all of the content of this
/// file lives in the `test` blocks below.
pub fn main() void {
    return;
}
test "lists" {
    // Before jumping to strings we must know what each kind of list does in
    // Zig.
    //
    // Zig does not have a string type: every type that can represent a string
    // is some kind of list type. Three of them matter the most.

    // Arrays
    //
    // Arrays in Zig are values. An array variable holds all of its items, not
    // a pointer to the first element as it would in C.
    //
    // Arrays must have a fixed length known at compile time. The compiler can
    // also infer the length when an underscore is given.
    var array = [4]i32{ -1, 0, 1, 2 };
    try std.testing.expectEqual([4]i32, @TypeOf(array));

    const array_inferred = [_]i32{ 1, 2 };
    try std.testing.expectEqual([2]i32, @TypeOf(array_inferred));

    // Many-item pointers
    //
    // Often we do not know exactly how many items a list has. Because Zig
    // arrays need a length known at compile time, another type is used for
    // that case: the many-item pointer.
    //
    // A many-item pointer is a single pointer to the first element of a
    // contiguous region of memory whose length is unknown.
    //
    // A simple way to understand a many-item pointer is to think of what a C
    // array turns into once it is passed around. Like:
    //
    //     int items[] = {1, 2, 3, 4, 5};
    //
    // In most expressions `items` decays to a pointer to `items[0]`, and from
    // that pointer alone there is no way to recover the length.
    //
    // The main difference between such a C pointer and a Zig many-item pointer
    // is that Zig does not let you dereference it directly, with or without
    // an offset. For example:
    //
    // | ----------------------------|-------------------------------------|
    // | C                           | Zig                                 |
    // | ----------------------------|-------------------------------------|
    // | items[0] == 1          : Ok | items[0] == 1             : Ok      |
    // | *(items) == 1          : Ok | items.* == 1              : Illegal |
    // | *(items + 1) == 2      : Ok | (items + 1).* == 2        : Illegal |
    // | 0[items] == 1          : Ok | 0[items] == 1             : Illegal |
    // | items++; items[0] == 2 : Ok | items += 1; items[0] == 2 : Ok      |
    // | ----------------------------|-------------------------------------|
    //
    // Many-item pointers are mostly used to integrate with C, given that their
    // nature is basically the same. You almost never want to declare a
    // many-item pointer in pure Zig code.

    // An array literal is a comptime value, so a pointer taken directly to it
    // points to constant data. To get a pointer to mutable items we first
    // store the array in a `var` and only then take the many-item pointer.
    var manyitem_array = [_]i32{ 1, 2, 3, 4, 5 };
    const manyitem = (&manyitem_array).ptr;
    try std.testing.expectEqual([*]i32, @TypeOf(manyitem));

    const manyitem_comptime_literal = (&[_]i32{ 1, 2, 3, 4, 5 }).ptr;
    try std.testing.expectEqual(
        [*]const i32,
        @TypeOf(manyitem_comptime_literal),
    );

    // Slices
    //
    // Finally we reached the cool part. A slice is just a pointer to memory
    // that already exists, bundled with a length. You can think of a slice as
    // a struct. Like:
    //
    //     fn Slice(comptime T: type) type {
    //         return struct {
    //             ptr: [*]T,
    //             len: usize,
    //         };
    //     }
    //
    // But instead of being a struct, it is a type built into the language.
    // There are many ways to coerce other types into slices; the simplest is
    // to declare the slice type and slice the value with a range.

    // Note that this only works because `array` is a `var`. If it were a
    // `const`, the result would have to be a `[]const i32` (slice of
    // constants).
    const slice_from_array: []i32 = array[0..4];
    try std.testing.expectEqual([]i32, @TypeOf(slice_from_array));
    try std.testing.expectEqual(@as(usize, 4), slice_from_array.len);
    try std.testing.expectEqual(
        @intFromPtr(&array),
        @intFromPtr(slice_from_array.ptr),
    );

    // However, a `var` can still be coerced into a `[]const i32` (slice of
    // constants).
    const slice_of_constants_from_array: []const i32 = array[0..4];
    try std.testing.expectEqual(
        []const i32,
        @TypeOf(slice_of_constants_from_array),
    );
    try std.testing.expectEqual(
        @as(usize, 4),
        slice_of_constants_from_array.len,
    );
    try std.testing.expectEqual(
        @intFromPtr(&array),
        @intFromPtr(slice_of_constants_from_array.ptr),
    );
    // Zig always lets you be "more safe" and never "less safe." - 40, Ziglings.

    // Wait, what is this? A slice of mutable items taken from a `const`
    // many-item pointer? Yes! The `const` only applies to the pointer variable
    // itself. The items live in `manyitem_array`, which is a `var`, so the
    // pointed-to type is `i32`, not `const i32`.
    const slice_of_manyitem: []i32 = manyitem[0..5];
    try std.testing.expectEqual([]i32, @TypeOf(slice_of_manyitem));
    try std.testing.expectEqual(@as(usize, 5), slice_of_manyitem.len);
    try std.testing.expectEqual(
        @intFromPtr(manyitem),
        @intFromPtr(slice_of_manyitem.ptr),
    );

    // Other ways to coerce slices
    const manual_full_range_slice: []i32 = array[0..4];
    const auto_full_range_slice: []i32 = array[0..];
    const pointer_syntax_slice: []i32 = &array;
    const manual_restricted_range_slice: []i32 = array[1..3];

    try std.testing.expectEqualSlices(
        i32,
        manual_full_range_slice,
        auto_full_range_slice,
    );
    try std.testing.expectEqualSlices(
        i32,
        auto_full_range_slice,
        pointer_syntax_slice,
    );
    try std.testing.expectEqual(@as(i32, 0), manual_restricted_range_slice[0]);
    try std.testing.expectEqual(@as(i32, 1), manual_restricted_range_slice[1]);
    try std.testing.expectEqual(
        @as(usize, 2),
        manual_restricted_range_slice.len,
    );

    // So, to make it clear.
    //
    // When you see a [<num>]<type> it's an array.
    // When you see a [*]<type> it's a many-item pointer.
    // When you see a []<type> it's a slice.
}
test "sentinels" {
    // A sentinel is a value that marks where a list ends. Arrays, many-item
    // pointers and slices can all carry one.
    //
    // Lists that represent strings very commonly use `0` as their sentinel.
    //
    // The sentinel is not counted in the length of the list.
    const greeting = "Hello, World!";
    try std.testing.expectEqual(*const [13:0]u8, @TypeOf(greeting));
    try std.testing.expectEqual(@as(usize, 13), greeting.len);

    // Dereferencing the literal copies it into a local array; the sentinel is
    // part of the array type and comes along with the copy.
    var greeting_copy = greeting.*;
    try std.testing.expectEqual([13:0]u8, @TypeOf(greeting_copy));
    try std.testing.expectEqual(@as(usize, 13), greeting_copy.len);

    // The sentinel is kept when we take the many-item pointer of the array.
    const first_item = (&greeting_copy).ptr;
    try std.testing.expectEqual([*:0]u8, @TypeOf(first_item));

    // Sentinels are especially useful with many-item pointers: thanks to the
    // sentinel, format functions (and our own code) can "detect" where the
    // list ends. Without it we would have to know the length at compile time
    // or write more complex code to find the end of the list.
}
test "literals" {
    // We finally reached strings. This test covers string literals and some
    // strategies for dealing with them.
    //
    // String literals in Zig are COMPTIME values and are NOT STACK ALLOCATED.
    //
    // That is why a literal is a pointer to a constant u8 array terminated by
    // a `0` sentinel: `*const [<len>:0]u8`.

    // This is how a literal is declared.
    const literal = "Hello, World!";
    try std.testing.expectEqual(*const [13:0]u8, @TypeOf(literal));

    // `.*` on a literal is not just a dereference: it makes a copy of the
    // pointed-to array.
    _ = literal.*;

    // Let's coerce (or copy) the literal into every kind of list.

    // These conversions work directly on the literal.
    const copy_with_sentinel = literal.*;
    try std.testing.expectEqual([13:0]u8, @TypeOf(copy_with_sentinel));

    const copy_plain: [13]u8 = literal.*;
    try std.testing.expectEqual([13]u8, @TypeOf(copy_plain));

    const const_many_with_sentinel: [*:0]const u8 = literal;
    try std.testing.expectEqual(
        [*:0]const u8,
        @TypeOf(const_many_with_sentinel),
    );

    const const_many: [*]const u8 = literal;
    try std.testing.expectEqual([*]const u8, @TypeOf(const_many));

    const const_slice_with_sentinel: [:0]const u8 = literal;
    try std.testing.expectEqual(
        [:0]const u8,
        @TypeOf(const_slice_with_sentinel),
    );

    const const_slice: []const u8 = literal;
    try std.testing.expectEqual([]const u8, @TypeOf(const_slice));

    // The non-const pointer types need a `var` array to point into.
    // And yes, things like `&(literal.*)` won't work.
    var mutable_copy = literal.*;

    const many_with_sentinel: [*:0]u8 = &mutable_copy;
    try std.testing.expectEqual([*:0]u8, @TypeOf(many_with_sentinel));

    const many: [*]u8 = &mutable_copy;
    try std.testing.expectEqual([*]u8, @TypeOf(many));

    const mutable_slice: []u8 = &mutable_copy;
    try std.testing.expectEqual([]u8, @TypeOf(mutable_slice));

    const mutable_slice_with_sentinel: [:0]u8 = &mutable_copy;
    try std.testing.expectEqual([:0]u8, @TypeOf(mutable_slice_with_sentinel));

    // Make sure every one of them holds the same text. Many-item pointers
    // carry no length, so they are sliced with the literal's length first.
    const len = literal.len;
    try std.testing.expectEqualStrings(literal, &copy_with_sentinel);
    try std.testing.expectEqualStrings(literal, &copy_plain);
    try std.testing.expectEqualStrings(
        literal,
        const_many_with_sentinel[0..len],
    );
    try std.testing.expectEqualStrings(literal, const_many[0..len]);
    try std.testing.expectEqualStrings(literal, const_slice_with_sentinel);
    try std.testing.expectEqualStrings(literal, const_slice);
    try std.testing.expectEqualStrings(literal, many_with_sentinel[0..len]);
    try std.testing.expectEqualStrings(literal, many[0..len]);
    try std.testing.expectEqualStrings(literal, mutable_slice);
    try std.testing.expectEqualStrings(literal, mutable_slice_with_sentinel);
}
test "function return mistakes" {
    const returns = struct {
        // MISTAKE 1: returning a slice of a stack allocated array.
        //
        // We have already seen that a slice does not hold the items of an
        // array; it only points to one item and carries a length.
        //
        // So a slice of an array that lives in the function's own stack frame
        // is dangling as soon as the function returns: reading it is undefined
        // behaviour and you will end up with garbage (or, by bad luck, with
        // the old bytes). The example is kept as a comment because a test
        // must not depend on undefined behaviour, and the compiler may reject
        // such code outright.
        //
        //     fn slice() []u8 {
        //         var slice_array = [_]u8{ 'h', 'e', 'l', 'l', 'o' };
        //         return &slice_array; // dangling!
        //     }

        // MISTAKE 2: returning a many-item pointer to a stack allocated array.
        //
        // Many-item pointers also don't hold memory, they're just pointers.
        // The array they point to is thrown away when the function ends,
        // exactly like in the slice case.
        //
        //     fn manyitem() [*]u8 {
        //         var manyitem_array = [_]u8{ 'h', 'e', 'l', 'l', 'o' };
        //         return (&manyitem_array).ptr; // dangling!
        //     }

        // So returning a literal will also not work. Yes? No! Zig string
        // literals are comptime and not stack allocated, so they do not go
        // away when the function ends.
        fn literal() []const u8 {
            const my_literal = "hello";
            return my_literal;
        }

        // And returning an array works too, for a different reason: an array
        // IS a value. It does not point to the items, it contains them, so
        // the caller receives its own copy.
        fn array() [5]u8 {
            return [_]u8{ 'h', 'e', 'l', 'l', 'o' };
        }
    };

    try std.testing.expectEqualStrings("hello", returns.literal());

    const returned_array = returns.array();
    try std.testing.expectEqualStrings("hello", &returned_array);
}
test "passing around: buffers" {
    const returns = struct {
        // If you receive a buffer and change it, you CAN return a slice of
        // THAT buffer, because it is not part of this function's stack. The
        // buffer belongs to the "upper" block and is only thrown away when
        // that block ends.
        //
        // The buffer must hold at least 5 bytes.
        fn slice(buffer: []u8) []u8 {
            @memcpy(buffer[0..5], "earth");
            return buffer;
        }

        // Same thing: if we receive a buffer we can return a many-item
        // pointer, because it just points into the caller's buffer.
        fn manyitem(buffer: []u8) [*]u8 {
            return buffer.ptr;
        }

        // Where's the literal? It's a little obvious: literals are comptime
        // and not stack allocated, so no runtime value can be written into
        // one.

        // Returns a copy of the first 5 bytes of the buffer, not a reference
        // to them. Cool, no? The buffer is only read, so it is taken as a
        // slice of constants.
        fn array(buffer: []const u8) [5]u8 {
            return buffer[0..5].*;
        }
    };

    var buffer: [5]u8 = undefined;
    try std.testing.expectEqualStrings("earth", returns.slice(&buffer));

    // `manyitem` does not write anything; the buffer still holds what
    // `slice` stored in it above.
    try std.testing.expectEqualStrings(
        "earth",
        returns.manyitem(&buffer)[0..5],
    );

    var array = returns.array(&buffer);
    try std.testing.expectEqualStrings("earth", &array);

    // Changing the returned array must leave the buffer alone, which proves
    // that the array is an independent copy.
    @memcpy(&array, "hello");
    try std.testing.expectEqualStrings("earth", &buffer);
    try std.testing.expectEqualStrings("hello", &array);
}
test "passing around: allocators" {
    // With allocators there's no secret: allocate, then return. You simply
    // don't need to worry about the stack anymore.
    //
    // Allocation can fail, so both helpers return an error union and the
    // caller decides what to do about `error.OutOfMemory`.
    //
    // Remember to free!
    const returns = struct {
        // Returns a newly allocated "hello". The caller owns the memory and
        // must free it with the same allocator.
        fn slice(allocator: std.mem.Allocator) std.mem.Allocator.Error![]u8 {
            const string = try allocator.alloc(u8, 5);
            @memcpy(string, "hello");
            return string;
        }

        // Same as `slice`, but only the pointer is returned. The caller has
        // to remember that the allocation is 5 bytes long in order to free
        // it.
        fn manyitem(allocator: std.mem.Allocator) std.mem.Allocator.Error![*]u8 {
            const string = try allocator.alloc(u8, 5);
            @memcpy(string, "hello");
            return string.ptr;
        }
    };

    const allocator = std.testing.allocator;

    const slice = try returns.slice(allocator);
    defer allocator.free(slice);

    const manyitem = try returns.manyitem(allocator);
    // A many-item pointer has no length, so it is sliced back to the
    // allocated size before being freed.
    defer allocator.free(manyitem[0..5]);

    try std.testing.expectEqualStrings("hello", slice);
    try std.testing.expectEqualStrings("hello", manyitem[0..5]);
}
test "some tips" {
    // These are the functions that I use most often. Check std.mem and see if
    // it has something that can help you before trying to reinvent the string
    // wheel.
    {
        // Copy into a buffer. Useful when you have a literal and an array.
        var buffer: [5]u8 = undefined;
        std.mem.copyForwards(u8, &buffer, "hello");
        try std.testing.expectEqualStrings("hello", &buffer);
    }
    {
        // Copy with an allocator. The copy is owned by us and must be freed.
        const slice = try std.testing.allocator.dupe(u8, "hello");
        defer std.testing.allocator.free(slice);
        try std.testing.expectEqualStrings("hello", slice);
    }
    {
        // Initialize with zeroes, if you don't want memory "garbage".
        var buffer = std.mem.zeroes([64]u8);
        std.mem.copyForwards(u8, &buffer, "hello");

        // Take everything before the first zero byte. `sliceTo` yields the
        // whole buffer when there is no zero byte at all, so the result is
        // always well defined.
        const text = std.mem.sliceTo(&buffer, 0);
        try std.testing.expectEqualStrings("hello", text);
    }
    {
        // Are equal?
        try std.testing.expect(!std.mem.eql(u8, "mars", "earth"));
        try std.testing.expect(std.mem.eql(u8, "mars", "mars"));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you very much for this.