Skip to content

Instantly share code, notes, and snippets.

@maxsei
Created September 17, 2023 20:20
Show Gist options
  • Select an option

  • Save maxsei/f97216d03b0e665674fdd40c1402ee5c to your computer and use it in GitHub Desktop.

Select an option

Save maxsei/f97216d03b0e665674fdd40c1402ee5c to your computer and use it in GitHub Desktop.
Two CSV parser implementations in Zig
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const log = std.log;
/// Parsed CSV data together with the memory that keeps its field slices alive.
///
/// Exactly how the field bytes are stored depends on the parser that built the
/// value: either one contiguous `backing` buffer that the fields point into, or
/// an `arena` that holds individual copies of every field.
const Csv = struct {
    /// One entry per CSV row; each row is an owned slice of field strings.
    fields: std.ArrayList([][]const u8),
    /// Raw file contents the field slices point into, when the parser kept them.
    backing: ?[]const u8,
    /// Allocator that owns `backing`, the `fields` list and every row slice.
    allocator: Allocator,
    /// Arena holding copied field bytes, when the parser copied them.
    arena: ?std.heap.ArenaAllocator,

    const Self = @This();

    /// Releases every allocation owned by this value.
    ///
    /// Row slices are freed with `allocator`, which is what both parsers in
    /// this file use to create them via `ArrayList.toOwnedSlice`. The value
    /// must not be used after this call.
    pub fn deinit(self: *Self) void {
        // Each row was detached from a temporary list with toOwnedSlice(), so
        // the outer list's deinit() alone would leak them.
        for (self.fields.items) |row| self.allocator.free(row);
        self.fields.deinit();

        if (self.backing) |backing| self.allocator.free(backing);
        if (self.arena) |arena| arena.deinit();

        self.* = undefined;
    }
};
/// Parses the CSV file at the absolute `path` by loading it fully into memory.
///
/// Fields are separated by `,` and rows by `\n`; no quoting or escaping is
/// interpreted. Field slices in the result point directly into the file
/// contents, which are returned as `Csv.backing`. A final row that is not
/// terminated by `\n` is still emitted, including a trailing empty field
/// (for example `"a,"` yields the row `{ "a", "" }`).
///
/// The caller owns the returned value and must call `Csv.deinit` on it.
/// Returns any error from opening/reading the file or from `allocator`; on
/// error nothing allocated here is left behind.
pub fn GetCsv1(path: []const u8, allocator: Allocator) !Csv {
    const file = try std.fs.openFileAbsolute(path, .{});
    defer file.close();

    const buf = try file.readToEndAllocOptions(allocator, 0xefffffff, 0x4000, @alignOf(u8), null);
    errdefer allocator.free(buf);

    var csv = std.ArrayList([][]const u8).init(allocator);
    errdefer {
        for (csv.items) |row| allocator.free(row);
        csv.deinit();
    }

    // Fields of the row currently being assembled.
    var fields = std.ArrayList([]const u8).init(allocator);
    defer fields.deinit();

    // Offset in `buf` where the field currently being scanned begins.
    var field_start: usize = 0;
    for (buf, 0..) |byte, idx| {
        if (byte != ',' and byte != '\n') continue;

        try fields.append(buf[field_start..idx]);
        const field_len = idx - field_start;
        field_start = idx + 1;
        if (byte == ',') continue;

        // End of row: hand the collected fields over to the result.
        const row = try fields.toOwnedSlice();
        {
            // Until the row is stored in `csv`, nobody else will free it.
            errdefer allocator.free(row);
            try csv.append(row);
        }
        if (csv.items.len % 10000 == 0) {
            log.info("lines: {d} cur.len: {d} i: {d}", .{ csv.items.len, buf.len - field_start, field_len });
        }
    }

    // Flush a last row that has no terminating newline. `fields` is non-empty
    // when the data ended right after a comma, meaning an empty final field.
    if (field_start < buf.len or fields.items.len > 0) {
        try fields.append(buf[field_start..]);
        const row = try fields.toOwnedSlice();
        errdefer allocator.free(row);
        try csv.append(row);
    }

    return Csv{
        .backing = buf,
        .fields = csv,
        .allocator = allocator,
        .arena = null,
    };
}
/// Returns `pending` followed by `tail` as one slice allocated from
/// `token_allocator`, growing `pending` in place when it is present.
fn extendToken(token_allocator: Allocator, pending: ?[]u8, tail: []const u8) Allocator.Error![]u8 {
    const head = pending orelse return token_allocator.dupe(u8, tail);
    const joined = try token_allocator.realloc(head, head.len + tail.len);
    @memcpy(joined[head.len..], tail);
    return joined;
}

/// Parses the CSV file at the absolute `path` by streaming it through a
/// fixed 4096-byte buffer.
///
/// Fields are separated by `,` and rows by `\n`; no quoting or escaping is
/// interpreted. Every field is copied into an arena that is returned as
/// `Csv.arena`, so the result does not reference the read buffer. A final row
/// that is not terminated by `\n` is still emitted, including a trailing
/// empty field.
///
/// The caller owns the returned value and must call `Csv.deinit` on it.
/// Returns `error.NotEnoughBufferSpaceToFindAnyTokens` when a read fills the
/// whole buffer without containing any delimiter, plus any error from
/// opening/reading the file or from `allocator`; on error nothing allocated
/// here is left behind.
pub fn GetCsv2(path: []const u8, allocator: Allocator) !Csv {
    const file = try std.fs.openFileAbsolute(path, .{});
    defer file.close();

    var fields = std.ArrayList([][]const u8).init(allocator);
    errdefer {
        for (fields.items) |row| allocator.free(row);
        fields.deinit();
    }

    var arena = std.heap.ArenaAllocator.init(allocator);
    errdefer arena.deinit();
    const token_allocator = arena.allocator();

    // Fields of the row currently being assembled.
    var line = std.ArrayList([]const u8).init(allocator);
    defer line.deinit();

    const delimiters = ",\n";
    var buf: [4096]u8 = undefined;
    // Start of a token whose end has not been seen yet (it straddles reads).
    var pending: ?[]u8 = null;

    while (true) {
        const n = try file.read(buf[0..]);
        if (n == 0) break;

        // Only the bytes actually read are valid; the rest of `buf` is stale.
        var cur: []const u8 = buf[0..n];
        while (std.mem.indexOfAny(u8, cur, delimiters)) |tok_end| {
            try line.append(try extendToken(token_allocator, pending, cur[0..tok_end]));
            pending = null;

            if (cur[tok_end] == '\n') {
                const row = try line.toOwnedSlice();
                // Until the row is stored in `fields`, nobody else will free it.
                errdefer allocator.free(row);
                try fields.append(row);
            }
            cur = cur[tok_end + 1 ..];
        }

        // A completely full buffer without a single delimiter is rejected.
        if (cur.len == buf.len) {
            return error.NotEnoughBufferSpaceToFindAnyTokens;
        }
        // Keep the unfinished tail, extending any fragment carried over from
        // earlier reads instead of replacing it.
        if (cur.len > 0) {
            pending = try extendToken(token_allocator, pending, cur);
        }
    }

    // Flush a last row that has no terminating newline. `line` is non-empty
    // with no pending bytes when the data ended right after a comma.
    if (pending != null or line.items.len > 0) {
        var last: []const u8 = "";
        if (pending) |tail| last = tail;
        try line.append(last);

        const row = try line.toOwnedSlice();
        errdefer allocator.free(row);
        try fields.append(row);
    }

    return Csv{
        .fields = fields,
        .allocator = allocator,
        .arena = arena,
        .backing = null,
    };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment