Skip to content

Instantly share code, notes, and snippets.

@cfr
Last active December 3, 2023 18:27
Show Gist options
  • Save cfr/b2ccc39168bddf2c72def8ce8bd3ac98 to your computer and use it in GitHub Desktop.
Save cfr/b2ccc39168bddf2c72def8ce8bd3ac98 to your computer and use it in GitHub Desktop.
/// Discriminates the kinds of entity an `Entity` can hold.
pub const EntityType = enum { variable, array, object };

/// A named value; both parts are borrowed slices.
pub const Variable = struct {
    name: []const u8,
    value: []const u8,
};

/// A named sequence of variables.
pub const Array = struct {
    name: []const u8,
    content: []const Variable,
};

/// A named group of variable fields.
pub const Object = struct {
    name: []const u8,
    fields: []const Variable,
};

/// A config entity tagged by `EntityType`.
/// The union field names have to match the tag enum's fields exactly, so the
/// object variant is spelled `object` (lowercase), like the enum member.
pub const Entity = union(EntityType) {
    variable: Variable,
    array: Array,
    object: Object,
};
// zig build-exe langconf2rp.zig
// ./langconf2rp data/magiccosmetics.yml
const std = @import("std");
const yaml = @import("yamlparser.zig");
const Allocator = std.mem.Allocator;
// Pipeline: config -> ast -> localized -> config
// Stages:   parse, transform, generate
/// Output of `Transformer.localize` for one config line.
const LocalizedConf = struct {
    /// Language-pack lines, one per extracted substring.
    pkgs: std.ArrayList([]const u8),
    /// The rewritten config line, including its trailing newline.
    cfg: []const u8,
};
/// One piece of text extracted from a config value.
const LocalizedString = struct {
    /// Per-line key of the piece (`p1`, `p2`, ... as produced by `feedLine`).
    key: []const u8,
    /// The extracted text itself.
    content: []const u8,
};
/// Result of `Transformer.feedLine` for one accepted config line.
const ParsedLine = struct {
    /// Text pieces that were replaced by `<lang:...>` references.
    substrings: std.ArrayList(LocalizedString),
    /// Everything before the first `:` of the line, unmodified.
    var_id: []const u8,
    /// `var_id` with `-` and ` ` replaced by `_`.
    var_id_sanitized: []const u8,
    /// The rewritten value, without the surrounding quotes.
    cfg_content: []const u8,
};
/// States the line parser can be in.
const ParserState = enum {
    key,
    array,
    object,
};

/// Parser state plus its payload.
/// NOTE(review): the `array`/`object` payload is presumably the name of the
/// enclosing declaration; nothing visible reads it yet, so confirm.
const ParserContext = union(ParserState) {
    key,
    array: []const u8,
    object: []const u8,
};
/// Rewrites quoted config values into `<lang:...>` references and collects
/// the extracted text for a language pack.
///
/// Every slice produced by the methods below is allocated from `allocator`
/// and never freed here; callers either pass an arena or free the results.
const Transformer = struct {
    allocator: Allocator,
    /// Sanitized config name; first component of every generated key.
    config_name: []const u8,
    /// Not read by any method of this struct.
    parent: ?[]const u8 = null,
    /// Not read by any method of this struct.
    context: ?ParserContext = null,

    /// Creates a transformer whose keys are prefixed with the sanitized
    /// `config_name`. If the sanitized copy cannot be allocated the raw name
    /// is used instead, so `config_name` may or may not be owned memory.
    fn init(allocator: Allocator, config_name: []const u8) Transformer {
        const name = Transformer.sanitizeStatic(allocator, config_name) catch config_name;
        return Transformer{ .allocator = allocator, .config_name = name };
    }

    /// Parses one `id: 'value'` line.
    ///
    /// Runs of plain text inside the value are moved into `substrings` under
    /// the keys `p1`, `p2`, ... and replaced in `cfg_content` by
    /// `<lang:{config}.{id}.{key}>`. Color codes (`&x`, `§x`), `%placeholder%`
    /// and `<tag>` spans are copied through unchanged.
    ///
    /// Returns `InvalidLine` when `yaml.LineParser.validate` rejects the line,
    /// or `OutOfMemory` when an allocation fails.
    fn feedLine(self: *Transformer, line: []const u8) !ParsedLine {
        if (!yaml.LineParser.validate(line)) {
            return yaml.LineScanError.InvalidLine;
        }
        var parser = std.fmt.Parser{ .buf = line };
        const var_id = parser.until(':');
        const var_id_sanitized = try self.sanitize(var_id);
        _ = parser.until('\''); // skip to the content
        _ = parser.char(); // skip the opening quote

        var config_line = std.ArrayList(u8).init(self.allocator);
        var substrings = std.ArrayList(LocalizedString).init(self.allocator);
        var substring = std.ArrayList(u8).init(self.allocator);
        const char_parser = yaml.CharParser{};
        // usize: a u8 counter overflows on a line with more than 255 pieces.
        var key_id: usize = 1;

        while (parser.char()) |char| {
            if (char == ' ' and substring.items.len == 0) {
                // Spaces in front of a piece stay in the config line.
                try config_line.append(' ');
                continue;
            }

            // `§` is the two bytes C2 A7 in UTF-8. It is recognised here, by
            // looking one byte ahead, because the byte-wise CharParser cannot
            // classify the lead byte on its own.
            const is_section = char == 0xc2 and parser.pos < line.len and line[parser.pos] == 0xa7;
            // Only ASCII bytes are handed to the CharParser. A stray `>` is
            // excluded as well: it is listed as a delimiter but `scan` has no
            // case for it.
            const is_delimiter = char < 0x80 and char != '>' and char_parser.isSplitter(char);
            const kind = if (is_section)
                yaml.Char{ .color_code = {} }
            else if (is_delimiter)
                char_parser.scan(char)
            else
                yaml.Char{ .content = {} };

            // A delimiter ends the pending text piece: register it and build
            // the reference that takes its place in the config line.
            const split_key: ?[]const u8 = blk: {
                if (!(is_section or is_delimiter) or substring.items.len == 0) {
                    break :blk null;
                }
                const key_name = try std.fmt.allocPrint(self.allocator, "p{d}", .{key_id});
                const content = try self.allocator.dupe(u8, substring.items);
                try substrings.append(LocalizedString{ .key = key_name, .content = content });
                key_id += 1;
                substring.clearRetainingCapacity();
                break :blk try std.fmt.allocPrint(self.allocator, "<lang:{s}.{s}.{s}>", .{
                    self.config_name,
                    var_id_sanitized,
                    key_name,
                });
            };

            switch (kind) {
                .color_code => {
                    if (split_key) |key| try config_line.appendSlice(key);
                    try config_line.append(char);
                    if (is_section) {
                        // Copy the second byte of `§`.
                        if (parser.char()) |tail| try config_line.append(tail);
                    }
                    // The byte after the marker is the color id.
                    if (parser.char()) |color_id| try config_line.append(color_id);
                },
                .placeholder, .tag => |delimiter| {
                    if (split_key) |key| try config_line.appendSlice(key);
                    const parsed_id = parser.until(delimiter);
                    _ = parser.char(); // skip the closing delimiter
                    try config_line.append(char);
                    try config_line.appendSlice(parsed_id);
                    try config_line.append(delimiter);
                },
                .quote => {
                    // The quote itself is not copied; `localize` adds the quotes back.
                    if (split_key) |key| try config_line.appendSlice(key);
                },
                .content => try substring.append(char),
            }
        }
        // NOTE: text still pending in `substring` here (anything after the
        // last delimiter) is not emitted.
        return ParsedLine{
            .substrings = substrings,
            .var_id = var_id,
            .var_id_sanitized = var_id_sanitized,
            .cfg_content = config_line.items,
        };
    }

    /// Renders a parsed line: one `"config.id.key": "text"` line per extracted
    /// piece, plus the rewritten `id: '...'` config line.
    fn localize(self: *Transformer, parsed_line: ParsedLine) !LocalizedConf {
        var pkgs = std.ArrayList([]const u8).init(self.allocator);
        for (parsed_line.substrings.items) |loc| {
            const pkg_line = try std.fmt.allocPrint(self.allocator, "\"{s}.{s}.{s}\": \"{s}\"\n", .{
                self.config_name,
                parsed_line.var_id_sanitized,
                loc.key,
                loc.content,
            });
            try pkgs.append(pkg_line);
        }
        const cfg = try std.fmt.allocPrint(self.allocator, "{s}: \'{s}\'\n", .{
            parsed_line.var_id,
            parsed_line.cfg_content,
        });
        return LocalizedConf{ .pkgs = pkgs, .cfg = cfg };
    }

    /// Returns a copy of `name` with every `-` and ` ` replaced by `_`.
    /// The copy is allocated from `allocator`; the caller owns it.
    fn sanitizeStatic(allocator: Allocator, name: []const u8) ![]const u8 {
        const output = try allocator.dupe(u8, name);
        std.mem.replaceScalar(u8, output, '-', '_');
        std.mem.replaceScalar(u8, output, ' ', '_');
        return output;
    }

    /// `sanitizeStatic` using this transformer's allocator.
    fn sanitize(self: *Transformer, name: []const u8) ![]const u8 {
        return sanitizeStatic(self.allocator, name);
    }
};
/// Entry point: `langconf2rp <filename>`.
///
/// Reads the config line by line and writes three files into the current
/// directory, named after the input's base name:
///   `{name}_localized.yml` - the config with text replaced by references,
///   `{name}.json`          - the extracted text, one entry per line,
///   `{name}_skip.yml`      - the lines that were rejected by the parser.
/// Lines longer than the 1024-byte read buffer make the read fail.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(allocator);
    if (args.len <= 1) {
        const program = if (args.len > 0) args[0] else "langconf2rp";
        try stdout.print("USAGE: {s} <filename>\n", .{program});
        return;
    }

    const filename = args[1];
    var config = try std.fs.cwd().openFile(filename, .{});
    defer config.close();
    const config_name = splitFilename(filename);

    // The output names come from the arena so they are released with it.
    const localized_config_name = try std.fmt.allocPrint(
        allocator,
        "{s}_localized.yml",
        .{config_name},
    );
    const localized_config = try std.fs.cwd().createFile(localized_config_name, .{});
    defer localized_config.close();

    const pkg_strings_name = try std.fmt.allocPrint(
        allocator,
        "{s}.json",
        .{config_name},
    );
    const pkg_strings = try std.fs.cwd().createFile(pkg_strings_name, .{});
    defer pkg_strings.close();

    const skip_name = try std.fmt.allocPrint(
        allocator,
        "{s}_skip.yml",
        .{config_name},
    );
    const skip = try std.fs.cwd().createFile(skip_name, .{});
    defer skip.close();

    var trans = Transformer.init(allocator, config_name);
    var buf_reader = std.io.bufferedReader(config.reader());
    var in_stream = buf_reader.reader();
    var buf: [1024]u8 = undefined;
    while (try in_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        if (trans.feedLine(line)) |parsed_line| {
            const localized_line = try trans.localize(parsed_line);
            try localized_config.writeAll(localized_line.cfg);
            for (localized_line.pkgs.items) |loc| {
                try pkg_strings.writeAll(loc);
            }
        } else |err| switch (err) {
            // Running out of memory is not a property of the line; stop.
            error.OutOfMemory => return err,
            // Any other failure means the line could not be parsed: keep it
            // verbatim in the skip file.
            else => {
                try skip.writeAll(line);
                try skip.writeAll("\n");
                try stdout.print("SKIP: {s}\n", .{line});
            },
        }
    }
    std.debug.print("Written files: {s}, {s}, {s}\n", .{
        localized_config_name,
        pkg_strings_name,
        skip_name,
    });
}
/// Returns the base name of `path`: the part after the last `/`, cut at its
/// first `.`. The result is a sub-slice of `path`.
/// Example: `data/magiccosmetics.yml` -> `magiccosmetics`.
/// Uses index searches instead of the deprecated `std.mem.splitBackwards`.
fn splitFilename(path: []const u8) []const u8 {
    const base = if (std.mem.lastIndexOfScalar(u8, path, '/')) |slash|
        path[slash + 1 ..]
    else
        path;
    const dot = std.mem.indexOfScalar(u8, base, '.') orelse return base;
    return base[0..dot];
}
test "sanitize" {
    // `init` and `sanitize` both allocate and nothing frees the results, so an
    // arena on top of the testing allocator releases them and keeps the leak
    // check of `std.testing.allocator` green.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var trans = Transformer.init(arena.allocator(), "test");
    const sanitized = try trans.sanitize("-a -b");
    try std.testing.expectEqualStrings("_a__b", sanitized);
}
/// Parses one `id: 'value'` line.
///
/// Runs of plain text inside the value are moved into `substrings` under the
/// keys `p1`, `p2`, ... and replaced in `cfg_content` by
/// `<lang:{config}.{id}.{key}>`. Color codes (`&x`, `§x`), `%placeholder%` and
/// `<tag>` spans are copied through unchanged.
///
/// Returns `InvalidLine` when `yaml.LineParser.validate` rejects the line, or
/// `OutOfMemory` when an allocation fails. All results are allocated from
/// `self.allocator` and are not freed here.
fn feedLine(self: *Transformer, line: []const u8) !ParsedLine {
    if (!yaml.LineParser.validate(line)) {
        return yaml.LineScanError.InvalidLine;
    }
    var parser = std.fmt.Parser{ .buf = line };
    const var_id = parser.until(':');
    const var_id_sanitized = try self.sanitize(var_id);
    _ = parser.until('\''); // skip to the content
    _ = parser.char(); // skip the opening quote

    var config_line = std.ArrayList(u8).init(self.allocator);
    var substrings = std.ArrayList(LocalizedString).init(self.allocator);
    var substring = std.ArrayList(u8).init(self.allocator);
    const char_parser = yaml.CharParser{};
    // usize: a u8 counter overflows on a line with more than 255 pieces.
    var key_id: usize = 1;

    while (parser.char()) |char| {
        if (char == ' ' and substring.items.len == 0) {
            // Spaces in front of a piece stay in the config line.
            try config_line.append(' ');
            continue;
        }

        // `§` is the two bytes C2 A7 in UTF-8. It is recognised here, by
        // looking one byte ahead, because the byte-wise CharParser cannot
        // classify the lead byte on its own.
        const is_section = char == 0xc2 and parser.pos < line.len and line[parser.pos] == 0xa7;
        // Only ASCII bytes are handed to the CharParser. A stray `>` is
        // excluded as well: it is listed as a delimiter but `scan` has no
        // case for it.
        const is_delimiter = char < 0x80 and char != '>' and char_parser.isSplitter(char);
        const kind = if (is_section)
            yaml.Char{ .color_code = {} }
        else if (is_delimiter)
            char_parser.scan(char)
        else
            yaml.Char{ .content = {} };

        // A delimiter ends the pending text piece: register it and build the
        // reference that takes its place in the config line.
        const split_key: ?[]const u8 = blk: {
            if (!(is_section or is_delimiter) or substring.items.len == 0) {
                break :blk null;
            }
            const key_name = try std.fmt.allocPrint(self.allocator, "p{d}", .{key_id});
            const content = try self.allocator.dupe(u8, substring.items);
            try substrings.append(LocalizedString{ .key = key_name, .content = content });
            key_id += 1;
            substring.clearRetainingCapacity();
            break :blk try std.fmt.allocPrint(self.allocator, "<lang:{s}.{s}.{s}>", .{
                self.config_name,
                var_id_sanitized,
                key_name,
            });
        };

        switch (kind) {
            .color_code => {
                if (split_key) |key| try config_line.appendSlice(key);
                try config_line.append(char);
                if (is_section) {
                    // Copy the second byte of `§`.
                    if (parser.char()) |tail| try config_line.append(tail);
                }
                // The byte after the marker is the color id.
                if (parser.char()) |color_id| try config_line.append(color_id);
            },
            .placeholder, .tag => |delimiter| {
                if (split_key) |key| try config_line.appendSlice(key);
                const parsed_id = parser.until(delimiter);
                _ = parser.char(); // skip the closing delimiter
                try config_line.append(char);
                try config_line.appendSlice(parsed_id);
                try config_line.append(delimiter);
            },
            .quote => {
                // The quote itself is not copied; `localize` adds the quotes back.
                if (split_key) |key| try config_line.appendSlice(key);
            },
            .content => try substring.append(char),
        }
    }
    // NOTE: text still pending in `substring` here (anything after the last
    // delimiter) is not emitted.
    return ParsedLine{
        .substrings = substrings,
        .var_id = var_id,
        .var_id_sanitized = var_id_sanitized,
        .cfg_content = config_line.items,
    };
}
/// Renders a parsed line into its two outputs: one
/// `"config.id.key": "text"` line per extracted piece, and the rewritten
/// `id: '...'` config line. Everything is allocated from `self.allocator`.
fn localize(self: *Transformer, parsed_line: ParsedLine) !LocalizedConf {
    const gpa = self.allocator;
    var entries = std.ArrayList([]const u8).init(gpa);
    for (parsed_line.substrings.items) |piece| {
        const entry = try std.fmt.allocPrint(
            gpa,
            "\"{s}.{s}.{s}\": \"{s}\"\n",
            .{ self.config_name, parsed_line.var_id_sanitized, piece.key, piece.content },
        );
        try entries.append(entry);
    }
    const rewritten = try std.fmt.allocPrint(
        gpa,
        "{s}: \'{s}\'\n",
        .{ parsed_line.var_id, parsed_line.cfg_content },
    );
    return LocalizedConf{ .pkgs = entries, .cfg = rewritten };
}
/// Returns a copy of `name` with every `-` and ` ` replaced by `_`.
/// The copy is allocated from `allocator`; the caller owns it and is
/// responsible for freeing it (or for using an arena).
fn sanitizeStatic(allocator: Allocator, name: []const u8) ![]const u8 {
    // `const`: the slice itself is never reassigned, only its bytes change.
    const output = try allocator.dupe(u8, name);
    std.mem.replaceScalar(u8, output, '-', '_');
    std.mem.replaceScalar(u8, output, ' ', '_');
    return output;
}
/// Sanitizes `name` with the transformer's own allocator; see
/// `sanitizeStatic` for the replacement rules.
fn sanitize(self: *Transformer, name: []const u8) ![]const u8 {
    const allocator = self.allocator;
    return sanitizeStatic(allocator, name);
}
const std = @import("std");
const Allocator = std.mem.Allocator;
const conf = @import("config.zig");
/// Classes a single byte of a config value can fall into.
pub const CharType = enum { color_code, quote, tag, placeholder, content };

/// A classified byte. `tag` and `placeholder` carry the byte that closes the
/// span they open.
pub const Char = union(CharType) {
    color_code,
    quote,
    tag: u8,
    placeholder: u8,
    content,
};

/// Byte-wise classifier for config values.
///
/// Only ASCII bytes are ever treated as delimiters. The `§` in the default
/// table is two bytes in UTF-8 and cannot be classified one byte at a time,
/// so its bytes are reported as content here; callers that need `§` color
/// codes have to detect the byte pair themselves.
pub const CharParser = struct {
    delimiters: []const u8 = "%%<>\'§&",

    /// Maps a byte to its class regardless of the delimiter table.
    fn classify(char: u8) Char {
        return switch (char) {
            '<' => Char{ .tag = '>' },
            '%' => Char{ .placeholder = '%' },
            '&' => Char.color_code,
            '\'' => Char.quote,
            else => Char.content,
        };
    }

    /// True when `char` is listed in `delimiters` and `scan` has a
    /// non-content class for it. Closing-only bytes such as `>` and all
    /// non-ASCII bytes are therefore not splitters.
    pub fn isSplitter(self: CharParser, char: u8) bool {
        if (char >= 0x80) return false;
        if (std.mem.indexOfScalar(u8, self.delimiters, char) == null) return false;
        return classify(char) != .content;
    }

    /// Classifies `char`. Never fails: bytes that are not splitters,
    /// including a stray `>`, are content.
    pub fn scan(self: CharParser, char: u8) Char {
        if (!self.isSplitter(char)) return Char.content;
        return classify(char);
    }
};
/// Line-level checks and parsing for the supported config subset.
pub const LineParser = struct {
    /// True for a line that has an identifier and a colon but no single quotes.
    fn isArrayDecl(line: []const u8) bool {
        const no_quotes = std.mem.count(u8, line, "\'") == 0;
        const has_colon = std.mem.count(u8, line, ":") > 0;
        const has_id = line.len > 1 and line[0] != ':';
        return has_id and has_colon and no_quotes;
    }

    /// True for a line with exactly one dash, exactly two single quotes and
    /// no colon.
    fn isArrayElemDecl(line: []const u8) bool {
        const has_quotes = std.mem.count(u8, line, "\'") == 2;
        const no_colon = std.mem.count(u8, line, ":") == 0;
        const has_dash = std.mem.count(u8, line, "-") == 1;
        return has_dash and no_colon and has_quotes;
    }

    /// Placeholder: multiline strings are not detected yet, always false.
    fn isMultilineString(line: []const u8) bool {
        _ = line;
        return false;
    }

    /// Placeholder: plain strings are not detected yet, always false.
    fn isPlainString(line: []const u8) bool {
        _ = line;
        return false;
    }

    /// True when `line` has the shape `id: 'value'`: it is non-empty, does
    /// not start with `:`, contains a colon, and has exactly two single
    /// quotes after the first colon.
    ///
    /// The quote check keeps unquoted values and section headers out of the
    /// rewrite path, which only understands quoted values.
    pub fn validate(line: []const u8) bool {
        if (line.len == 0 or line[0] == ':') return false;
        const colon = std.mem.indexOfScalar(u8, line, ':') orelse return false;
        return std.mem.count(u8, line[colon + 1 ..], "\'") == 2;
    }

    /// True for a non-empty line with a colon that does not start with `:`.
    /// Quotes are deliberately not required here.
    fn isVariable(line: []const u8) bool {
        const has_colon = std.mem.count(u8, line, ":") > 0;
        const non_empty = line.len > 0;
        const has_id = non_empty and line[0] != ':';
        return non_empty and has_id and has_colon;
    }

    /// Splits `id: 'value'` into a `conf.Variable`; both parts are slices of
    /// `line`.
    fn parseVariable(line: []const u8) conf.Variable {
        var parser = std.fmt.Parser{ .buf = line };
        const var_id = parser.until(':');
        _ = parser.until('\''); // skip to the content
        _ = parser.char(); // skip the opening quote
        // TODO: parse variable without quotes
        const content = parser.until('\''); // TODO: parse escaped quotes
        // The field declared on `Variable` is `value`, not `content`.
        return conf.Variable{ .name = var_id, .value = content };
    }
};

/// Errors reported while scanning a line.
pub const LineScanError = error{ PossibleArrayDecl, InvalidLine };

test "validate" {
    try std.testing.expect(LineParser.validate("bag: \'&bBag\'"));
    try std.testing.expect(!LineParser.validate("bag: &bBag\'"));
    try std.testing.expect(!LineParser.validate("bag\' &bBag\'"));
    try std.testing.expect(!LineParser.validate(":\'&bBag\'"));
    try std.testing.expect(!LineParser.validate(""));
    try std.testing.expect(!LineParser.validate("enabled: true"));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment