Last active
December 3, 2023 18:27
-
-
Save cfr/b2ccc39168bddf2c72def8ce8bd3ac98 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Discriminant for the kinds of configuration entity declared below.
pub const EntityType = enum { variable, array, object };

/// A name paired with its textual value.
pub const Variable = struct {
    name: []const u8,
    value: []const u8,
};

/// A named sequence whose elements are stored as `Variable`s.
pub const Array = struct {
    name: []const u8,
    content: []const Variable,
};

/// A named group of `Variable` fields.
pub const Object = struct {
    name: []const u8,
    fields: []const Variable,
};

/// Any configuration entity, tagged by `EntityType`.
pub const Entity = union(EntityType) {
    variable: Variable,
    array: Array,
    // The fields of a union tagged by an explicit enum must be named exactly
    // like the enum's tags, so this has to be `object` (it used to be
    // `Object`, which does not exist in `EntityType`).
    object: Object,
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// zig build-exe langconf2rp.zig | |
// ./langconf2rp data/magiccosmetics.yml | |
const std = @import("std"); | |
const yaml = @import("yamlparser.zig"); | |
const Allocator = std.mem.Allocator; | |
// Pipeline: config --(parse)--> ast --(transform)--> localized --(generate)--> config
/// Result of `Transformer.localize` for a single input line.
const LocalizedConf = struct {
    /// One formatted `"<config>.<variable>.<key>": "<text>"` line per
    /// extracted substring.
    pkgs: std.ArrayList([]const u8),

    /// The rewritten `<variable>: '<content>'` configuration line.
    cfg: []const u8,
};
/// A run of text lifted out of a configuration value.
const LocalizedString = struct {
    /// Per-line key of the run (`feedLine` generates `p1`, `p2`, ...).
    key: []const u8,

    /// The extracted text itself.
    content: []const u8,
};
/// Result of `Transformer.feedLine` for a single input line.
const ParsedLine = struct {
    /// Text runs extracted from the value, in order of appearance.
    substrings: std.ArrayList(LocalizedString),

    /// Everything in the line before the first ':'.
    var_id: []const u8,

    /// `var_id` with '-' and ' ' replaced by '_'.
    var_id_sanitized: []const u8,

    /// The value with each extracted run replaced by a `<lang:...>` reference.
    cfg_content: []const u8,
};
/// Tags of `ParserContext`.
const ParserState = enum {
    key,
    array,
    object,
};

/// Parser position tagged by `ParserState`. `array` and `object` carry a
/// byte string (presumably the name of the enclosing entity; TODO confirm).
const ParserContext = union(ParserState) {
    key,
    array: []const u8,
    object: []const u8,
};
/// Rewrites single-quoted configuration values into `<lang:...>` references
/// and collects the extracted text so it can be emitted as translation
/// entries.
///
/// Results are allocated from `allocator` and are never freed individually;
/// callers are expected to pass an arena, as `main` does.
const Transformer = struct {
    allocator: Allocator,
    config_name: []const u8,
    parent: ?[]const u8 = null,
    context: ?ParserContext = null,

    /// Creates a transformer whose keys are prefixed with the sanitized
    /// `config_name`. This function cannot return an error, so if the
    /// sanitized copy cannot be allocated the raw `config_name` is used.
    fn init(allocator: Allocator, config_name: []const u8) Transformer {
        const name = Transformer.sanitizeStatic(allocator, config_name) catch config_name;
        return Transformer{ .allocator = allocator, .config_name = name };
    }

    /// Parses one `identifier: 'value'` line.
    ///
    /// Runs of plain text inside the value are moved into the result's
    /// `substrings` under the keys `p1`, `p2`, ... and are replaced in
    /// `cfg_content` by `<lang:{config}.{identifier}.{key}>`. Colour codes,
    /// `%placeholder%`s and `<tag>`s are copied through unchanged. A run is
    /// only emitted when a delimiter follows it; text still pending when the
    /// line ends is dropped.
    ///
    /// `var_id` in the result is a slice of `line`, so `line` must stay
    /// valid for as long as the result is used.
    ///
    /// Errors: `yaml.LineScanError.InvalidLine` when
    /// `yaml.LineParser.validate` rejects the line, or an allocation error.
    fn feedLine(self: *Transformer, line: []const u8) !ParsedLine {
        if (!yaml.LineParser.validate(line)) {
            return yaml.LineScanError.InvalidLine;
        }

        var parser = std.fmt.Parser{ .buf = line };
        const var_id = parser.until(':');
        // Propagate allocation failure rather than silently falling back to
        // the unsanitized identifier.
        const var_id_sanitized = try self.sanitize(var_id);
        _ = parser.until('\''); // skip to the content
        _ = parser.char(); // skip quote

        var config_line = std.ArrayList(u8).init(self.allocator);
        var substrings = std.ArrayList(LocalizedString).init(self.allocator);
        // Scratch buffer for the run being collected. Its bytes are duplicated
        // before they are stored, so the buffer itself can be released.
        var substring = std.ArrayList(u8).init(self.allocator);
        defer substring.deinit();
        // usize rather than u8: a u8 counter overflows on the 256th run.
        var key_id: usize = 1;

        const char_parser = yaml.CharParser{};
        while (parser.char()) |char| {
            if (char == ' ' and substring.items.len == 0) {
                // Spaces in front of a run are copied through, not localized.
                // TODO: mem.trim
                try config_line.append(' ');
                continue;
            }

            // The parser hands out bytes, but '§' is the two bytes 0xC2 0xA7
            // in UTF-8. Recognise the pair here so that the lead byte is kept
            // with the colour code instead of being classified on its own.
            const is_section_sign = char == 0xC2 and
                (if (parser.peek(0)) |next| next == 0xA7 else false);
            const is_splitter = is_section_sign or char_parser.isSplitter(char);

            // When a delimiter ends a non-empty run: store the run and build
            // the reference that replaces it in the configuration line.
            const split_key: ?[]const u8 = blk: {
                if (!is_splitter or substring.items.len == 0) {
                    break :blk null;
                }
                const key_name = try std.fmt.allocPrint(self.allocator, "p{d}", .{key_id});
                const content = try self.allocator.dupe(u8, substring.items);
                try substrings.append(LocalizedString{ .key = key_name, .content = content });
                key_id += 1;
                substring.clearRetainingCapacity();
                break :blk try std.fmt.allocPrint(self.allocator, "<lang:{s}.{s}.{s}>", .{
                    self.config_name,
                    var_id_sanitized,
                    key_name,
                });
            };

            const class: yaml.Char = if (is_section_sign) .color_code else char_parser.scan(char);
            switch (class) {
                .color_code => {
                    if (split_key) |key| try config_line.appendSlice(key);
                    try config_line.append(char);
                    if (is_section_sign) {
                        // Second byte of the UTF-8 encoded '§'.
                        if (parser.char()) |second| try config_line.append(second);
                    }
                    // The byte after the marker selects the colour.
                    if (parser.char()) |color_id| try config_line.append(color_id);
                },
                .placeholder, .tag => |delimiter| {
                    if (split_key) |key| try config_line.appendSlice(key);
                    const parsed_id = parser.until(delimiter);
                    _ = parser.char(); // skip close
                    try config_line.append(char);
                    try config_line.appendSlice(parsed_id);
                    try config_line.append(delimiter);
                },
                .quote => {
                    // The quote itself is not copied; `localize` re-adds the
                    // surrounding quotes.
                    if (split_key) |key| try config_line.appendSlice(key);
                },
                .content => try substring.append(char),
            }
        }

        return ParsedLine{
            .substrings = substrings,
            .var_id = var_id,
            .var_id_sanitized = var_id_sanitized,
            .cfg_content = config_line.items,
        };
    }

    /// Formats the output for one parsed line: a
    /// `"{config}.{identifier}.{key}": "{text}"` line per extracted run and
    /// the rewritten `{identifier}: '{content}'` configuration line.
    /// The text is inserted verbatim; no escaping is applied.
    fn localize(self: *Transformer, parsed_line: ParsedLine) !LocalizedConf {
        var pkgs = std.ArrayList([]const u8).init(self.allocator);
        for (parsed_line.substrings.items) |loc| {
            const pkg_line = try std.fmt.allocPrint(self.allocator, "\"{s}.{s}.{s}\": \"{s}\"\n", .{
                self.config_name,
                parsed_line.var_id_sanitized,
                loc.key,
                loc.content,
            });
            try pkgs.append(pkg_line);
        }
        const cfg = try std.fmt.allocPrint(self.allocator, "{s}: \'{s}\'\n", .{
            parsed_line.var_id,
            parsed_line.cfg_content,
        });
        return LocalizedConf{ .pkgs = pkgs, .cfg = cfg };
    }

    /// Returns a copy of `name` with every '-' and ' ' replaced by '_'.
    /// The copy is allocated from `allocator`; the caller owns it.
    fn sanitizeStatic(allocator: Allocator, name: []const u8) ![]const u8 {
        const output = try allocator.dupe(u8, name);
        std.mem.replaceScalar(u8, output, '-', '_');
        std.mem.replaceScalar(u8, output, ' ', '_');
        return output;
    }

    /// `sanitizeStatic` using this transformer's allocator.
    fn sanitize(self: *Transformer, name: []const u8) ![]const u8 {
        return sanitizeStatic(self.allocator, name);
    }
};
/// Entry point: `langconf2rp <filename>`.
///
/// Reads `<filename>` line by line and writes three files into the current
/// directory, named after the input's base name:
///   * `<name>_localized.yml`: the rewritten configuration lines,
///   * `<name>.json`: one `"key": "text"` line per extracted run,
///   * `<name>_skip.yml`: every line `Transformer.feedLine` rejected.
/// Without a file argument a usage line is printed instead.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(allocator);
    if (args.len < 2) {
        // argv may be empty on some platforms; do not index it blindly.
        const program: []const u8 = if (args.len > 0) args[0] else "langconf2rp";
        try stdout.print("USAGE: {s} <filename>\n", .{program});
        return;
    }

    const filename = args[1];
    var config = try std.fs.cwd().openFile(filename, .{});
    defer config.close();
    const config_name = splitFilename(filename);

    // The names are allocated from the arena so they are released together
    // with everything else (they used to leak from the page allocator).
    const localized_config_name = try std.fmt.allocPrint(
        allocator,
        "{s}_localized.yml",
        .{config_name},
    );
    const localized_config = try std.fs.cwd().createFile(localized_config_name, .{});
    defer localized_config.close();

    const pkg_strings_name = try std.fmt.allocPrint(
        allocator,
        "{s}.json",
        .{config_name},
    );
    const pkg_strings = try std.fs.cwd().createFile(pkg_strings_name, .{});
    defer pkg_strings.close();

    const skip_name = try std.fmt.allocPrint(
        allocator,
        "{s}_skip.yml",
        .{config_name},
    );
    const skip = try std.fs.cwd().createFile(skip_name, .{});
    defer skip.close();

    var trans = Transformer.init(allocator, config_name);
    var buf_reader = std.io.bufferedReader(config.reader());
    var in_stream = buf_reader.reader();
    // A line longer than this buffer makes the read fail and ends the run.
    var buf: [1024]u8 = undefined;
    while (try in_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        const parsed_line = trans.feedLine(line) catch |err| switch (err) {
            // Only a rejected line is skippable; anything else (for example
            // running out of memory) must not be reported as a skipped line.
            error.InvalidLine => {
                try skip.writeAll(line);
                try skip.writeAll("\n");
                try stdout.print("SKIP: {s}\n", .{line});
                continue;
            },
            else => |e| return e,
        };

        const localized_line = try trans.localize(parsed_line);
        try localized_config.writeAll(localized_line.cfg);
        for (localized_line.pkgs.items) |loc| {
            try pkg_strings.writeAll(loc);
        }
    }

    std.debug.print("Written files: {s}, {s}, {s}\n", .{
        localized_config_name,
        pkg_strings_name,
        skip_name,
    });
}
/// Returns the base name of `path`: the part after the last '/', cut at its
/// first '.'. The result is a slice of `path`; nothing is allocated.
fn splitFilename(path: []const u8) []const u8 {
    const base = if (std.mem.lastIndexOfScalar(u8, path, '/')) |slash|
        path[slash + 1 ..]
    else
        path;
    const end = std.mem.indexOfScalar(u8, base, '.') orelse base.len;
    return base[0..end];
}
test "sanitize" {
    // Transformer never frees what it allocates (the sanitized config name
    // made by `init` and the slice returned by `sanitize`), so it is backed
    // by an arena here. Handing it `std.testing.allocator` directly makes the
    // test fail with leak reports.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var trans = Transformer.init(arena.allocator(), "test");
    const sanitized = try trans.sanitize("-a -b");
    try std.testing.expectEqualStrings("_a__b", sanitized);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses one `identifier: 'value'` line.
///
/// Runs of plain text inside the value are moved into the result's
/// `substrings` under the keys `p1`, `p2`, ... and are replaced in
/// `cfg_content` by `<lang:{config}.{identifier}.{key}>`. Colour codes,
/// `%placeholder%`s and `<tag>`s are copied through unchanged. A run is only
/// emitted when a delimiter follows it; text still pending when the line ends
/// is dropped.
///
/// `var_id` in the result is a slice of `line`, so `line` must stay valid for
/// as long as the result is used. Everything else is allocated from
/// `self.allocator` and is not freed here.
///
/// Errors: `yaml.LineScanError.InvalidLine` when `yaml.LineParser.validate`
/// rejects the line, or an allocation error.
fn feedLine(self: *Transformer, line: []const u8) !ParsedLine {
    if (!yaml.LineParser.validate(line)) {
        return yaml.LineScanError.InvalidLine;
    }

    var parser = std.fmt.Parser{ .buf = line };
    const var_id = parser.until(':');
    // Propagate allocation failure rather than silently falling back to the
    // unsanitized identifier.
    const var_id_sanitized = try self.sanitize(var_id);
    _ = parser.until('\''); // skip to the content
    _ = parser.char(); // skip quote

    var config_line = std.ArrayList(u8).init(self.allocator);
    var substrings = std.ArrayList(LocalizedString).init(self.allocator);
    // Scratch buffer for the run being collected. Its bytes are duplicated
    // before they are stored, so the buffer itself can be released.
    var substring = std.ArrayList(u8).init(self.allocator);
    defer substring.deinit();
    // usize rather than u8: a u8 counter overflows on the 256th run.
    var key_id: usize = 1;

    const char_parser = yaml.CharParser{};
    while (parser.char()) |char| {
        if (char == ' ' and substring.items.len == 0) {
            // Spaces in front of a run are copied through, not localized.
            // TODO: mem.trim
            try config_line.append(' ');
            continue;
        }

        // The parser hands out bytes, but '§' is the two bytes 0xC2 0xA7 in
        // UTF-8. Recognise the pair here so that the lead byte is kept with
        // the colour code instead of being classified on its own.
        const is_section_sign = char == 0xC2 and
            (if (parser.peek(0)) |next| next == 0xA7 else false);
        const is_splitter = is_section_sign or char_parser.isSplitter(char);

        // When a delimiter ends a non-empty run: store the run and build the
        // reference that replaces it in the configuration line.
        const split_key: ?[]const u8 = blk: {
            if (!is_splitter or substring.items.len == 0) {
                break :blk null;
            }
            const key_name = try std.fmt.allocPrint(self.allocator, "p{d}", .{key_id});
            const content = try self.allocator.dupe(u8, substring.items);
            try substrings.append(LocalizedString{ .key = key_name, .content = content });
            key_id += 1;
            substring.clearRetainingCapacity();
            break :blk try std.fmt.allocPrint(self.allocator, "<lang:{s}.{s}.{s}>", .{
                self.config_name,
                var_id_sanitized,
                key_name,
            });
        };

        const class: yaml.Char = if (is_section_sign) .color_code else char_parser.scan(char);
        switch (class) {
            .color_code => {
                if (split_key) |key| try config_line.appendSlice(key);
                try config_line.append(char);
                if (is_section_sign) {
                    // Second byte of the UTF-8 encoded '§'.
                    if (parser.char()) |second| try config_line.append(second);
                }
                // The byte after the marker selects the colour.
                if (parser.char()) |color_id| try config_line.append(color_id);
            },
            .placeholder, .tag => |delimiter| {
                if (split_key) |key| try config_line.appendSlice(key);
                const parsed_id = parser.until(delimiter);
                _ = parser.char(); // skip close
                try config_line.append(char);
                try config_line.appendSlice(parsed_id);
                try config_line.append(delimiter);
            },
            .quote => {
                // The quote itself is not copied; `localize` re-adds the
                // surrounding quotes.
                if (split_key) |key| try config_line.appendSlice(key);
            },
            .content => try substring.append(char),
        }
    }

    return ParsedLine{
        .substrings = substrings,
        .var_id = var_id,
        .var_id_sanitized = var_id_sanitized,
        .cfg_content = config_line.items,
    };
}
/// Formats the output for one parsed line: a
/// `"{config}.{identifier}.{key}": "{text}"` line per extracted run and the
/// rewritten `{identifier}: '{content}'` configuration line. The text is
/// inserted verbatim. All strings are allocated from `self.allocator`.
fn localize(self: *Transformer, parsed_line: ParsedLine) !LocalizedConf {
    const alloc = self.allocator;

    var entries = std.ArrayList([]const u8).init(alloc);
    for (parsed_line.substrings.items) |piece| {
        try entries.append(try std.fmt.allocPrint(
            alloc,
            "\"{s}.{s}.{s}\": \"{s}\"\n",
            .{ self.config_name, parsed_line.var_id_sanitized, piece.key, piece.content },
        ));
    }

    return LocalizedConf{
        .pkgs = entries,
        .cfg = try std.fmt.allocPrint(
            alloc,
            "{s}: \'{s}\'\n",
            .{ parsed_line.var_id, parsed_line.cfg_content },
        ),
    };
}
/// Returns a copy of `name` in which every '-' and ' ' is replaced by '_'.
/// The copy is allocated from `allocator`; the caller owns it.
fn sanitizeStatic(allocator: Allocator, name: []const u8) ![]const u8 {
    const copy = try allocator.alloc(u8, name.len);
    for (name, copy) |src, *dst| {
        dst.* = switch (src) {
            '-', ' ' => '_',
            else => src,
        };
    }
    return copy;
}
/// Method form of `sanitizeStatic` that allocates from this transformer's
/// allocator.
fn sanitize(self: *Transformer, name: []const u8) ![]const u8 {
    const alloc = self.allocator;
    return sanitizeStatic(alloc, name);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const std = @import("std"); | |
const Allocator = std.mem.Allocator; | |
const conf = @import("config.zig"); | |
/// Classes a single byte of a configuration value can fall into.
pub const CharType = enum { color_code, quote, tag, placeholder, content };

/// A classified byte. `tag` and `placeholder` carry the byte that closes the
/// construct the classified byte opened.
pub const Char = union(CharType) {
    color_code,
    quote,
    tag: u8,
    placeholder: u8,
    content,
};

/// Byte-wise classifier for configuration values.
pub const CharParser = struct {
    /// Bytes that may start a construct. Classification is per byte, so the
    /// colour marker '§' (UTF-8 0xC2 0xA7) is represented by its final byte
    /// 0xA7 only; a caller that wants to keep the lead byte 0xC2 together
    /// with the colour code has to recognise the pair itself.
    delimiters: []const u8 = "%<'&\xA7",

    /// True when `char` ends a run of plain text, i.e. when `scan` classifies
    /// it as anything other than `.content`.
    pub fn isSplitter(self: CharParser, char: u8) bool {
        return self.scan(char) != .content;
    }

    /// Classifies `char`.
    ///
    /// Every byte without a construct of its own is `.content`. That includes
    /// a stray '>' and the UTF-8 lead byte 0xC2, which used to be listed as
    /// delimiters without a matching case and therefore ran into
    /// `unreachable` on ordinary input.
    pub fn scan(self: CharParser, char: u8) Char {
        if (std.mem.indexOfScalar(u8, self.delimiters, char) == null) {
            return Char.content;
        }
        switch (char) {
            '<' => return Char{ .tag = '>' },
            '%' => return Char{ .placeholder = '%' },
            '&', 0xA7 => return Char.color_code,
            '\'' => return Char.quote,
            // A custom delimiter without a construct stays plain text.
            else => return Char.content,
        }
    }
};
/// Line-level checks and parsing for `identifier: 'value'` style lines.
pub const LineParser = struct {
    /// True for a line of at least two bytes that does not start with ':',
    /// contains a ':' and contains no single quote.
    fn isArrayDecl(line: []const u8) bool {
        const no_quotes = std.mem.count(u8, line, "\'") == 0;
        const has_colon = std.mem.count(u8, line, ":") > 0;
        const has_id = line.len > 1 and line[0] != ':';
        return has_id and has_colon and no_quotes;
    }

    /// True for a line with exactly one '-', exactly two single quotes and
    /// no ':'.
    fn isArrayElemDecl(line: []const u8) bool {
        const has_quotes = std.mem.count(u8, line, "\'") == 2;
        const no_colon = std.mem.count(u8, line, ":") == 0;
        const has_dash = std.mem.count(u8, line, "-") == 1;
        return has_dash and no_colon and has_quotes;
    }

    /// Not implemented: always false (the previous `line.len < 0` can never
    /// hold for an unsigned length).
    fn isMultilineString(line: []const u8) bool {
        _ = line;
        return false; // TODO: detect multi-line strings
    }

    /// Not implemented: always false (the previous `line.len < 0` can never
    /// hold for an unsigned length).
    fn isPlainString(line: []const u8) bool {
        _ = line;
        return false; // TODO: detect plain (unquoted) strings
    }

    /// True when `line` has the shape `identifier: '...'`: a non-empty
    /// identifier before the first ':' and, after it, at least an opening and
    /// a closing single quote.
    ///
    /// The quote requirement keeps lines without a quoted value (section
    /// headers, unquoted scalars, unterminated strings) away from parsers
    /// that skip to the first quote and would otherwise lose the value.
    pub fn validate(line: []const u8) bool {
        const colon = std.mem.indexOfScalar(u8, line, ':') orelse return false;
        if (colon == 0) return false; // nothing before the colon
        return std.mem.count(u8, line[colon + 1 ..], "\'") >= 2;
    }

    /// True for a non-empty line that does not start with ':' and contains
    /// a ':'. Quotes are deliberately not required here.
    fn isVariable(line: []const u8) bool {
        const has_colon = std.mem.count(u8, line, ":") > 0;
        //const has_quotes = std.mem.count(u8, line, "\'") == 2;
        const non_empty = line.len > 0;
        const has_id = non_empty and line[0] != ':';
        return non_empty and has_id and has_colon;
    }

    /// Splits `identifier: 'value'` into a `conf.Variable`. Both fields are
    /// slices of `line`; nothing is allocated.
    fn parseVariable(line: []const u8) conf.Variable {
        var parser = std.fmt.Parser{ .buf = line };
        const var_id = parser.until(':');
        _ = parser.until('\''); // skip to the content
        _ = parser.char(); // skip quote
        // TODO: parse variable without quotes
        const content = parser.until('\''); // TODO: parse escaped quotes
        // `conf.Variable` has the fields `name` and `value` (there is no
        // `content` field).
        return conf.Variable{ .name = var_id, .value = content };
    }
};

pub const LineScanError = error{ PossibleArrayDecl, InvalidLine };

test "validate" {
    // `validate` lives on LineParser, not on CharParser.
    try std.testing.expect(LineParser.validate("bag: \'&bBag\'") == true);
    try std.testing.expect(LineParser.validate("bag: &bBag\'") == false);
    try std.testing.expect(LineParser.validate("bag\' &bBag\'") == false);
    try std.testing.expect(LineParser.validate(":\'&bBag\'") == false);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment