-
-
Save olebedev/63319bc7831a596ec4b39b93c4bd6a5d to your computer and use it in GitHub Desktop.
Parse a v1 yarn.lock into a nix expression. Try it with `nix eval -f parse-yarn-lock.nix`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse a yarn.lock file using pure Nix | |
# yarn.lock v1 files are basically YAML with support for having multiple keys for a single value in a map and without array support. | |
# Inspired by https://github.com/yarnpkg/yarn/blob/158d96dce95313d9a00218302631cd263877d164/src/lockfile/parse.js | |
with builtins; | |
let | |
# Pair every element of `list` with its zero-based position.
# Returns a list of attribute sets { i = <position>; e = <element>; }.
enumerate = list:
  let
    withIndex = i: { inherit i; e = elemAt list i; };
  in
    genList withIndex (length list);
# Build a token record from its kind (`type`) and its payload (`value`).
mkToken = type: value: {
  type = type;
  value = value;
};
parseLockfile = str: let | |
# Regular expressions for the yarn lockfile tokenizer, one per token type.
# They are joined into a single alternation below; the position of the
# capture group that matched tells the caller which token type was found.
newlineRe = "(\r?\n)";
# A comment runs from '#' to the end of the line. The text may be empty
# (a line holding just '#').
commentRe = "#([^\n]*)";
# Used for any kind of whitespace and also indentation in an object
indentRe = "( +)";
# Matches JSON strings, which is the format used for quoted values.
# Note that this contains an inner group for the repetition, so the groups
# of the following regexes are offset by one.
# The body may be empty ("") and \u escapes accept upper- and lowercase hex.
stringRe = "(\"([^\"\\\\]|\\\\[\\\"\\\\/bfnrt]|\\\\u[0-9a-fA-F]{4})*\")";
numberRe = "([0-9]+)";
booleanRe = "(true|false)";
colonRe = "(:)";
commaRe = "(,)";
# A symbol is a string without quotes. It runs up to the next ':', space,
# newline or ','. A single character is enough, so one-letter names such as
# the dependency entry `q "^1.1.2"` are accepted.
symbolRe = "([a-zA-Z\\/.-][^: \n\r,]*)";
tokenizeRe = "${newlineRe}|${commentRe}|${indentRe}|${stringRe}|${numberRe}|${booleanRe}|${colonRe}|${commaRe}|${symbolRe}";
# Split a string into alternating unmatched text and capture-group lists.
tokenize = split tokenizeRe;
# Convert one element of the `split` output into a token.
# `split` yields a plain string for text that matched none of the token
# regexes and a list of capture groups for every match. The index of the
# non-null group identifies which alternative of tokenizeRe matched
# (index 4 is the repetition group inside stringRe and is never inspected).
# Aborts when given unmatched text.
convert = token:
  let
    # Capture group `n` of the match, null when that alternative did not match.
    group = elemAt token;
  in
    if isString token then abort "Invalid token ${token}"
    else if group 0 != null then mkToken "newline" null
    else if group 1 != null then mkToken "comment" (group 1)
    # Indentation is stored as the number of spaces
    else if group 2 != null then mkToken "indent" (stringLength (group 2))
    # Quoted strings are JSON strings, so fromJSON handles the escapes
    else if group 3 != null then mkToken "string" (fromJSON (group 3))
    else if group 5 != null then mkToken "number" (fromJSON (group 5))
    else if group 6 != null then
      # The comparison must be parenthesised: function application binds
      # tighter than `==`, so without the parentheses the whole token would
      # be compared with "true" and this branch would yield `false`.
      mkToken "boolean" (group 6 == "true")
    else if group 7 != null then mkToken "colon" null
    else if group 8 != null then mkToken "comma" null
    # Unquoted symbols are treated as strings
    else if group 9 != null then mkToken "string" (group 9)
    else abort "unreachable";
# Every token of the input, in order. `split` emits empty strings between
# adjacent matches; those are discarded before conversion.
unprocessedTokens =
  let
    pieces = tokenize str;
    nonEmpty = piece: piece != "";
  in
    map convert (filter nonEmpty pieces);
# The tokens the parser works on: comments are removed, and so is any run of
# spaces that does not directly follow a newline (only indentation at the
# start of a line is significant). Spaces at the very start of the input have
# no preceding newline and are dropped as well.
# Comments are also checked for the lockfile version header: a
# "yarn lockfile vN" comment with N other than 1 aborts evaluation.
tokens =
  let
    # Returns [ "<digits>" ] for a version header comment, null otherwise.
    versionOf = match "[[:space:]]*yarn lockfile v([0-9]+)[[:space:]]*";
    keep = { i, e }:
      let
        # Only forced for comment tokens
        version = versionOf e.value;
      in
        if e.type == "comment" then
          if version != null && head version != "1"
          then abort "Unsupported lockfile: ${e.value}"
          else false
        else
          # `i == 0` is tested first so that index -1 is never looked up
          !(e.type == "indent" && (i == 0 || (elemAt unprocessedTokens (i - 1)).type != "newline"));
  in
    map ({ i, e }: e) (filter keep (enumerate unprocessedTokens));
# Token at position `index`, or an "eof" marker once the index runs past the
# end of the token list.
get = index:
  if index >= length tokens
  then { type = "eof"; }
  else elemAt tokens index;
# Collect the key at `index` plus every further key that follows it as
# `, <string>`. Returns the list of key values in source order.
takeKeys = index:
  let
    separator = get (index + 1);
    candidate = get (index + 2);
    hasMore = separator.type == "comma" && candidate.type == "string";
  in
    [ (get index).value ] ++ (if hasMore then takeKeys (index + 2) else [ ]);
# Parse the tokens of a single object.
#   start  - index of the first token to inspect
#   indent - number of leading spaces the entries of this object carry
# Returns { value = <attribute set of the entries>;
#           index = <index of the token at which parsing stopped>; }.
parse = start: indent:
  let
    # genericClosure walks the tokens iteratively; doing the same walk with
    # plain recursion would be too slow in Nix. A fold is not an option
    # because nested maps are parsed recursively and the tokens they consumed
    # have to be skipped.
    # Each closure element is a token position (`key`) together with the
    # name/value pairs produced by the step that led there (`values`).
    step = position: pairs: [ { key = position; values = pairs; } ];
    stop = [ ];

    visit = { key, ... }:
      let
        current = get key;
        following = get (key + 1);
        advance = step (key + 1) [ ];

        # A string starts an entry: one or more comma separated keys followed
        # either by a colon (nested object) or by a scalar value.
        entry =
          let
            names = takeKeys key;
            # Number of tokens taken up by the keys and the commas between them
            width = 2 * (length names) - 1;
            after = get (key + width);
          in
            if after.type == "colon" then
              let
                # The nested object starts right after the colon, two spaces deeper
                nested = parse (key + width + 1) (indent + 2);
              in
                step nested.index (map (name: { inherit name; value = nested.value; }) names)
            # The only valid values
            else if elem after.type [ "string" "number" "boolean" ] then
              step (key + width + 1) (map (name: { inherit name; value = after.value; }) names)
            else abort "Invalid token ${after.type}";
      in
        if current.type == "eof" then stop
        else if current.type == "newline" then
          # At the top level every newline is skipped; inside a nested object
          # the object ends unless the next line has exactly this indentation.
          if indent == 0 || (following.type == "indent" && following.value == indent)
          then advance
          else stop
        else if current.type == "indent" then
          if current.value == indent then advance else stop
        else if current.type == "string" then entry
        else abort "Invalid token ${current.type}";

    visited = genericClosure {
      startSet = step start [ ];
      operator = visit;
    };
    pairsOf = node: node.values;
  in
    {
      value = listToAttrs (concatLists (map pairsOf visited));
      # The last visited position is where this object ended
      index = (elemAt visited (length visited - 1)).key;
    };
in | |
(parse 0 0).value; | |
in | |
parseLockfile (readFile ./yarn.lock) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment