Skip to content

Instantly share code, notes, and snippets.

@wader
Last active August 29, 2025 13:17
Show Gist options
  • Save wader/6ca01563a9de2767c7f602167e0e6db4 to your computer and use it in GitHub Desktop.
Save wader/6ca01563a9de2767c7f602167e0e6db4 to your computer and use it in GitHub Desktop.
# jq -L . -Rs 'include "conllu"; from_conllu' < fox.conllu
# jq -L . -rRs 'include "conllu"; from_conllu.tokens | to_tree | print_tree' < fox.conllu
# Build dependency tree(s) from an array of token objects.
# Input: array of tokens, each with at least .id and .head.
# Output: one {token, children} object per token whose .head is 0; children
# holds, recursively, the tokens whose .head equals the parent's .id.
def to_tree:
  ( . as $tokens
  | def _rec($head):
      ( $tokens[]
      | select(.head == $head)
      # use the selected token itself rather than looking it up by
      # position: array index and id need not line up (filtered arrays,
      # gaps in the id sequence)
      | { token: .
        , children: [_rec(.id)]
        }
      );
    _rec(0)
  );
# Render a {token, children} tree (as produced by to_tree) as text.
# Output: one string "<id>: <form>" per node, parent before its children,
# prefixed with one space per level of depth.
def print_tree:
  def _rec($depth):
    # Repeating a string zero times may yield null instead of "" depending
    # on the jq version, which would be interpolated as the text "null" on
    # the root line; fall back to "" explicitly. The string is kept on the
    # left-hand side of * since that form is the more widely accepted one.
    ( ((" " * $depth) // "") as $indent
    | "\($indent)\(.token.id): \(.token.form)"
    , ( .children[]
      | _rec($depth+1)
      )
    );
  _rec(0);
# Parse CoNLL-U text into sentences.
# Input: the whole document as one string (e.g. read with jq -Rs).
# Output: a stream of {metadata, tokens} objects, one per block of lines
# terminated by a blank line (or by the end of input).
#   metadata: object built from "# key = value" comment lines
#   tokens:   array of token objects, one per line starting with a digit
def from_conllu:
  # "_" marks an unspecified field: output null, otherwise apply f.
  def _unless_blank(f):
    if . == "_" then null else f end;
  # "key=value|..." => {key: value, ...}
  def _feats:
    ( split("|")
    | map(split("=") | {key: .[0], value: .[1]})
    | from_entries
    );
  # "id:rel|..." => [[id, rel], ...]
  def _deps:
    ( split("|")
    | map(
        ( split(":")
        # only the first ":" separates id from rel; a relation such as
        # "nsubj:pass" contains colons itself, so glue the rest back together
        | if length > 2 then [.[0], (.[1:] | join(":"))] else . end
        | .[0] |=
            # id1 or id1.id2
            ( split(".")
            | map(tonumber)
            | if length == 1 then .[0] else . end
            )
        )
      )
    );
  # One tab separated token line => token object.
  # NOTE(review): an id that is not a plain number (e.g. a range such as
  # "1-2") makes tonumber fail here -- confirm whether such lines must be
  # supported.
  def _token:
    ( split("\t") as [$id, $form, $lemma, $upos, $xpos, $feats, $head, $deprel, $deps, $misc]
    | { id: ($id | tonumber)
      , $form
      , $lemma
      , $upos
      , $xpos
      , feats: ($feats | _unless_blank(_feats))
      , head: ($head | _unless_blank(tonumber))
      , $deprel
      , deps: ($deps | _unless_blank(_deps))
      , $misc
      }
    );
  # The extra "" after the real lines acts as a final blank line so the last
  # block is emitted even when the input does not end with one.
  foreach (split("\n")[], "") as $l (
    { metadata: {}
    , tokens: []
    , emit: null
    };
    ( # the previous line completed a block: start collecting a new one
      if .emit then
        ( .metadata = {}
        | .tokens = []
        | .emit = null
        )
      else .
      end
    | if $l | test("^\\s*#") then
        # "# key = this is a value"
        # Collect the match into an array first: capture outputs nothing for
        # comments that are not in key = value form (e.g. "# newpar"), and an
        # empty update would otherwise disturb the accumulated state.
        ( ( $l
          | [capture("^\\s*#\\s*(?<key>[^ ]+)\\s*=\\s*(?<value>.*)$")]
          | .[0]
          ) as $kv
        | if $kv then .metadata[$kv.key] = $kv.value
          else .
          end
        )
      elif $l | test("^\\d+") then
        .tokens += [$l | _token]
      elif $l | test("^\\s*$") then
        # blank line ends the block; emit it if anything was collected,
        # including blocks that have tokens but no metadata comments
        if .metadata != {} or .tokens != [] then
          .emit =
            { metadata
            , tokens
            }
        else .
        end
      else
        # unrecognized line: leave the state untouched
        .
      end
    );
    .emit // empty
  );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment