Skip to content

Instantly share code, notes, and snippets.

@Techcable
Created April 4, 2025 19:21
Show Gist options
  • Save Techcable/476e844206ba0882bdf56f524378ad53 to your computer and use it in GitHub Desktop.
Save Techcable/476e844206ba0882bdf56f524378ad53 to your computer and use it in GitHub Desktop.
Parses a markdown table into a CSV file
#!/usr/bin/env -S uv run --script
# Extracts data from the first markdown table found in the input file,
# converting it into CSV
# /// script
# dependencies = [
# "marko~=2.1"
# ]
# ///
import csv
import sys
from marko.inline import CodeSpan, RawText
from marko.ext.gfm import gfm
from marko.ext.gfm.elements import TableCell, TableRow, Table
def extract_text(e) -> str:
    """Return the plain text held by a table cell or by one inline element.

    A ``TableCell`` yields the concatenated text of all of its children, so an
    empty cell gives ``""`` and a cell that mixes plain text with code spans
    keeps every piece.  A ``CodeSpan`` or ``RawText`` yields its own string
    content.

    Raises:
        TypeError: if ``e`` (or one of a cell's children) is any other kind of
            element, or if a ``CodeSpan``/``RawText`` does not carry a string.
    """
    if isinstance(e, TableCell):
        # A cell may hold zero or several inline elements; join whatever is
        # there instead of rejecting (or, under ``-O``, truncating) the cell.
        return "".join(extract_text(child) for child in e.children)
    if isinstance(e, (CodeSpan, RawText)):
        res = e.children
        # Explicit check rather than ``assert`` so it still runs under ``-O``.
        if not isinstance(res, str):
            raise TypeError(f"Expected str content, got {type(res)!r}: {e!r}")
        return res
    raise TypeError(f"Unexpected element: {e!r}")
def extract_row_text(row: TableRow) -> list[str]:
    """Return the text of each child of ``row``, in ``row.children`` order."""
    assert isinstance(row, TableRow)
    return list(map(extract_text, row.children))
def convert_table(t: Table):
    """Lazily yield every row of ``t`` as a list of cell strings.

    Rows come out in ``t.children`` order.  Because this is a generator, the
    sanity check that the first child is the table head only runs once
    iteration starts.
    """
    assert t.children[0] == t.head
    yield from map(extract_row_text, t.children)
def main(args):
    """Write the first top-level table of a markdown file out as CSV.

    Args:
        args: exactly two items, the input markdown path followed by the
            output CSV path.

    Raises:
        ValueError: if ``args`` does not hold exactly two items, or if none of
            the document's top-level elements is a table.
        FileExistsError: if the output path already exists.
    """
    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if len(args) != 2:
        raise ValueError("Bad argcount")
    input_path, output_path = args
    # Read and write as UTF-8 so the result does not depend on the locale.
    with open(input_path, 'rt', encoding='utf-8') as f:
        text = f.read()
    tree = gfm.parse(text)
    # Only direct children of the document are searched.
    table = next((el for el in tree.children if isinstance(el, Table)), None)
    if table is None:
        raise ValueError("Could not find a table!")
    assert table.head == table.children[0]
    # Convert everything before touching the output: a failure half-way must
    # not leave a partial file behind that then blocks the rerun ('x' mode).
    rows = list(convert_table(table))
    # NOTE: Intentionally fails if output already exists
    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'xt', encoding='utf-8', newline='') as raw_output_file:
        csv.writer(raw_output_file).writerows(rows)
if __name__ == "__main__":
    # Drop argv[0] (the script path); main() takes only the remaining arguments.
    script_args = sys.argv[1:]
    main(script_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment