Skip to content

Instantly share code, notes, and snippets.

@petzku
Last active October 13, 2024 06:17
Show Gist options
  • Save petzku/9ad9ccf2400bd748e9b75a401f857741 to your computer and use it in GitHub Desktop.
Save petzku/9ad9ccf2400bd748e9b75a401f857741 to your computer and use it in GitHub Desktop.
Script to convert mpvQC .txt output into better-structured Markdown
#!/usr/bin/env python
import re
import sys
from pathlib import Path
from datetime import timedelta
import argparse
import contextlib
from dataclasses import dataclass
# mpvQC output line format: "[timestamp] [category] free-form note text".
# Sample:
# [00:02:18] [Phrasing] unsure of "comprises"
# Capture groups, in order: timestamp, category, note text.
LINE_PATTERN = r"\[(.+?)\] \[(.+?)\] (.+)"
# When using --chrono, keep these categories as their own sections; every
# other category is merged into a single "Script" section.
STANDALONE_CATEGORIES = ("Typeset", "Timing", "Encode")
# When using --refs, do not add a reference (quote) line for these categories
NON_DIALOGUE_CATEGORIES = ("Typeset", "Encode")
@dataclass
class QCNote:
    """One note parsed from a single line of an mpvQC report."""

    # Timestamp exactly as written in the report, e.g. "00:02:18"
    time: str
    # Category label exactly as written in the report, e.g. "Phrasing"
    category: str
    # Free-form note text that follows the category
    text: str
def parse_args():
"""Set up argument parser"""
parser = argparse.ArgumentParser(
prog="qc2md", description="Convert mpvQC reports to markdown"
)
parser.add_argument("filename", help="mpvQC txt output file")
parser.add_argument(
"-r",
"--refs",
action="store_true",
help="Add quotation blocks for line references. Requires specifying --dialogue to include actual lines.",
)
parser.add_argument(
"-c",
"--chrono",
action="store_true",
help="Group all script notes together chronologically",
)
parser.add_argument(
"-d",
"--dialogue",
help="Dialogue file to source references from, where appropriate",
)
parser.add_argument(
"-o",
"--output",
help="Path to output markdown to. Defaults to input file name with .md extension. Use '-' for stdout. If supplied, suppresses printing of output file path.",
)
return parser.parse_args()
def read_file(filename: str):
    """Read an mpvQC output .txt file.

    Args:
        filename: Path of the mpvQC report to read.

    Returns:
        A ``(lines, qc_file)`` tuple. ``lines`` is every line that follows the
        ``[DATA]`` marker (line endings kept). ``qc_file`` is the base name of
        the video file taken from the first header line starting with
        ``path``, or None when there is no such line or it names no file.

    Raises:
        ValueError: If the report contains no ``[DATA]`` marker line.
    """
    # utf-8-sig decodes plain UTF-8 as well, and strips a BOM if one is present
    with open(filename, mode="r", encoding="utf-8-sig") as file:
        lines = file.readlines()

    qc_file = None
    for line in lines:
        if not line.startswith("path"):
            continue
        # Header looks like "path      : /dir/video.mkv". Drop the key first
        # so a value without any separator does not yield the whole header
        # line, then split on both "/" and "\" so Windows paths work too.
        _key, sep, value = line.partition(":")
        if not sep:
            value = line
        qc_file = re.split(r"[\\/]", value)[-1].strip() or None
        break

    # Compare stripped lines so the marker is still found when it is the last
    # line without a trailing newline or carries stray whitespace.
    for index, line in enumerate(lines):
        if line.strip() == "[DATA]":
            return lines[index + 1 :], qc_file
    raise ValueError(
        f"{filename}: no [DATA] section found; is this an mpvQC report?"
    )
def get_githash(repo_path: Path):
    """Get the SHA hash of the HEAD commit of the repo containing ``repo_path``.

    GitPython is an optional dependency; it is imported lazily here.

    Args:
        repo_path: Directory inside the repository; parent directories are
            searched for the repository root.

    Returns:
        The commit's hex SHA string, or None if GitPython cannot be imported,
        the path does not exist or is not inside a git repository, or HEAD
        cannot be resolved.
    """
    try:
        # dynamic import [GitPython](https://pypi.org/project/GitPython/) if present
        import git
    except ImportError:
        # Handled on its own: naming git.* exception classes in the same
        # except clause would raise UnboundLocalError when the import itself
        # is what failed, defeating the "return None" fallback.
        return None

    try:
        repo = git.Repo(path=repo_path, search_parent_directories=True)
        return repo.head.object.hexsha
    except (git.InvalidGitRepositoryError, git.NoSuchPathError, ValueError):
        # NOTE(review): ValueError is presumably what GitPython raises when
        # HEAD points at nothing (fresh repo without commits) - confirm.
        return None
def parse_notes(
    lines: list[str], *, group_script_notes=False
) -> dict[str, list[QCNote]]:
    """Turn raw report lines into notes bucketed by section name.

    Lines starting with "#" and lines that do not match LINE_PATTERN are
    ignored. Notes keep the order in which they appear in ``lines``.

    Args:
        lines: Raw lines of the report's data section.
        group_script_notes: When true, every category not listed in
            STANDALONE_CATEGORIES is filed under the section "Script"; the
            note itself still carries its original category.

    Returns:
        Mapping of section name to the list of QCNote objects in it.
    """
    pattern = re.compile(LINE_PATTERN)
    # section name -> notes filed under it
    grouped: dict[str, list[QCNote]] = {}
    for raw in lines:
        parsed = None if raw.startswith("#") else pattern.match(raw)
        if parsed is None:
            continue
        timestamp, category, body = parsed.groups()
        merge = group_script_notes and category not in STANDALONE_CATEGORIES
        section = "Script" if merge else category
        grouped.setdefault(section, []).append(QCNote(timestamp, category, body))
    return grouped
@contextlib.contextmanager
def _smart_open(filename=None):
"""Open file if specified, else stdout
Copied from https://stackoverflow.com/a/17603000/4611644, CC BY-SA 4.0"""
if filename and filename != "-":
fh = open(filename, "w")
else:
fh = sys.stdout
try:
yield fh
finally:
if fh is not sys.stdout:
fh.close()
def write_output(
    outfile: Path,
    notes: dict[str, list[QCNote]],
    video_file: str | None = None,
    githash: str | None = None,
    *,
    include_reference=False,
    dialogue_file: Path | None = None,
):
    """Write notes into output file or stdout

    Sections are written in alphabetical order of their name, each as a
    "## name" heading followed by one checkbox item per note.

    Args:
        outfile (Path): File to use for output. If '-', uses stdout.
        notes ({str: list[QCNote]}): Structured dict of notes
        video_file (str, optional): Filename of mux used for QC
        githash (str, optional): SHA hash of git repo at time of QC
        include_reference (bool, optional): Whether to include reference lines before dialogue notes. Defaults to False.
        dialogue_file (Path, optional): Dialogue ASS file to source reference lines from, if applicable
    """
    dialogue = []
    if dialogue_file:
        # conditional import of [python-ass](https://pypi.org/project/ass/)
        import ass

        with open(dialogue_file, encoding="utf-8-sig") as fo:
            doc = ass.parse(fo)
        dialogue = [
            line for line in doc.events if isinstance(line, ass.line.Dialogue)
        ]

    def _get_ass_lines_for_time(timestamp: str):
        """Return dialogue lines overlapping the one-second window at ``timestamp``."""
        h, m, s = [int(x) for x in timestamp.split(":")]
        start = timedelta(hours=h, minutes=m, seconds=s)
        # Add to the timedelta itself: ``start.seconds`` is only the
        # within-day component and would drop whole days (hours >= 24).
        end = start + timedelta(seconds=1)
        return [
            line for line in dialogue if (line.start < end) and (line.end > start)
        ]

    ordered = sorted(notes.items(), key=lambda item: item[0])
    with _smart_open(outfile) as fo:
        # header, if values supplied
        if video_file:
            fo.write(f"Using file `{video_file}`\n")
        if githash:
            fo.write(f"Repo state `{githash}`\n")
        if video_file or githash:
            fo.write("\n")

        for group, _notes in ordered:
            fo.write(f"## {group}\n")
            for note in _notes:
                if include_reference and note.category not in NON_DIALOGUE_CATEGORIES:
                    fo.write("\n")
                    if dialogue_file:
                        # quote every dialogue line on screen at the note's time
                        matching_lines = _get_ass_lines_for_time(note.time)
                        for line in matching_lines:
                            fo.write(f"> {line.text}\n")
                    else:
                        # empty quote block for the reviewer to fill in by hand
                        fo.write("> \n")
                # group != category only if --chrono was supplied
                if group != note.category:
                    fo.write(
                        f"- [ ] [`{note.time}` - **{note.category}**]: {note.text}\n"
                    )
                else:
                    fo.write(f"- [ ] [`{note.time}`]: {note.text}\n")
            # blank line between sections
            fo.write("\n")
def main():
    """Command-line entry point: convert an mpvQC report to markdown.

    Parses the command line, reads the report, looks up the git HEAD hash for
    the report's directory, groups the notes and writes the markdown. The
    output path is printed to stdout only when --output was not given.
    """
    args = parse_args()
    lines, video_file = read_file(args.filename)
    githash = get_githash(Path(args.filename).parent)
    notes = parse_notes(lines, group_script_notes=args.chrono)
    outfile = args.output if args.output else Path(args.filename).with_suffix(".md")

    dialogue_file = None
    if args.dialogue:
        if Path(args.dialogue).exists():
            dialogue_file = Path(args.dialogue)
        else:
            # Still best-effort (conversion continues), but say so instead of
            # silently producing output without the requested reference lines.
            print(
                f"warning: dialogue file not found, ignoring: {args.dialogue}",
                file=sys.stderr,
            )

    write_output(
        outfile,
        notes,
        video_file,
        githash,
        include_reference=args.refs,
        dialogue_file=dialogue_file,
    )
    if not args.output:
        print(outfile)


if __name__ == "__main__":
    main()
@petzku
Copy link
Author

petzku commented Oct 13, 2024

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment