Last active
October 13, 2024 06:17
-
-
Save petzku/9ad9ccf2400bd748e9b75a401f857741 to your computer and use it in GitHub Desktop.
script to convert mpvQC .txt output -> better structured markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import sys | |
from pathlib import Path | |
from datetime import timedelta | |
import argparse | |
import contextlib | |
from dataclasses import dataclass | |
# Shape of one mpvQC note line: "[time] [category] free text". Sample:
#   [00:02:18] [Phrasing] unsure of "comprises"
# The three groups capture time, category and note text, in that order.
LINE_PATTERN = r"\[(.+?)\] \[(.+?)\] (.+)"

# With --chrono, every category is merged under "Script" except these,
# which keep their own heading
STANDALONE_CATEGORIES = ("Typeset", "Timing", "Encode")

# With --refs, notes in these categories get no reference (quote) line
NON_DIALOGUE_CATEGORIES = ("Typeset", "Encode")
@dataclass
class QCNote:
    """One parsed mpvQC note.

    All three fields are kept as the plain strings captured from the report line.
    """

    # Timestamp text, e.g. "00:02:18"
    time: str
    # Category exactly as written in the report, e.g. "Phrasing"
    category: str
    # Free-form note text following the category
    text: str
def parse_args(argv=None):
    """Set up argument parser and parse the command line

    Args:
        argv (list[str], optional): Arguments to parse. Defaults to None, in
            which case argparse reads them from sys.argv.

    Returns:
        argparse.Namespace with attributes `filename`, `refs`, `chrono`,
        `dialogue` and `output`.
    """
    parser = argparse.ArgumentParser(
        prog="qc2md", description="Convert mpvQC reports to markdown"
    )
    parser.add_argument("filename", help="mpvQC txt output file")
    parser.add_argument(
        "-r",
        "--refs",
        action="store_true",
        help="Add quotation blocks for line references. Requires specifying --dialogue to include actual lines.",
    )
    parser.add_argument(
        "-c",
        "--chrono",
        action="store_true",
        help="Group all script notes together chronologically",
    )
    parser.add_argument(
        "-d",
        "--dialogue",
        help="Dialogue file to source references from, where appropriate",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Path to output markdown to. Defaults to input file name with .md extension. Use '-' for stdout. If supplied, suppresses printing of output file path.",
    )
    # argv=None keeps the original behaviour of parsing sys.argv[1:]
    return parser.parse_args(argv)
def read_file(filename: str):
    """Read mpvQC output .txt file

    Args:
        filename (str): Path of the mpvQC report to read (decoded as UTF-8).

    Returns:
        Tuple of (lines following the "[DATA]" marker, name of video file used
        for QC). The name is None when no line starting with "path" yields one.

    Raises:
        ValueError: If the file contains no "[DATA]" marker line.
    """
    with open(filename, mode="r", encoding="utf-8") as file:
        lines = file.readlines()

    qc_file = None
    for line in lines:
        if not line.startswith("path"):
            continue
        # Strip the "path :" key first; otherwise a value without any directory
        # separator would be returned with the key still attached
        _key, sep, value = line.partition(":")
        if not sep:
            value = line
        # Accept both "/" and "\" so Windows-style paths reduce to a file name too
        name = value.strip().replace("\\", "/").rsplit("/", 1)[-1]
        qc_file = name or None
        break

    # Compare stripped text so the marker is found even when it is the last
    # line of the file and has no trailing newline
    data_start = next(
        (idx for idx, line in enumerate(lines) if line.strip() == "[DATA]"),
        None,
    )
    if data_start is None:
        raise ValueError(f"no [DATA] section found in {filename!r}")

    return lines[data_start + 1 :], qc_file
def get_githash(repo_path: Path):
    """Get SHA hash of current repo HEAD

    GitPython is an optional dependency and is imported on demand.

    Args:
        repo_path (Path): Directory inside the repository; parent directories
            are searched for the repository root.

    Returns:
        The hex SHA of HEAD, or None if GitPython is not importable or HEAD
        cannot be found.
    """
    try:
        # dynamic import [GitPython](https://pypi.org/project/GitPython/) if present
        import git
    except ImportError:
        # Handled separately on purpose: an except clause that names git.* is
        # evaluated while `git` is still unbound when the import itself failed,
        # which would raise instead of returning None
        return None

    try:
        repo = git.Repo(path=repo_path, search_parent_directories=True)
        return repo.head.object.hexsha
    except (git.InvalidGitRepositoryError, git.NoSuchPathError, ValueError):
        # ValueError covers a HEAD that cannot be resolved to an object
        # (e.g. a repository without commits) -- see GitPython reference docs
        return None
def parse_notes(
    lines: list[str], *, group_script_notes=False
) -> dict[str, list[QCNote]]:
    """Turn raw report lines into QCNote objects keyed by output group

    Lines starting with "#" and lines that do not match LINE_PATTERN are skipped.
    With group_script_notes set, every category outside STANDALONE_CATEGORIES is
    filed under "Script"; each note still records its original category.
    """
    grouped: dict[str, list[QCNote]] = {}
    for raw in lines:
        if raw.startswith("#"):
            continue
        found = re.match(LINE_PATTERN, raw)
        if found is None:
            continue

        stamp, category, body = found.groups()

        # Default to one group per category; --chrono folds the rest into "Script"
        bucket = category
        if group_script_notes and category not in STANDALONE_CATEGORIES:
            bucket = "Script"

        grouped.setdefault(bucket, []).append(QCNote(stamp, category, body))
    return grouped
@contextlib.contextmanager | |
def _smart_open(filename=None): | |
"""Open file if specified, else stdout | |
Copied from https://stackoverflow.com/a/17603000/4611644, CC BY-SA 4.0""" | |
if filename and filename != "-": | |
fh = open(filename, "w") | |
else: | |
fh = sys.stdout | |
try: | |
yield fh | |
finally: | |
if fh is not sys.stdout: | |
fh.close() | |
def write_output(
    outfile: Path,
    notes: dict[str, list[QCNote]],
    video_file: str | None = None,
    githash: str | None = None,
    *,
    include_reference=False,
    dialogue_file: Path | None = None,
):
    """Write notes into output file or stdout

    Groups are written in sorted name order, each as a "## group" heading
    followed by one checkbox item per note.

    Args:
        outfile (Path): File to use for output. If '-', uses stdout.
        notes ({str: list[QCNote]}): Structured dict of notes
        video_file (str, optional): Filename of mux used for QC
        githash (str, optional): SHA hash of git repo at time of QC
        include_reference (bool, optional): Whether to include reference lines before dialogue notes. Defaults to False.
        dialogue_file (Path, optional): Dialogue ASS file to source reference lines from, if applicable
    """
    # Stays empty unless a dialogue file is supplied
    dialogue = []
    if dialogue_file:
        # conditional import of [python-ass](https://pypi.org/project/ass/)
        import ass

        with open(dialogue_file, encoding="utf-8-sig") as fo:
            doc = ass.parse(fo)
        dialogue = [
            line for line in doc.events if isinstance(line, ass.line.Dialogue)
        ]

    def _get_ass_lines_for_time(timestamp: str):
        """Return dialogue lines overlapping the one-second window at timestamp"""
        h, m, s = (int(x) for x in timestamp.split(":"))
        start = timedelta(hours=h, minutes=m, seconds=s)
        # Add to the whole timedelta: `start.seconds` alone excludes the days
        # component, which would put `end` before `start` from 24h onwards
        end = start + timedelta(seconds=1)
        return [
            line for line in dialogue if (line.start < end) and (line.end > start)
        ]

    ordered = sorted(notes.items(), key=lambda item: item[0])

    with _smart_open(outfile) as fo:
        # header, if values supplied
        if video_file:
            fo.write(f"Using file `{video_file}`\n")
        if githash:
            fo.write(f"Repo state `{githash}`\n")
        if video_file or githash:
            fo.write("\n")

        for group, _notes in ordered:
            fo.write(f"## {group}\n")
            for note in _notes:
                if include_reference and note.category not in NON_DIALOGUE_CATEGORIES:
                    fo.write("\n")
                    if dialogue_file:
                        # quote every dialogue line visible at the note's time
                        for line in _get_ass_lines_for_time(note.time):
                            fo.write(f"> {line.text}\n")
                    else:
                        # no dialogue source: leave an empty quote to fill in by hand
                        fo.write("> \n")
                # group != category only if --chrono was supplied
                if group != note.category:
                    fo.write(
                        f"- [ ] [`{note.time}` - **{note.category}**]: {note.text}\n"
                    )
                else:
                    fo.write(f"- [ ] [`{note.time}`]: {note.text}\n")
            fo.write("\n")
def main():
    """Command-line entry point: read the report, parse it, write markdown

    The output path defaults to the input name with a .md suffix and is printed
    to stdout in that case. A --dialogue path that does not exist is ignored
    with a warning on stderr.
    """
    args = parse_args()
    lines, video_file = read_file(args.filename)
    githash = get_githash(Path(args.filename).parent)
    notes = parse_notes(lines, group_script_notes=args.chrono)

    outfile = args.output if args.output else Path(args.filename).with_suffix(".md")

    dialogue_file = None
    if args.dialogue:
        candidate = Path(args.dialogue)
        if candidate.exists():
            dialogue_file = candidate
        else:
            # Keep going without references, but say so instead of silently
            # producing empty quote blocks
            print(
                f"warning: dialogue file not found, ignoring: {args.dialogue}",
                file=sys.stderr,
            )

    write_output(
        outfile,
        notes,
        video_file,
        githash,
        include_reference=args.refs,
        dialogue_file=dialogue_file,
    )

    # Only announce the path when it was derived rather than chosen by the user
    if not args.output:
        print(outfile)
# Run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See also https://github.com/9vult/qc2md