Last active
January 24, 2023 06:49
-
-
Save iamevn/6d796a1c8296ac325da4545fd20caf2f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# split out AMAZON/NETFLIX STYLE TYPESETTING
# by iamevn
import re
import sys
def find_nth(string, substring, n, start=0):
    """Return the index of the nth occurrence of substring in string.

    The search uses str.find and begins at position ``start``; each
    subsequent search resumes one character after the previous hit, so
    overlapping occurrences are counted.

    n is 1-based, start is 0-based.  Returns -1 when there are fewer than
    n occurrences, or when n is smaller than 1.
    """
    if n < 1:
        # there is no "zeroth" occurrence; report "not found" explicitly
        # instead of falling off the end of the function
        return -1
    # iterate rather than recurse so large n cannot exhaust the call stack
    found = start - 1
    for _ in range(n):
        found = string.find(substring, found + 1)
        if found == -1:
            return -1
    return found
# Match an ASS event line ("<Format>: <Layer>,<Start>,...,<Effect>,<Text>")
# with one named group per field.  A single optional space is tolerated after
# the colon and after each comma except the one preceding Text.  The Text
# group keeps the trailing newline when there is one; the newline is optional
# so the last event of a file without a final line terminator still matches.
line_pattern = re.compile(
    r'(?P<Format>[^:]*): ?'
    r'(?P<Layer>\d*), ?'
    r'(?P<Start>[^,]*), ?'
    r'(?P<End>[^,]*), ?'
    r'(?P<Style>[^,]*), ?'
    r'(?P<Name>[^,]*), ?'
    r'(?P<MarginL>[^,]*), ?'
    r'(?P<MarginR>[^,]*), ?'
    r'(?P<MarginV>[^,]*), ?'
    r'(?P<Effect>[^,]*),'
    r'(?P<Text>.*\n?)'
)
def line2dict(line):
    """Parse one ASS event line into a dictionary of its fields.

    Returns a dict keyed by the named groups of line_pattern (Format, Layer,
    Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text), or
    None when the line does not look like an ASS event.
    """
    parsed = line_pattern.match(line)
    if parsed is None:
        return None
    # every group in line_pattern is named, so groupdict() covers all fields
    return parsed.groupdict()
def dict2line(d):
    """Serialise an event dictionary back into a single ASS event line.

    d must provide every field produced by line2dict; the fields are written
    in the standard ASS column order with no space after the commas.
    """
    template = "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}"
    rendered = template.format(**d)
    return rendered
def is_sign(text):
    """Decide whether text should be treated as a sign (typesetting).

    Only characters outside of {} override/comment blocks are considered,
    and a character immediately following a backslash (e.g. the h in \\h)
    is never checked for case.

    Returns:
        False if any considered character is lowercase,
        False if there is no alphabetic text and the last visible character
              is one of . , ! ? (or there is no visible character at all),
        True otherwise.

    Whitespace, including a trailing line terminator, and anything inside
    {} blocks is ignored when determining the last visible character, so a
    punctuation-only line is recognised whether or not it still carries its
    newline or ends with an override block.
    """
    in_comment = False
    escaped = False
    has_alphabetic_text = False
    last_visible = ''
    for c in text:
        if escaped:
            # character consumed by the preceding backslash
            escaped = False
        elif c == '\\':
            escaped = True
        elif in_comment:
            if c == '}':
                in_comment = False
            continue
        elif c == '{':
            in_comment = True
            continue
        elif c.islower():
            return False
        elif c.isalpha():
            has_alphabetic_text = True
        # only text that is actually displayed may count as the line ending
        if not in_comment and not c.isspace():
            last_visible = c
    if not has_alphabetic_text and (last_visible == '' or last_visible in '.,!?'):
        return False
    return True
def missing_newline(text):
    """Report whether text lacks a terminating newline character.

    Returns False when text ends with a newline and True in every other
    case (including the empty string).
    """
    if text.endswith('\n'):
        return False
    return True
def _restyle_event(raw_line, event, new_style):
    r"""Return the output line(s) for one parsed Dialogue event.

    The event text is split on \N and every segment is classified with
    is_sign().  No sign segment: the original line is returned untouched.
    Only sign segments: the whole event is returned with style new_style.
    A mix: two events are returned, the sign segments (style new_style)
    followed by the remaining segments (original style).
    """
    # classify without the line terminator so it cannot mask the final
    # punctuation mark of a segment
    body = event['Text'].rstrip('\r\n')
    upper_segments = []
    lower_segments = []
    for segment in body.split('\\N'):
        if is_sign(segment):
            upper_segments.append(segment)
        else:
            lower_segments.append(segment)
    if not upper_segments:
        return [raw_line]
    if not lower_segments:
        # nothing to split off, the original event becomes the sign
        return [dict2line(dict(event, Style=new_style, Text=body + '\n'))]
    sign_event = dict(event, Style=new_style,
                      Text='\\N'.join(upper_segments) + '\n')
    dialogue_event = dict(event, Text='\\N'.join(lower_segments) + '\n')
    return [dict2line(sign_event), dict2line(dialogue_event)]


def main(inpath, outpath, new_style='Type'):
    r"""Split out amazon/netflix style "typesetting".

    Finds ALL UPPERCASE lines (whole events or a subsection of an event
    split by \N) and puts them on a line with the style new_style.

    requirement:
    -> Check the line and look for \N
    There is no \N:
    1) Is everything uppercase only? -> Y: Give it the style "Type"
    2) N: Leave it as it is
    There is a \N:
    1) Is everything before the \N only uppercase? -> Y: Split the line and
       give the uppercase line the style "Type"
    2) Is everything before and after the \N uppercase? -> Y: Give it the
       style "Type"
    3) N: Leave it as it is
    Implemented as a slightly modified version that works with multiple \N
    on a line.

    Everything up to and including the [Events] header is copied verbatim.
    After it, Dialogue events are processed; every other line (the Format
    line, comments, blank lines, anything that is not an ASS event) is
    copied through unchanged.

    inpath    -- path of the UTF-8 .ass file to read
    outpath   -- path of the UTF-8 .ass file to write
    new_style -- style name assigned to sign lines

    Raises ValueError if inpath contains no [Events] section; outpath is not
    written in that case.
    """
    lines = []
    in_events = False
    with open(inpath, encoding='utf-8') as infile:
        for raw_line in infile:
            if not in_events:
                lines.append(raw_line)
                in_events = raw_line.strip() == '[Events]'
                continue
            event = line2dict(raw_line)
            # line2dict returns None for anything that is not an ASS event
            if event is not None and event.get('Format') == 'Dialogue':
                lines.extend(_restyle_event(raw_line, event, new_style))
            else:
                lines.append(raw_line)
    if not in_events:
        raise ValueError('no [Events] section found in {!r}'.format(inpath))
    with open(outpath, 'w', encoding='utf-8') as outfile:
        for line in lines:
            outfile.write(line)
            if missing_newline(line):
                outfile.write('\n')
if __name__ == '__main__':
    # expects exactly two arguments: the input file and the output file
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        sys.exit('Usage: {} infile.ass outfile.ass'.format(sys.argv[0]))
    main(cli_args[0], cli_args[1], new_style='Type')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment