Skip to content

Instantly share code, notes, and snippets.

Last active January 24, 2023 06:49
Show Gist options
  • Save iamevn/6d796a1c8296ac325da4545fd20caf2f to your computer and use it in GitHub Desktop.
Save iamevn/6d796a1c8296ac325da4545fd20caf2f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#by iamevn
import sys, re
def find_nth(string, substring, n, start=0):
    """Find the nth occurrence of substring in string.

    Searching uses str.find and begins at index ``start`` (0-based);
    ``n`` is 1-based.  Each follow-up search resumes one character after
    the previous hit, so overlapping occurrences are counted.

    Returns the index of the nth occurrence, or -1 if there are fewer
    than n occurrences or if n is less than 1.
    """
    if n < 1:
        # There is no "zeroth" occurrence; report "not found" consistently
        # instead of falling off the end of the function.
        return -1
    position = string.find(substring, start)
    remaining = n - 1
    # Iterate rather than recurse so a large n cannot exhaust the stack.
    while position != -1 and remaining > 0:
        position = string.find(substring, position + 1)
        remaining -= 1
    return position
# Match one ASS event line.  Every comma-separated field gets a named group;
# Text is the final field, may itself contain commas, and must end with the
# line's trailing newline (which is captured as part of Text).
line_pattern = re.compile(r'(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)')


def line2dict(line):
    """Pull the fields out of an ASS event line into a dictionary.

    Takes the string ``line`` and returns a dict keyed by the group names
    of ``line_pattern`` (Format, Layer, Start, End, Style, Name, MarginL,
    MarginR, MarginV, Effect, Text), or None if the line is not an ASS
    event.  The Text value keeps the trailing newline.
    """
    # print(line) # <- fun UnicodeEncodeErrors!
    match = line_pattern.match(line)
    if match is None:
        return None
    # groupdict() maps every named group to the text it captured.
    return match.groupdict()
def dict2line(d):
    """Serialise an event dictionary back into a single ASS event line.

    ``d`` must provide every field named in the template below; the
    values are inserted as-is, separated by commas with no extra spaces.
    """
    template = (
        "{Format}: {Layer},{Start},{End},{Style},{Name},"
        "{MarginL},{MarginR},{MarginV},{Effect},{Text}"
    )
    return template.format(**d)
def is_sign(text):
    """True if text should be a sign, False otherwise.

    False if the text has no alphabetic characters and ends in punctuation
    (one of ``.,!?``) or is empty,
    False if any character outside of {} pairs is lowercase,
    True otherwise.

    A backslash escapes the character after it, so that character is never
    tested for case.  Trailing line terminators are ignored, so the newline
    that ends an event's Text does not hide the final punctuation mark.
    """
    in_comment = False
    escaped = False
    has_alphabetic_text = False
    last_char = ''
    # Drop the trailing newline first; otherwise it would become last_char
    # and the "ends in punctuation" rule could never fire for a whole line.
    for c in text.rstrip('\r\n'):
        if escaped:
            # character following a backslash: skip the case check
            escaped = False
            last_char = c
        elif c == '\\':
            escaped = True
            last_char = '\\'
        elif in_comment:
            # inside {...}: ignore everything until the closing brace
            if c == '}':
                in_comment = False
        elif c == '{':
            in_comment = True
        elif c.islower():
            return False
        else:
            if c.isalpha():
                has_alphabetic_text = True
            last_char = c
    ends_in_punctuation = last_char == '' or last_char in '.,!?'
    if not has_alphabetic_text and ends_in_punctuation:
        return False
    return True
def missing_newline(text):
    """Report whether text lacks a terminating newline character.

    Returns False when the last character of text is a newline and True
    in every other case, including for an empty string.
    """
    final_char = text[-1:]
    return final_char != '\n'
def _split_event(event, new_style):
    r"""Return the event dict(s) that replace one Dialogue event.

    ``event`` is a dictionary as produced by line2dict and is not modified.
    Its Text is cut at every \N and each segment is classified with is_sign:

    * sign and non-sign segments both present -> two events: the sign
      segments (re-joined with \N) carrying style ``new_style``, followed by
      the remaining segments with the original style;
    * only sign segments -> one event, restyled to ``new_style``;
    * no sign segments -> the original event, unchanged.
    """
    text = event['Text']
    if '\\N' not in text:
        if is_sign(text):
            return [dict(event, Style=new_style)]
        return [event]

    upper_segments = []
    lower_segments = []
    for segment in text.split('\\N'):
        if is_sign(segment):
            upper_segments.append(segment)
        else:
            lower_segments.append(segment)

    if upper_segments and lower_segments:
        # need a new line for the upper segments; the lower ones keep the
        # original event's style
        upper_event = dict(event, Style=new_style,
                           Text='\\N'.join(upper_segments))
        lower_event = dict(event, Text='\\N'.join(lower_segments))
        return [upper_event, lower_event]
    if upper_segments:
        # no lower segments, the whole line can simply be restyled
        return [dict(event, Style=new_style)]
    # no upper segments: line is left unchanged
    return [event]


def main(inpath, outpath, new_style='Type'):
    r"""Split out amazon/netflix style "typesetting".

    Finds ALL UPPERCASE lines (whole events or a subsection of an event
    split by \N) and puts them on a line with the style new_style.

    -> Check the line and look for \N
    There is no \N:
      1) Is everything uppercase only? -> Y: Give it the style "Type"
      2) N: Leave it as it is
    There is a \N:
      1) Is everything before the \N only uppercase? -> Y: Split the line
         and give the uppercase line the style "Type"
      2) Is everything before and after the \N uppercase? -> Y: Give it
         the style "Type"
      3) N: Leave it as it is

    Implemented as a slightly modified version that works with multiple \N
    on a line: every \N-separated segment is classified on its own.

    inpath is read as UTF-8 in full before outpath is opened for writing.
    Everything up to and including the [Events] header is copied verbatim,
    as is every later line that is not a Dialogue event.  Dialogue events
    are re-serialised with dict2line.
    """
    lines = []
    with open(inpath, encoding='utf-8') as infile:
        # seek to [Events] section, copying the header lines untouched;
        # stops cleanly at end of file if the section is missing
        for header_line in infile:
            lines.append(header_line)
            if header_line.strip() == '[Events]':
                break
        # remaining lines: the "Format: Layer, Start, ..." line and events
        for raw_line in infile:
            # line_pattern needs the trailing newline, which the last line
            # of a file may lack
            if missing_newline(raw_line):
                raw_line += '\n'
            d = line2dict(raw_line)
            if d is None or d['Format'] != 'Dialogue':
                lines.append(raw_line)
                continue
            for event in _split_event(d, new_style):
                lines.append(dict2line(event))
    with open(outpath, 'w', encoding='utf-8') as outfile:
        for line in lines:
            # after a split only the part holding the original last segment
            # still ends in a newline
            if missing_newline(line):
                line += '\n'
            outfile.write(line)
if __name__ == '__main__':
    # Script entry point: exactly two arguments are expected, the input
    # and the output .ass path; anything else prints the usage and exits.
    argv = sys.argv
    if len(argv) == 3:
        main(argv[1], argv[2], new_style='Type')
    else:
        sys.exit('Usage: {} infile.ass outfile.ass'.format(argv[0]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment