Created
April 12, 2012 13:40
-
-
Save SAPikachu/2367357 to your computer and use it in GitHub Desktop.
See also: https://gist.github.com/2367370
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from optparse import OptionParser | |
import sys | |
import os | |
import subprocess | |
from tempfile import mkstemp | |
import chardet | |
# Font names that build_script() removes from the set of fonts to process.
# Entries are lower-case because scan_fonts() lower-cases every font name
# it collects before the comparison happens.
BUILTIN_FONTS = [
    "arial",
    "tahoma",
    "simhei",
    "simsun",
    "youyuan",
    "黑体",
    "宋体",
    "幼圆",
]
def scan_fonts(subs):
    """Record, for every subtitle in *subs*, which fonts it references.

    *subs* maps a file name to a dict holding the decoded subtitle text
    under ``"content"``.  Each of those dicts gains a ``"font_styles"``
    entry: a dict mapping a lower-cased font name to the list of style
    names that use that font.

    * A ``Style:`` line contributes its font only when some ``Dialogue:``
      line mentions the style name (case-insensitive substring match).
    * A font named by an inline ``\\fn`` override is registered with an
      empty style list unless a style already introduced it.

    The dicts are modified in place; nothing is returned.
    """
    style_line = re.compile(
        r"^\s*Style:\s*(?P<style_name>.+?),(?P<font>.+?),",
        re.I | re.M,
    )
    inline_font = re.compile(r"\\fn(?P<font>.+?)(\\|})", re.I)

    for entry in subs.values():
        styles_by_font = {}
        entry["font_styles"] = styles_by_font
        text = entry["content"]

        for declared in style_line.finditer(text):
            name = declared.group("style_name")
            # Only keep styles that at least one Dialogue line refers to.
            usage = r"^\s*Dialogue:\s*.*{}.*$".format(re.escape(name))
            if re.search(usage, text, re.I | re.M):
                font_key = declared.group("font").lower()
                styles_by_font.setdefault(font_key, []).append(name)

        for override in inline_font.finditer(text):
            # Inline overrides are not tied to a style: register the font only.
            styles_by_font.setdefault(override.group("font").lower(), [])
def format_output_file_name(format, original_file_name):
    """Expand the template *format* using the parts of *original_file_name*.

    The template is a ``str.format`` string that may use these fields:

    * ``{dirname}``   - directory part of the path
    * ``{full_name}`` - file name including its extension
    * ``{name}``      - file name without the extension
    * ``{ext}``       - extension, including the leading dot

    Returns the formatted string.
    """
    full_name = os.path.basename(original_file_name)
    stem, suffix = os.path.splitext(full_name)
    fields = {
        "dirname": os.path.dirname(original_file_name),
        "full_name": full_name,
        "name": stem,
        "ext": suffix,
    }
    return format.format(**fields)
def flatten(x):
    """Return a flat list of every leaf element found in the iterable *x*.

    Any element that exposes ``__iter__`` is descended into recursively,
    except ``str`` and ``bytes`` objects, which are kept whole.

    Examples:
    >>> flatten([1, 2, [3, 4], (5, 6)])
    [1, 2, 3, 4, 5, 6]
    >>> flatten([[[1, 2, 3], (42, None)], [4, 5], [6], 7])
    [1, 2, 3, 42, None, 4, 5, 6, 7]
    """
    flat = []
    for item in x:
        nested = hasattr(item, "__iter__") and not isinstance(item, (str, bytes))
        if not nested:
            flat.append(item)
            continue
        flat += flatten(item)
    return flat
def build_script(
    sub_files, ignored_fonts, output_subfile_format, output_font_format,
    mkvmerge_options_file,
):
    """Print a Windows batch script that subsets the fonts used by subtitles.

    Parameters:
        sub_files: paths of the subtitle files to scan.
        ignored_fonts: font names (lower-case) to leave out, or None.
        output_subfile_format: template passed to format_output_file_name()
            to name the copy made of each subtitle file.
        output_font_format: same kind of template, used to name each
            generated font file.
        mkvmerge_options_file: path of a text file that receives one
            attachment option group per generated font; falsy to skip it.

    The batch commands (``del``, ``copy``, ``SSATextRip``, ``SubsetFont``,
    ``SSARepFont``) are only written to standard output, they are not run
    here.  ``GetFontPath.exe`` *is* run, once per font, to find the font's
    files.

    Raises:
        AssertionError: GetFontPath.exe listed no file for a font, or a
            listed file does not exist.
        subprocess.CalledProcessError: GetFontPath.exe exited with an error.
    """
    ignored_fonts = ignored_fonts or []

    subs = {}
    for sub_path in sub_files:
        with open(sub_path, "rb") as f:
            raw_content = f.read()
        # chardet may report no encoding at all (e.g. for an empty file);
        # fall back to UTF-8 rather than failing inside decode(None).
        encoding = chardet.detect(raw_content)["encoding"] or "utf-8"
        subs[sub_path] = {
            "encoding": encoding,
            "content": raw_content.decode(encoding),
        }

    scan_fonts(subs)

    used_fonts = flatten([x["font_styles"].keys() for x in subs.values()])
    # Sorted so that the generated script is identical from run to run.
    used_fonts = sorted(
        set(used_fonts) - set(BUILTIN_FONTS) - set(ignored_fonts)
    )

    # mkstemp() is used only to reserve a unique path for the generated
    # script; close the descriptor it opens so it is not leaked.
    temp_fd, temp_file_name = mkstemp()
    os.close(temp_fd)

    output_stream = sys.stdout

    def out(*args, **kwargs):
        print(*args, file=output_stream, **kwargs)

    out('del "{}" >nul 2>nul'.format(temp_file_name))

    for sub_name, sub in subs.items():
        sub["output_name"] = format_output_file_name(
            output_subfile_format, sub_name
        )
        out('copy /y "{}" "{}"'.format(sub_name, sub["output_name"]))

    # The same for every font file, so build it once.
    repfont_entries = ' '.join(
        '-f "{0}::{0}"'.format(x["output_name"]) for x in subs.values()
    )

    font_outputs = []
    for font_name in used_fonts:
        # Append (">>") the text of every subtitle that uses this font to
        # the shared temp file.
        for sub_name, sub in subs.items():
            if font_name not in sub["font_styles"]:
                continue
            styles = sub["font_styles"][font_name]
            out(
                ('SSATextRip "{sub_name}" "i,text,contains,{font_name}" ' +
                 '{style_rules} >> "{temp_file}"').format(
                    sub_name=sub_name,
                    font_name=font_name,
                    temp_file=temp_file_name,
                    style_rules=" ".join(
                        '"i,style,=,{}"'.format(x) for x in styles
                    ),
                )
            )

        file_output = subprocess.check_output(
            ["GetFontPath.exe", font_name],
            universal_newlines=True,
        )
        files = file_output.strip().splitlines()
        # Raised explicitly (not via ``assert``) so the check survives -O;
        # the exception type is unchanged.
        if not files:
            raise AssertionError(
                "GetFontPath.exe returned no file for font {!r}".format(
                    font_name
                )
            )

        for file_name in files:
            if not os.path.isabs(file_name):
                # Relative names are looked up in the Windows font folder.
                file_name = os.path.expandvars(
                    "%SystemRoot%\\Fonts\\" + file_name
                )
            if not os.path.isfile(file_name):
                raise AssertionError(
                    "font file not found: {!r}".format(file_name)
                )

            font_output = format_output_file_name(
                output_font_format, file_name
            )
            font_outputs.append(font_output)
            out((r'SubsetFont "{input}" "{output}" "{char_file}" -r | ' +
                 r'SSARepFont {repfont_entries}').format(
                input=file_name,
                output=font_output,
                char_file=temp_file_name,
                repfont_entries=repfont_entries,
            ))

        # Start the next font with an empty temp file.
        out('del "{}"'.format(temp_file_name))

    if mkvmerge_options_file:
        with open(mkvmerge_options_file, "w", encoding="utf-8") as f:
            for output_file in font_outputs:
                print("--attachment-mime-type", file=f)
                print("application/x-truetype-font", file=f)
                print("--attach-file", file=f)
                # Backslashes are doubled in the written path.
                print(output_file.replace("\\", "\\\\"), file=f)
def parse_args():
    """Parse the command line into keyword arguments for build_script().

    Returns a dict with the keys ``ignored_fonts`` (list of names, or None
    when the option was never given), ``output_subfile_format``,
    ``output_font_format``, ``mkvmerge_options_file`` and ``sub_files``
    (the positional arguments).
    """
    parser = OptionParser(usage="%prog [options] SUBTITLE_FILE...")
    # "--ignore-font" matches the hyphenated spelling of the other options;
    # the original "--ignore_font" spelling is kept so old command lines
    # continue to work.
    parser.add_option(
        "-n", "--ignore-font", "--ignore_font",
        action="append", dest="ignored_fonts", metavar="FONT",
        help="font name to leave out (may be given several times)",
    )
    parser.add_option(
        "-s", "--output-subfile-format", default="{name}.n{ext}",
        metavar="FORMAT",
        help="name template for the copied subtitle files "
             "[default: %default]",
    )
    parser.add_option(
        "-f", "--output-font-format", default="{name}-subset{ext}",
        metavar="FORMAT",
        help="name template for the generated font files "
             "[default: %default]",
    )
    parser.add_option(
        "-m", "--mkvmerge-options-file", default="mux_fonts.txt",
        metavar="FILE",
        help="file that receives the mkvmerge attachment options "
             "[default: %default]",
    )
    options, args = parser.parse_args()

    # Copy, so that adding a key does not modify the optparse Values object.
    ret = dict(vars(options))
    ret["sub_files"] = args
    return ret
# Script entry point: parse the command line, then emit the batch script.
if __name__ == "__main__":
    cli_options = parse_args()
    build_script(**cli_options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment