Created
April 3, 2012 12:19
-
-
Save Taehun/2291528 to your computer and use it in GitHub Desktop.
Python: 리눅스용 한/영 통합자막 생성 스크립트
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding:utf-8 -*- | |
# Usage: ./merge_sub.py <subtitle file>
import sys | |
import os | |
import re | |
# Header lines that open a caption in the Korean / English track, and the
# line that closes the caption body.  The trailing group is `(.*)` rather
# than `(.+)` so that a header with nothing after the class tag still matches
# when the file uses bare LF line endings.
_KR_SYNC = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+KRCC>(.*)')
_EN_SYNC = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+ENCC>(.*)')
_BODY_END = re.compile(br'</BODY>')

# Captions containing this marker are treated as "clear the screen" entries
# and are never merged.
# NOTE(review): the copy of the script this was derived from tested for a
# single space here, which rejects every multi-word caption.  That looks like
# an HTML-decoded `&nbsp;` (the SAMI blank-caption entity) -- confirm.
_BLANK_MARKER = b'&nbsp;'

_EOL = b'\r\n'
_KOREAN = "kr"
_ENGLISH = "en"


def _store_caption(section, sync, text, subs, syncs):
    """File one finished caption into the lookup tables.

    Korean captions become ``subs[sync] = [text]`` and are indexed in
    ``syncs`` by the whole second they start in.  English captions are
    appended to every Korean caption that starts in the same one-second
    bucket or in a directly neighbouring one.
    """
    if section is None or _BLANK_MARKER in text:
        return
    if section == _KOREAN:
        subs[sync] = [text]
        syncs[sync // 1000] = sync
        return
    for probe in range(sync - 1000, sync + 1001, 1000):
        bucket = probe // 1000
        if bucket in syncs:
            subs[syncs[bucket]].append(text)


def _collect_captions(lines):
    """Return ``{korean_sync_ms: [korean_text, english_text, ...]}``.

    The Korean track is expected to come before the English track.
    """
    subs = {}
    syncs = {}
    section = None   # None until the first caption header is seen
    sync = 0
    text = b""
    for ln in lines:
        kr_match = _KR_SYNC.search(ln)
        match = kr_match or _EN_SYNC.search(ln)
        if match:
            # A new header ends the caption collected so far -- this also
            # flushes the last Korean caption when the English track begins.
            _store_caption(section, sync, text, subs, syncs)
            section = _KOREAN if kr_match else _ENGLISH
            sync = int(match.group(1))
            text = match.group(2).rstrip(_EOL)
        elif _BODY_END.search(ln):
            break
        elif section is not None:
            # Continuation line of a multi-line caption.
            text += ln.rstrip(_EOL)
    # Flush the final caption (normally the last English one).
    _store_caption(section, sync, text, subs, syncs)
    return subs


def _merged_lines(lines, subs):
    """Yield the output lines: the input minus its English track, with the
    English text plus ``<br>`` inserted right after each Korean header."""
    skipping = False
    for ln in lines:
        if _EN_SYNC.search(ln):
            skipping = True
        if _BODY_END.search(ln):
            skipping = False
        if skipping:
            continue
        yield ln
        kr_match = _KR_SYNC.search(ln)
        if kr_match:
            entry = subs.get(int(kr_match.group(1)), ())
            # Only merge when exactly one English caption was paired up.
            if len(entry) == 2:
                yield entry[1] + b"<br>"


def main(argv):
    """Merge the English captions of a subtitle file into its Korean track.

    The input file is renamed to ``<name>.ori`` (kept as a backup) and a new
    file is written under the original name.  Files are processed as raw
    bytes, so no text encoding is assumed and line endings are preserved.

    Args:
        argv: command-line arguments without the program name; must hold
            exactly one element, the path of the subtitle file.

    Returns:
        None.  A usage message is printed when the argument count is wrong.

    Raises:
        OSError / IOError: if the file cannot be renamed, read or written.
    """
    if len(argv) != 1:
        # Parenthesised single-argument form works on Python 2 and 3 alike.
        print("Usage: merge_sub.py <subtitle file>")
        return

    target = argv[0]
    backup = target + ".ori"
    os.rename(target, backup)

    with open(backup, "rb") as src:
        lines = src.readlines()

    subs = _collect_captions(lines)

    with open(target, "wb") as dst:
        dst.writelines(_merged_lines(lines, subs))
# Script entry point: pass the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])
Use kr_match.group(1) instead of p1.sub(r'\1', kr_match.group()).
Replace filter(lambda...) with plain if check inside for loop.
Thanks sanxian.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm a Python newbie. Feedback from experienced Python developers is welcome.