Skip to content

Instantly share code, notes, and snippets.

@Taehun
Created April 3, 2012 12:19
Show Gist options
  • Select an option

  • Save Taehun/2291528 to your computer and use it in GitHub Desktop.

Select an option

Save Taehun/2291528 to your computer and use it in GitHub Desktop.
Python: 리눅스용 한/영 통합자막 생성 스크립트
#!/usr/bin/python
# -*- coding:utf-8 -*-
# 사용법: ./merge_sub.py <자막 파일>
import sys
import os
import re
def main(argv):
    """Merge the English captions of a subtitle file into its Korean track.

    The input is expected to hold every ``KRCC`` (Korean) ``<SYNC>`` entry
    first, followed by every ``ENCC`` (English) entry, and then ``</BODY>``.
    The file is renamed to ``<name>.ori`` and a new file is written under the
    original name. The new file contains the Korean track only; where exactly
    one English caption starts in the same one-second slot as a Korean
    caption, or in a neighbouring slot, that English text plus ``<br>`` is
    written right after the Korean ``<SYNC>`` line.

    Files are handled as raw bytes, so the result does not depend on the
    subtitle's text encoding, and CRLF or LF line endings are kept as they
    are.

    Args:
        argv: Command-line arguments without the program name; must hold
            exactly one element, the path of the subtitle file.

    Returns:
        None. When ``argv`` does not hold exactly one path, a usage message
        is printed and no file is touched.
    """
    if len(argv) != 1:
        print("Usage: merge_sub.py <subtitle file>")
        return

    path = argv[0]
    backup = path + ".ori"
    # The trailing group is ``(.*)`` rather than ``(.+)`` so that a SYNC line
    # ending in a bare LF (nothing after the class tag) still matches.
    kr_sync = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+KRCC>(.*)')
    en_sync = re.compile(br'<SYNC.+[sS].+=(\d+)><[pP].+ENCC>(.*)')
    body_end = re.compile(br'</BODY>')

    os.rename(path, backup)
    subs = _collect_captions(backup, kr_sync, en_sync, body_end)
    _write_merged(backup, path, subs, kr_sync, en_sync, body_end)


def _store_caption(subs, buckets, lang, sync, text):
    """File one finished caption into *subs* / *buckets* (mutated in place)."""
    # Captions containing "&nbsp" only clear the screen; they carry no text.
    if lang is None or b'&nbsp' in text:
        return
    if lang == "kr":
        subs[sync] = [text]
        buckets[sync // 1000] = sync
        return
    # English: attach to the Korean caption found in the previous, the same
    # and the next one-second slot.
    for near in range(sync - 1000, sync + 1001, 1000):
        anchor = buckets.get(near // 1000)
        if anchor is not None:
            subs[anchor].append(text)


def _collect_captions(path, kr_sync, en_sync, body_end):
    """Return {Korean start time: [Korean text, matched English texts...]}."""
    subs = {}
    buckets = {}   # start time in whole seconds -> Korean start time
    lang = None    # track of the caption being accumulated: "kr" or "en"
    sync = 0
    text = b""
    with open(path, "rb") as fin:
        for line in fin:
            kr = kr_sync.search(line)
            found = kr or en_sync.search(line)
            if found:
                # A new SYNC entry closes the caption collected so far.
                _store_caption(subs, buckets, lang, sync, text)
                lang = "kr" if kr else "en"
                sync = int(found.group(1))
                text = found.group(2).rstrip(b"\r\n")
            elif body_end.search(line):
                # End of the caption area: store the last caption and stop
                # accumulating so the closing tags do not end up in its text.
                _store_caption(subs, buckets, lang, sync, text)
                lang = None
            elif lang is not None:
                text += line.rstrip(b"\r\n")
    # Covers files that end without a </BODY> line.
    _store_caption(subs, buckets, lang, sync, text)
    return subs


def _write_merged(src, dst, subs, kr_sync, en_sync, body_end):
    """Copy *src* to *dst* without the English track, adding matched text."""
    with open(src, "rb") as fin, open(dst, "wb") as fout:
        skipping = False
        for line in fin:
            # Everything from the first English SYNC line up to (but not
            # including) the </BODY> line is left out of the output.
            if en_sync.search(line):
                skipping = True
            if body_end.search(line):
                skipping = False
            if skipping:
                continue
            fout.write(line)
            found = kr_sync.search(line)
            if found:
                entry = subs.get(int(found.group(1)))
                # Only an unambiguous match (exactly one English caption)
                # is written.
                if entry is not None and len(entry) == 2:
                    fout.write(entry[1] + b"<br>")
# Run the merge only when this file is executed as a script; the program
# name is dropped so main() receives just the user-supplied arguments.
if __name__ == "__main__":
    main(sys.argv[1:])
@Taehun
Copy link
Copy Markdown
Author

Taehun commented Apr 3, 2012

I'm a Python newbie. I would appreciate feedback from Python developers.

@sanxiyn
Copy link
Copy Markdown

sanxiyn commented Apr 3, 2012

Use kr_match.group(1) instead of p1.sub(r'\1', kr_match.group()).

@sanxiyn
Copy link
Copy Markdown

sanxiyn commented Apr 3, 2012

Replace filter(lambda...) with plain if check inside for loop.

@Taehun
Copy link
Copy Markdown
Author

Taehun commented Apr 3, 2012

Thanks, sanxiyn.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment