alexsavio · December 25, 2016 23:38
diff --git a/copy_subtitles_sync.py b/copy_subtitles_sync.py
 #!/usr/bin/env python

 import os
 import os.path as path
 import argparse
 import logging
 from chardet.universaldetector import UniversalDetector
 from pysrt import SubRipFile, SubRipItem

 # Configure logging once for the whole script (INFO and above) and create
 # the module-level logger used by the functions below.
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger(__name__)


 def get_file_encoding(afile):
     """ Guess the character encoding of a file with chardet.

     The file is opened in binary mode and handed line by line to a
     `UniversalDetector`. Reading stops as soon as the detector reports
     that it is done, or when the file is exhausted.

     Parameters
     ----------
     afile: str
         Path to the file to inspect.

     Returns
     -------
     encoding: str
         The `encoding` entry of the detector result.
     """
     sniffer = UniversalDetector()
     sniffer.reset()
     with open(afile, 'rb') as stream:
         for chunk in stream:
             sniffer.feed(chunk)
             if sniffer.done:
                 break
         # tell the detector that no more data is coming
         sniffer.close()

     return sniffer.result['encoding']


 def open_subtitles_file(afile, encoding=None):
     """ Parse a subtitles file with pysrt, guessing its encoding if needed.

     When `encoding` is not given it is obtained from `get_file_encoding`.

     Parameters
     ----------
     afile: str
         Path to the subtitles file.

     encoding: str, optional
         Encoding of `afile`. Guessed from the file content when None.

     Returns
     -------
     subs: SubRipFile
         The parsed subtitles.

     encoding: str
         The encoding that was used to read the file.

     Raises
     ------
     UnicodeDecodeError:
         If the file cannot be decoded with the given or guessed encoding.
         The error is logged and then re-raised unchanged.
     """
     if encoding is None:
         encoding = get_file_encoding(afile)

     try:
         parsed = SubRipFile.open(afile, encoding=encoding)
     except UnicodeDecodeError:
         log.error(
             'Error detecting the encoding of file {}, guessed {}. '
             'Please give the encoding as input.'.format(afile, encoding)
         )
         raise

     return parsed, encoding


 def merge_sync_text(sync_subs, text_subs):
     """ Return a new SubRipFile object with the timestamps from `sync_subs`
     and the text from `text_subs`.

     Items are paired by position. A sync item without a counterpart in
     `text_subs` is reported through the module logger and left out of the
     result. The items of `sync_subs` are copied before their text is
     replaced, so neither input is modified.

     Parameters
     ----------
     sync_subs: SubRipFile
         Subtitles that provide the timing of every item.

     text_subs: SubRipFile
         Subtitles that provide the text of every item.

     Returns
     -------
     subs: SubRipFile
         Newly created subtitles, one item per matched pair.
     """
     import copy

     subs = SubRipFile()

     # Name the text source in error messages without relying on a global
     # variable that only exists when the module is run as a script.
     # NOTE(review): `path` is expected to be set by pysrt when the file was
     # opened from disk -- a generic label is used when it is missing.
     text_source = getattr(text_subs, 'path', None) or 'the text subtitles'

     for idx, syncitem in enumerate(sync_subs.data):
         try:
             text = text_subs[idx].text
         except IndexError:
             log.error('Could not read item {} from {}'.format(idx, text_source),
                       exc_info=True)
         else:
             # shallow copy: keep the timing, swap the text, leave the
             # caller's item untouched
             outitem = copy.copy(syncitem)
             outitem.text = text
             subs.append(outitem)

     return subs


 def is_existing_file(parser, arg):
     """ argparse `type` helper: accept `arg` only if it is an existing file.

     Directories are rejected as well, because the path is later opened and
     parsed as a subtitles file.

     Parameters
     ----------
     parser: argparse.ArgumentParser
         Parser whose `error` method is called to report a bad path.

     arg: str
         Path given on the command line.

     Returns
     -------
     arg: str
         The unchanged path when it points to an existing file.
     """
     if os.path.isfile(arg):
         return arg  # return the file path

     parser.error("The file %s does not exist!" % arg)


 def is_empty_file(fpath):
     """ Tell whether the file at `fpath` has a size of zero bytes.

     The size is taken from `os.stat`, so any error raised by that call
     (for example for a missing path) is propagated to the caller.
     """
     size_in_bytes = os.stat(fpath).st_size
     return size_in_bytes == 0


 def is_not_existing_file(parser, arg):
     """ argparse `type` helper for output paths.

     A path is accepted when nothing exists there yet, or when what exists
     is an empty file. Any other existing path is reported through
     `parser.error`.

     Parameters
     ----------
     parser: argparse.ArgumentParser
         Parser whose `error` method is called to report a bad path.

     arg: str
         Path given on the command line.

     Returns
     -------
     arg: str
         The unchanged path when it is accepted.
     """
     # the emptiness check only runs for paths that do exist
     if not os.path.exists(arg) or is_empty_file(arg):
         return arg  # return the file path

     parser.error("The file %s already exists!" % arg)


 if __name__ == '__main__':
     # Command line entry point: take the timing from one SRT file and the
     # text from another one, then write the combination to a new SRT file.

     desc = ('Reads the time data from one SRT file, the speech text '
             'from another and saves it into a new srt file.')

     parser = argparse.ArgumentParser(description=desc)
     parser.add_argument('-t', '--in_time_srt', metavar="FILE",
                         type=lambda x: is_existing_file(parser, x),
                         help='File to get the sync times.', required=True)
     parser.add_argument('-s', '--in_speech_srt', metavar="FILE",
                         type=lambda x: is_existing_file(parser, x),
                         help='File to get the speech texts.', required=True)
     parser.add_argument('-o', '--out', metavar="FILE",
                         type=lambda x: is_not_existing_file(parser, x),
                         help='Output file.', required=True)
     parser.add_argument('--in_time_enc', type=str, default=None,
                         help='Encoding of the file to get the sync times.')
     parser.add_argument('--in_speech_enc', type=str, default=None,
                         help='Encoding of the file to get the speech texts.')

     # parse_args() reports usage errors and exits on its own, so it needs
     # no try/except wrapper around it
     args = parser.parse_args()

     syncf = args.in_time_srt
     synce = args.in_time_enc
     spchf = args.in_speech_srt
     spche = args.in_speech_enc
     outpf = args.out

     # read source files (encodings are guessed when not given)
     syncsubs, syncenc = open_subtitles_file(syncf, synce)
     spchsubs, spchenc = open_subtitles_file(spchf, spche)

     # merge sync and text
     subs = merge_sync_text(syncsubs, spchsubs)

     # save the file with the same encoding as the text file
     log.info('Saving file {}'.format(outpf))
     subs.save(outpf, encoding=spchenc)
	#!/usr/bin/env python

	import os
	import os.path as path
	import argparse
	import logging
	from chardet.universaldetector import UniversalDetector
	from pysrt import SubRipFile, SubRipItem

	# Configure logging once for the whole script (INFO and above) and create
	# the module-level logger used by the functions below.
	logging.basicConfig(level=logging.INFO)
	log = logging.getLogger(__name__)


	def get_file_encoding(afile):
	    """ Guess the character encoding of a file with chardet.

	    The file is opened in binary mode and handed line by line to a
	    `UniversalDetector`. Reading stops as soon as the detector reports
	    that it is done, or when the file is exhausted.

	    Parameters
	    ----------
	    afile: str
	        Path to the file to inspect.

	    Returns
	    -------
	    encoding: str
	        The `encoding` entry of the detector result.
	    """
	    detector = UniversalDetector()
	    detector.reset()
	    with open(afile, 'rb') as f:
	        for line in f:
	            detector.feed(line)
	            if detector.done:
	                break
	        # tell the detector that no more data is coming
	        detector.close()

	    return detector.result['encoding']


	def open_subtitles_file(afile, encoding=None):
	    """ Guess the `encoding` of `afile` if not provided and try to
	    parse it using pysrt.

	    Parameters
	    ----------
	    afile: str
	        Path to the subtitles file.

	    encoding: str, optional
	        Encoding of `afile`. Obtained from `get_file_encoding` when None.

	    Returns
	    -------
	    subs: SubRipFile
	        The parsed subtitles.

	    encoding: str
	        File encoding that was used to read the file.

	    Raises
	    ------
	    UnicodeDecodeError:
	        If the encoding is not valid. The error is logged and then
	        re-raised unchanged.
	    """
	    if encoding is None:
	        encoding = get_file_encoding(afile)

	    try:
	        subs = SubRipFile.open(afile, encoding=encoding)
	    except UnicodeDecodeError:
	        log.error('Error detecting the encoding of file {}, guessed {}. '
	                  'Please give the encoding as input.'.format(afile, encoding))
	        raise

	    return subs, encoding


	def merge_sync_text(sync_subs, text_subs):
	    """ Return a new SubRipFile object with the timestamps from `sync_subs`
	    and the text from `text_subs`.

	    Items are paired by position. A sync item without a counterpart in
	    `text_subs` is reported through the module logger and left out of the
	    result. The items of `sync_subs` are copied before their text is
	    replaced, so neither input is modified.

	    Parameters
	    ----------
	    sync_subs: SubRipFile
	        Subtitles that provide the timing of every item.

	    text_subs: SubRipFile
	        Subtitles that provide the text of every item.

	    Returns
	    -------
	    subs: SubRipFile
	        Newly created subtitles, one item per matched pair.
	    """
	    import copy

	    subs = SubRipFile()

	    # Name the text source in error messages without relying on a global
	    # variable that only exists when the module is run as a script.
	    # NOTE(review): `path` is expected to be set by pysrt when the file was
	    # opened from disk -- a generic label is used when it is missing.
	    text_source = getattr(text_subs, 'path', None) or 'the text subtitles'

	    for idx, syncitem in enumerate(sync_subs.data):
	        try:
	            text = text_subs[idx].text
	        except IndexError:
	            log.error('Could not read item {} from {}'.format(idx, text_source),
	                      exc_info=True)
	        else:
	            # shallow copy: keep the timing, swap the text, leave the
	            # caller's item untouched
	            outitem = copy.copy(syncitem)
	            outitem.text = text
	            subs.append(outitem)

	    return subs


	def is_existing_file(parser, arg):
	    """ argparse `type` helper: accept `arg` only if it is an existing file.

	    Directories are rejected as well, because the path is later opened and
	    parsed as a subtitles file.

	    Parameters
	    ----------
	    parser: argparse.ArgumentParser
	        Parser whose `error` method is called to report a bad path.

	    arg: str
	        Path given on the command line.

	    Returns
	    -------
	    arg: str
	        The unchanged path when it points to an existing file.
	    """
	    if not os.path.isfile(arg):
	        parser.error("The file %s does not exist!" % arg)
	    else:
	        return arg  # return the file path


	def is_empty_file(fpath):
	    """ Tell whether the file at `fpath` has a size of zero bytes.

	    The size is taken from `os.stat`, so any error raised by that call
	    (for example for a missing path) is propagated to the caller.
	    """
	    return os.stat(fpath).st_size == 0


	def is_not_existing_file(parser, arg):
	    """ argparse `type` helper for output paths.

	    A path is accepted when nothing exists there yet, or when what exists
	    is an empty file. Any other existing path is reported through
	    `parser.error`.

	    Parameters
	    ----------
	    parser: argparse.ArgumentParser
	        Parser whose `error` method is called to report a bad path.

	    arg: str
	        Path given on the command line.

	    Returns
	    -------
	    arg: str
	        The unchanged path when it is accepted.
	    """
	    # the emptiness check only runs for paths that do exist
	    if os.path.exists(arg) and not is_empty_file(arg):
	        parser.error("The file %s already exists!" % arg)
	    else:
	        return arg  # return the file path


	if __name__ == '__main__':
	    # Command line entry point: take the timing from one SRT file and the
	    # text from another one, then write the combination to a new SRT file.

	    desc = ('Reads the time data from one SRT file, the speech text '
	            'from another and saves it into a new srt file.')

	    parser = argparse.ArgumentParser(description=desc)
	    parser.add_argument('-t', '--in_time_srt', metavar="FILE",
	                        type=lambda x: is_existing_file(parser, x),
	                        help='File to get the sync times.', required=True)
	    parser.add_argument('-s', '--in_speech_srt', metavar="FILE",
	                        type=lambda x: is_existing_file(parser, x),
	                        help='File to get the speech texts.', required=True)
	    parser.add_argument('-o', '--out', metavar="FILE",
	                        type=lambda x: is_not_existing_file(parser, x),
	                        help='Output file.', required=True)
	    parser.add_argument('--in_time_enc', type=str, default=None,
	                        help='Encoding of the file to get the sync times.')
	    parser.add_argument('--in_speech_enc', type=str, default=None,
	                        help='Encoding of the file to get the speech texts.')

	    # parse_args() reports usage errors and exits on its own, so it needs
	    # no try/except wrapper around it
	    args = parser.parse_args()

	    syncf = args.in_time_srt
	    synce = args.in_time_enc
	    spchf = args.in_speech_srt
	    spche = args.in_speech_enc
	    outpf = args.out

	    # read source files (encodings are guessed when not given)
	    syncsubs, syncenc = open_subtitles_file(syncf, synce)
	    spchsubs, spchenc = open_subtitles_file(spchf, spche)

	    # merge sync and text
	    subs = merge_sync_text(syncsubs, spchsubs)

	    # save the file with the same encoding as the text file
	    log.info('Saving file {}'.format(outpf))
	    subs.save(outpf, encoding=spchenc)