m-szk · February 28, 2025 00:46
diff --git a/vv_txt_wav_to_srt.py b/vv_txt_wav_to_srt.py
 import argparse
 from datetime import datetime
 import os
 import subprocess
 import sys
 import yaml


 def create_duration_list(wav_dir):
     """Measure the duration of every .wav file under a directory with ffprobe.

     The directory tree is walked recursively. Sub-directories and file names
     are both visited in sorted order so the result order is deterministic.

     Args:
         wav_dir: Path of the directory that holds the .wav files.

     Returns:
         A list of ``(filepath, duration_in_seconds)`` tuples.

     The process exits with status 1 (after printing a message to stderr)
     when ``wav_dir`` is not a directory, when ffprobe cannot be started, or
     when ffprobe does not print a usable duration for a file.
     """
     if not wav_dir or not os.path.isdir(wav_dir):
         print(f'Does not exist directory. : {wav_dir}', file=sys.stderr)
         sys.exit(1)

     duration_list = []
     for root, dirs, files in os.walk(wav_dir):
         # Sorting in place makes os.walk descend into sub-directories in a
         # stable order instead of the order the file system returns them.
         dirs.sort()
         for file in sorted(files):
             if not file.endswith(".wav"):
                 continue
             filepath = os.path.join(root, file)
             try:
                 result = subprocess.run(
                     ["ffprobe", "-i", filepath, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"],
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     text=True,
                 )
             except FileNotFoundError:
                 print('ffprobe command was not found.', file=sys.stderr)
                 sys.exit(1)

             # "-v quiet" hides ffprobe's own diagnostics, so a failure shows
             # up only as empty or non-numeric output; report the file here.
             try:
                 duration = float(result.stdout.strip())
             except ValueError:
                 print(
                     f'Could not get duration with ffprobe (exit code {result.returncode}). : {filepath}',
                     file=sys.stderr,
                 )
                 sys.exit(1)
             duration_list.append((filepath, duration))
     return duration_list


 def create_text_list(text_file):
     """Read the spoken text of each line from the text file.

     Each non-blank line is split at its first "," and only the right-hand
     part is kept. Blank lines are skipped, and a line that contains no ","
     is kept whole instead of raising IndexError.

     Args:
         text_file: Path of the text file. It is read as UTF-8 (a leading
             BOM is tolerated), matching the encoding used for the SRT output.

     Returns:
         A list with one text string per non-blank input line.
     """
     text_list = []
     with open(text_file, "r", encoding="utf-8-sig") as f:
         for line in f:
             line = line.strip()
             if not line:
                 continue
             # Split at the first "," and keep only the right-hand side.
             _, separator, talk = line.partition(",")
             text_list.append(talk if separator else line)
     return text_list


 def parse_time_to_seconds(time_str):
     """Convert a time value to a number of seconds.

     Args:
         time_str: A string in ``HH:MM:SS,fff`` or ``HH:MM:SS`` form, or an
             int/float that is already a number of seconds.

     Returns:
         The time as a float number of seconds.

     Raises:
         ValueError: If a string matches neither supported format.
     """
     # Numbers are taken as seconds.
     # NOTE(review): PyYAML (YAML 1.1) is expected to load an unquoted value
     # such as 10:00:00 as a base-60 integer, which would arrive here as an
     # int rather than a string - confirm against the settings files in use.
     if isinstance(time_str, (int, float)) and not isinstance(time_str, bool):
         return float(time_str)

     try:
         dt = datetime.strptime(time_str, "%H:%M:%S,%f")  # with fractional seconds
     except ValueError:
         dt = datetime.strptime(time_str, "%H:%M:%S")  # without fractional seconds

     return dt.hour * 3600 + dt.minute * 60 + dt.second + dt.microsecond / 1_000_000


 def read_setting_yaml(yaml_path):
     """Load per-line start-time overrides from the optional settings YAML.

     The file is expected to hold a ``time_adjustments`` list whose entries
     carry a ``line`` number and a ``start_time`` value. Entries without a
     ``line`` key are ignored.

     Args:
         yaml_path: Path of the YAML file, or a falsy value when no settings
             file was given.

     Returns:
         A dict mapping the line number (int) to its start time in seconds.
         It is empty when the path is missing or does not exist, when the
         file is empty, or when it has no ``time_adjustments`` entries.
     """
     time_adjustments = {}
     if not yaml_path or not os.path.exists(yaml_path):
         return time_adjustments

     with open(yaml_path, "r", encoding="utf-8") as f:
         # safe_load returns None for an empty document; treat it as no settings.
         setting = yaml.safe_load(f) or {}

     # The key may be absent or explicitly empty; both mean "no adjustments".
     for adjust in setting.get("time_adjustments") or []:
         if "line" in adjust:
             seconds = parse_time_to_seconds(adjust["start_time"])
             time_adjustments[int(adjust["line"])] = seconds
     return time_adjustments


 def convert_time(seconds):
     """Format a number of seconds as an ``HH:MM:SS,mmm`` timestamp.

     Args:
         seconds: Non-negative time in seconds (int or float).

     Returns:
         The timestamp string with zero-padded hours, minutes, seconds and
         milliseconds.
     """
     # Round to whole milliseconds once, up front. Truncating the fractional
     # part after a float subtraction loses a millisecond for values such as
     # 2.3 (whose fraction evaluates to 0.2999...), and rounding first also
     # lets 59.9996 carry over cleanly into the next minute.
     total_milliseconds = int(round(seconds * 1000))
     total_seconds, milliseconds = divmod(total_milliseconds, 1000)
     minutes, secs = divmod(total_seconds, 60)
     hours, minutes = divmod(minutes, 60)
     return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"


 def create_srt(output_srt_file, text_list, duration_list, time_adjustments):
     """Write numbered subtitle entries to an SRT file.

     Texts and durations are paired in order and numbered from 1. Each entry
     starts where the previous one ended, unless ``time_adjustments`` holds
     an explicit start time for its number. It ends after the paired duration.

     Args:
         output_srt_file: Path of the file to write (UTF-8).
         text_list: Subtitle texts in playback order.
         duration_list: ``(filepath, duration_in_seconds)`` tuples in the
             same order as ``text_list``.
         time_adjustments: Dict mapping an entry number to a start time in
             seconds that overrides the running position.
     """
     # Pairing stops at the shorter list, so report a mismatch instead of
     # dropping the surplus entries silently.
     if len(text_list) != len(duration_list):
         print(
             f"Warning: {len(text_list)} text lines but {len(duration_list)} wav files; "
             "extra entries are ignored.",
             file=sys.stderr,
         )

     with open(output_srt_file, "w", encoding='utf-8') as f:
         end_time = 0
         for index, (text, duration) in enumerate(zip(text_list, duration_list), start=1):
             # Continue from the previous entry unless an adjusted start is set.
             start_time = time_adjustments.get(index, end_time)
             end_time = start_time + duration[1]
             f.write(f"{index}\n")
             f.write(f"{convert_time(start_time)} --> {convert_time(end_time)}\n")
             f.write(f"{text}\n")
             f.write("\n")

 def main():
     """Parse the command line and build the SRT file from the text and .wav inputs."""
     # Set up the command-line argument parser.
     cli = argparse.ArgumentParser(description="Process VOICEVOX text and .wav files to output srt file.")
     positional_arguments = (
         ("text_file", "VOICEVOX text file"),
         ("wav_dir", "VOICEVOX .wav files directory"),
         ("output_srt_file", "output srt file"),
     )
     for argument_name, help_text in positional_arguments:
         cli.add_argument(argument_name, type=str, help=help_text)
     cli.add_argument('-s', '--setting', help='setting.yaml')
     options = cli.parse_args()

     # Inputs are loaded in the order: texts, durations, then settings.
     texts = create_text_list(options.text_file)
     durations = create_duration_list(options.wav_dir)
     adjustments = read_setting_yaml(options.setting)
     create_srt(options.output_srt_file, texts, durations, adjustments)


 # Run the command-line entry point only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	import argparse
	from datetime import datetime
	import os
	import subprocess
	import sys
	import yaml


	def create_duration_list(wav_dir):
	    """Measure the duration of every .wav file under a directory with ffprobe.

	    The directory tree is walked recursively. Sub-directories and file names
	    are both visited in sorted order so the result order is deterministic.

	    Args:
	        wav_dir: Path of the directory that holds the .wav files.

	    Returns:
	        A list of ``(filepath, duration_in_seconds)`` tuples.

	    The process exits with status 1 (after printing a message to stderr)
	    when ``wav_dir`` is not a directory, when ffprobe cannot be started, or
	    when ffprobe does not print a usable duration for a file.
	    """
	    if not wav_dir or not os.path.isdir(wav_dir):
	        print(f'Does not exist directory. : {wav_dir}', file=sys.stderr)
	        sys.exit(1)

	    duration_list = []
	    for root, dirs, files in os.walk(wav_dir):
	        # Sorting in place makes os.walk descend into sub-directories in a
	        # stable order instead of the order the file system returns them.
	        dirs.sort()
	        for file in sorted(files):
	            if not file.endswith(".wav"):
	                continue
	            filepath = os.path.join(root, file)
	            try:
	                result = subprocess.run(
	                    ["ffprobe", "-i", filepath, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"],
	                    stdout=subprocess.PIPE,
	                    stderr=subprocess.PIPE,
	                    text=True,
	                )
	            except FileNotFoundError:
	                print('ffprobe command was not found.', file=sys.stderr)
	                sys.exit(1)

	            # "-v quiet" hides ffprobe's own diagnostics, so a failure shows
	            # up only as empty or non-numeric output; report the file here.
	            try:
	                duration = float(result.stdout.strip())
	            except ValueError:
	                print(
	                    f'Could not get duration with ffprobe (exit code {result.returncode}). : {filepath}',
	                    file=sys.stderr,
	                )
	                sys.exit(1)
	            duration_list.append((filepath, duration))
	    return duration_list


	def create_text_list(text_file):
	    """Read the spoken text of each line from the text file.

	    Each non-blank line is split at its first "," and only the right-hand
	    part is kept. Blank lines are skipped, and a line that contains no ","
	    is kept whole instead of raising IndexError.

	    Args:
	        text_file: Path of the text file. It is read as UTF-8 (a leading
	            BOM is tolerated), matching the encoding used for the SRT output.

	    Returns:
	        A list with one text string per non-blank input line.
	    """
	    text_list = []
	    with open(text_file, "r", encoding="utf-8-sig") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                continue
	            # Split at the first "," and keep only the right-hand side.
	            _, separator, talk = line.partition(",")
	            text_list.append(talk if separator else line)
	    return text_list


	def parse_time_to_seconds(time_str):
	    """Convert a time value to a number of seconds.

	    Args:
	        time_str: A string in ``HH:MM:SS,fff`` or ``HH:MM:SS`` form, or an
	            int/float that is already a number of seconds.

	    Returns:
	        The time as a float number of seconds.

	    Raises:
	        ValueError: If a string matches neither supported format.
	    """
	    # Numbers are taken as seconds.
	    # NOTE(review): PyYAML (YAML 1.1) is expected to load an unquoted value
	    # such as 10:00:00 as a base-60 integer, which would arrive here as an
	    # int rather than a string - confirm against the settings files in use.
	    if isinstance(time_str, (int, float)) and not isinstance(time_str, bool):
	        return float(time_str)

	    try:
	        dt = datetime.strptime(time_str, "%H:%M:%S,%f")  # with fractional seconds
	    except ValueError:
	        dt = datetime.strptime(time_str, "%H:%M:%S")  # without fractional seconds

	    return dt.hour * 3600 + dt.minute * 60 + dt.second + dt.microsecond / 1_000_000


	def read_setting_yaml(yaml_path):
	    """Load per-line start-time overrides from the optional settings YAML.

	    The file is expected to hold a ``time_adjustments`` list whose entries
	    carry a ``line`` number and a ``start_time`` value. Entries without a
	    ``line`` key are ignored.

	    Args:
	        yaml_path: Path of the YAML file, or a falsy value when no settings
	            file was given.

	    Returns:
	        A dict mapping the line number (int) to its start time in seconds.
	        It is empty when the path is missing or does not exist, when the
	        file is empty, or when it has no ``time_adjustments`` entries.
	    """
	    time_adjustments = {}
	    if not yaml_path or not os.path.exists(yaml_path):
	        return time_adjustments

	    with open(yaml_path, "r", encoding="utf-8") as f:
	        # safe_load returns None for an empty document; treat it as no settings.
	        setting = yaml.safe_load(f) or {}

	    # The key may be absent or explicitly empty; both mean "no adjustments".
	    for adjust in setting.get("time_adjustments") or []:
	        if "line" in adjust:
	            seconds = parse_time_to_seconds(adjust["start_time"])
	            time_adjustments[int(adjust["line"])] = seconds
	    return time_adjustments


	def convert_time(seconds):
	    """Format a number of seconds as an ``HH:MM:SS,mmm`` timestamp.

	    Args:
	        seconds: Non-negative time in seconds (int or float).

	    Returns:
	        The timestamp string with zero-padded hours, minutes, seconds and
	        milliseconds.
	    """
	    # Round to whole milliseconds once, up front. Truncating the fractional
	    # part after a float subtraction loses a millisecond for values such as
	    # 2.3 (whose fraction evaluates to 0.2999...), and rounding first also
	    # lets 59.9996 carry over cleanly into the next minute.
	    total_milliseconds = int(round(seconds * 1000))
	    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
	    minutes, secs = divmod(total_seconds, 60)
	    hours, minutes = divmod(minutes, 60)
	    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"


	def create_srt(output_srt_file, text_list, duration_list, time_adjustments):
	    """Write numbered subtitle entries to an SRT file.

	    Texts and durations are paired in order and numbered from 1. Each entry
	    starts where the previous one ended, unless ``time_adjustments`` holds
	    an explicit start time for its number. It ends after the paired duration.

	    Args:
	        output_srt_file: Path of the file to write (UTF-8).
	        text_list: Subtitle texts in playback order.
	        duration_list: ``(filepath, duration_in_seconds)`` tuples in the
	            same order as ``text_list``.
	        time_adjustments: Dict mapping an entry number to a start time in
	            seconds that overrides the running position.
	    """
	    # Pairing stops at the shorter list, so report a mismatch instead of
	    # dropping the surplus entries silently.
	    if len(text_list) != len(duration_list):
	        print(
	            f"Warning: {len(text_list)} text lines but {len(duration_list)} wav files; "
	            "extra entries are ignored.",
	            file=sys.stderr,
	        )

	    with open(output_srt_file, "w", encoding='utf-8') as f:
	        end_time = 0
	        for index, (text, duration) in enumerate(zip(text_list, duration_list), start=1):
	            # Continue from the previous entry unless an adjusted start is set.
	            start_time = time_adjustments.get(index, end_time)
	            end_time = start_time + duration[1]
	            f.write(f"{index}\n")
	            f.write(f"{convert_time(start_time)} --> {convert_time(end_time)}\n")
	            f.write(f"{text}\n")
	            f.write("\n")


	def main():
	    """Parse the command line and build the SRT file from the text and .wav inputs."""
	    # Set up the command-line argument parser.
	    parser = argparse.ArgumentParser(description="Process VOICEVOX text and .wav files to output srt file.")
	    parser.add_argument("text_file", type=str, help="VOICEVOX text file")
	    parser.add_argument("wav_dir", type=str, help="VOICEVOX .wav files directory")
	    parser.add_argument("output_srt_file", type=str, help="output srt file")
	    parser.add_argument('-s', '--setting', help='setting.yaml')
	    args = parser.parse_args()

	    text_list = create_text_list(args.text_file)
	    duration_list = create_duration_list(args.wav_dir)
	    time_adjustments = read_setting_yaml(args.setting)
	    create_srt(args.output_srt_file, text_list, duration_list, time_adjustments)


	# Run the command-line entry point only when this file is executed as a script.
	if __name__ == "__main__":
	    main()