Skip to content

Instantly share code, notes, and snippets.

@m-szk
Created February 28, 2025 00:46
Show Gist options
  • Save m-szk/ef83ed40e7685e7446ccb5022ff4ecc8 to your computer and use it in GitHub Desktop.
Save m-szk/ef83ed40e7685e7446ccb5022ff4ecc8 to your computer and use it in GitHub Desktop.
import argparse
from datetime import datetime
import os
import subprocess
import sys
import yaml
def create_duration_list(wav_dir):
    """Collect the duration of every ``.wav`` file found under *wav_dir*.

    The directory tree is walked recursively. Both subdirectories and file
    names are visited in sorted order so the result order is deterministic.
    Each file's duration is obtained by running ``ffprobe``.

    Args:
        wav_dir: Path of the directory to scan.

    Returns:
        A list of ``(filepath, duration_in_seconds)`` tuples.

    Exits the process with status 1 when *wav_dir* is not an existing
    directory, or when ffprobe does not report a numeric duration for a file.
    """
    if not wav_dir or not os.path.isdir(wav_dir):
        print(f'Does not exist directory. : {wav_dir}')
        sys.exit(1)

    duration_list = []
    for root, dirs, files in os.walk(wav_dir):
        # os.walk yields names in arbitrary order. Sorting ``dirs`` in place
        # also fixes the order in which subdirectories are descended into.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".wav"):
                continue
            filepath = os.path.join(root, file)
            result = subprocess.run(
                ["ffprobe", "-i", filepath, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            duration = result.stdout.strip()
            try:
                seconds = float(duration)
            except ValueError:
                # "-v quiet" suppresses ffprobe's own diagnostics, so report
                # the exit status and raw output instead of a bare traceback.
                print(
                    f"ffprobe could not read a duration from {filepath} "
                    f"(exit status {result.returncode}, output {duration!r})",
                    file=sys.stderr,
                )
                sys.exit(1)
            duration_list.append((filepath, seconds))
    return duration_list
def create_text_list(text_file):
    """Read the spoken text of each line from a ``<speaker>,<text>`` file.

    Each non-blank line is split at its first comma and only the part to the
    right of it is kept, so commas inside the text itself are preserved.
    Blank lines are skipped.

    Args:
        text_file: Path of the text file to read.

    Returns:
        A list of text strings, one per non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line contains no comma.
    """
    text_list = []
    # Read as UTF-8 explicitly, matching the encoding used for the SRT
    # output. "utf-8-sig" additionally tolerates a leading byte-order mark.
    with open(text_file, "r", encoding="utf-8-sig") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            # Split at the first "," and keep only the right-hand part.
            _, sep, talk = line.partition(",")
            if not sep:
                raise ValueError(
                    f"{text_file}:{line_no}: expected '<speaker>,<text>' "
                    f"but found no comma: {line!r}"
                )
            text_list.append(talk)
    return text_list
def parse_time_to_seconds(time_str):
    """Convert a time value to a number of seconds.

    Accepted inputs:

    * ``"HH:MM:SS,fff"`` - with a fractional part after a comma,
    * ``"HH:MM:SS.fff"`` - with a fractional part after a dot,
    * ``"HH:MM:SS"`` - without a fractional part,
    * an ``int`` or ``float`` - taken as already being in seconds. A YAML 1.1
      loader such as PyYAML can resolve an unquoted value like ``10:30:00``
      to an integer number of seconds, so numeric input is accepted.

    Args:
        time_str: The time value to convert.

    Returns:
        The time as a float number of seconds.

    Raises:
        ValueError: If a string matches none of the accepted formats.
    """
    # bool is a subclass of int; do not treat True/False as a time value.
    if isinstance(time_str, (int, float)) and not isinstance(time_str, bool):
        return float(time_str)

    for fmt in ("%H:%M:%S,%f", "%H:%M:%S.%f", "%H:%M:%S"):
        try:
            dt = datetime.strptime(time_str, fmt)
        except ValueError:
            continue
        return dt.hour * 3600 + dt.minute * 60 + dt.second + dt.microsecond / 1_000_000

    raise ValueError(
        f"time {time_str!r} does not match HH:MM:SS, HH:MM:SS,fff or HH:MM:SS.fff"
    )
def read_setting_yaml(yaml_path):
    """Load per-line start-time overrides from a YAML settings file.

    The file is expected to hold a ``time_adjustments`` list whose entries
    carry a ``line`` number and a ``start_time``. Entries without a ``line``
    key are ignored.

    Args:
        yaml_path: Path of the settings file. May be ``None`` or empty.

    Returns:
        A dict mapping line number (int) to start time in seconds. The dict
        is empty when no path is given, the path does not exist, the file is
        empty, or it has no ``time_adjustments`` section.
    """
    time_adjustments = {}
    if not yaml_path or not os.path.exists(yaml_path):
        return time_adjustments

    with open(yaml_path, "r", encoding="utf-8") as f:
        setting = yaml.safe_load(f)

    # An empty document loads as None and the section may be absent or empty;
    # treat all of those as "no adjustments" rather than crashing.
    entries = setting.get("time_adjustments") if isinstance(setting, dict) else None
    for adjust in entries or []:
        if "line" in adjust:
            seconds = parse_time_to_seconds(adjust["start_time"])
            time_adjustments[int(adjust["line"])] = seconds
    return time_adjustments
def convert_time(seconds):
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Non-negative time in seconds (int or float).

    Returns:
        The timestamp string, with hours, minutes and seconds zero-padded to
        two digits and milliseconds to three.
    """
    # Round to whole milliseconds first. Truncating the fractional part
    # separately loses a millisecond to float error (1.001 became ",000").
    total_milliseconds = round(seconds * 1000)
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
def create_srt(output_srt_file, text_list, duration_list,time_adjustments):
    """Write an SRT subtitle file pairing each text with an audio duration.

    Subtitles are numbered from 1. Each one starts where the previous one
    ended, unless *time_adjustments* holds an explicit start time for its
    number. Each one lasts for the duration of its paired audio entry.

    Args:
        output_srt_file: Path of the SRT file to write (UTF-8).
        text_list: Subtitle texts, in order.
        duration_list: ``(filepath, duration_in_seconds)`` entries, in order.
        time_adjustments: Mapping of subtitle number to start time in seconds.

    Texts and durations are paired positionally. If their counts differ, a
    warning is printed to stderr and the unpaired entries are left out.
    """
    text_list = list(text_list)
    duration_list = list(duration_list)
    if len(text_list) != len(duration_list):
        # zip() below stops at the shorter input; make the loss visible.
        print(
            f"Warning: {len(text_list)} text lines but {len(duration_list)} "
            f"audio durations; unpaired entries are ignored.",
            file=sys.stderr,
        )

    with open(output_srt_file, "w", encoding='utf-8') as f:
        end_time = 0
        for index, (text, duration) in enumerate(zip(text_list, duration_list), start=1):
            # Apply the explicit start time when one is configured for this
            # subtitle; otherwise continue from the previous subtitle's end.
            start_time = time_adjustments[index] if index in time_adjustments else end_time
            end_time = start_time + duration[1]
            f.write(f"{index}\n")
            f.write(f"{convert_time(start_time)} --> {convert_time(end_time)}\n")
            f.write(f"{text}\n")
            f.write("\n")
def main():
    """Command-line entry point: build an SRT file from text and .wav files.

    Parses the three positional arguments (text file, .wav directory, output
    SRT path) and the optional ``-s/--setting`` YAML path, then loads the
    texts, the audio durations and the time adjustments, in that order, and
    writes the SRT file.
    """
    # Set up the command-line argument parser.
    cli = argparse.ArgumentParser(
        description="Process VOICEVOX text and .wav files to output srt file."
    )
    positional_arguments = (
        ("text_file", "VOICEVOX text file"),
        ("wav_dir", "VOICEVOX .wav files directory"),
        ("output_srt_file", "output srt file"),
    )
    for arg_name, arg_help in positional_arguments:
        cli.add_argument(arg_name, type=str, help=arg_help)
    cli.add_argument("-s", "--setting", help="setting.yaml")
    options = cli.parse_args()

    # Call arguments are evaluated left to right, so the inputs are still
    # loaded as: texts, then durations, then settings.
    create_srt(
        options.output_srt_file,
        create_text_list(options.text_file),
        create_duration_list(options.wav_dir),
        read_setting_yaml(options.setting),
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment