Created
February 28, 2025 00:46
-
-
Save m-szk/ef83ed40e7685e7446ccb5022ff4ecc8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from datetime import datetime | |
import os | |
import subprocess | |
import sys | |
import yaml | |
def create_duration_list(wav_dir):
    """Collect the duration of every ``.wav`` file found under *wav_dir*.

    The directory tree is walked recursively and each ``.wav`` file is
    probed with the external ``ffprobe`` command.

    Args:
        wav_dir: Path of the directory that contains the ``.wav`` files.

    Returns:
        A list of ``(filepath, duration_in_seconds)`` tuples, ordered by
        sorted directory name and then sorted file name.

    Raises:
        SystemExit: With exit code 1 when *wav_dir* is empty or is not an
            existing directory.
        ValueError: When ffprobe's output for a file cannot be parsed as a
            number of seconds.
    """
    if not wav_dir or not os.path.isdir(wav_dir):
        print(f'Does not exist directory. : {wav_dir}')
        sys.exit(1)

    duration_list = []
    for root, dirs, files in os.walk(wav_dir):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order, so the wav order is reproducible.
        dirs.sort()
        for file in sorted(files):  # sort the files
            if not file.endswith(".wav"):
                continue
            filepath = os.path.join(root, file)
            result = subprocess.run(
                ["ffprobe", "-i", filepath, "-show_entries", "format=duration",
                 "-v", "quiet", "-of", "csv=p=0"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            duration = result.stdout.strip()
            try:
                seconds = float(duration)
            except ValueError as err:
                # Empty or non-numeric output (e.g. an unreadable file):
                # report which file failed instead of a bare float() error.
                raise ValueError(
                    f"ffprobe could not determine the duration of {filepath!r} "
                    f"(exit code {result.returncode}, output {duration!r})"
                ) from err
            duration_list.append((filepath, seconds))
    return duration_list
def create_text_list(text_file):
    """Read the spoken text of each line from a VOICEVOX text file.

    Each line is split at its first ``,`` and only the part to the right
    of it is kept; later commas stay in the text.

    Args:
        text_file: Path of the text file, read as UTF-8.

    Returns:
        A list with one text string per non-blank line, in file order.
        A line that contains no comma is kept whole.
    """
    text_list = []
    # Explicit UTF-8 so the result does not depend on the platform's default
    # encoding (the SRT output is written as UTF-8 as well).
    with open(text_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no text.
                continue
            # Split at the first "," and keep only the right-hand part.
            head, sep, talk = line.partition(",")
            text_list.append(talk if sep else head)
    return text_list
def parse_time_to_seconds(time_str):
    """Convert a timestamp into a number of seconds.

    Accepted inputs:
        * ``"HH:MM:SS,fff"`` - with a fractional part after a comma
        * ``"HH:MM:SS.fff"`` - with a fractional part after a dot
        * ``"HH:MM:SS"``     - without a fractional part
        * an ``int`` or ``float`` - taken to already be seconds (a YAML
          loader may hand over an unquoted time as a number)

    Args:
        time_str: The timestamp string, or a numeric number of seconds.

    Returns:
        The time as a float number of seconds.

    Raises:
        ValueError: If a string matches none of the supported formats.
    """
    if isinstance(time_str, (int, float)) and not isinstance(time_str, bool):
        return float(time_str)

    for fmt in ("%H:%M:%S,%f", "%H:%M:%S.%f", "%H:%M:%S"):
        try:
            dt = datetime.strptime(time_str, fmt)
        except ValueError:
            continue
        return dt.hour * 3600 + dt.minute * 60 + dt.second + dt.microsecond / 1_000_000

    raise ValueError(f"Unsupported time format: {time_str!r}")
def read_setting_yaml(yaml_path):
    """Load per-line start-time overrides from a settings YAML file.

    The file is expected to hold a ``time_adjustments`` list whose entries
    have a ``line`` key and a ``start_time`` key. Entries without ``line``
    are ignored.

    Args:
        yaml_path: Path of the YAML file. May be ``None`` or empty.

    Returns:
        A dict mapping the line number (int) to its start time in seconds.
        The dict is empty when *yaml_path* is falsy, the file does not
        exist, the document is empty, or it has no ``time_adjustments``.

    Raises:
        KeyError: If an entry has ``line`` but no ``start_time``.
    """
    time_adjustments = {}
    if not yaml_path or not os.path.exists(yaml_path):
        return time_adjustments

    with open(yaml_path, "r", encoding="utf-8") as f:
        setting = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that, and a
    # missing or empty "time_adjustments" key, as "no adjustments".
    if not isinstance(setting, dict):
        return time_adjustments

    for adjust in setting.get("time_adjustments") or []:
        if "line" in adjust:
            seconds = parse_time_to_seconds(adjust["start_time"])
            time_adjustments[int(adjust["line"])] = seconds
    return time_adjustments
def convert_time(seconds):
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond once, before being
    split into fields. Truncating the fractional part instead loses a
    millisecond to floating-point error (1.001 would render as ``,000``).

    Args:
        seconds: Time in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``.
    """
    total_ms = round(seconds * 1000)
    total_seconds, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
def create_srt(output_srt_file, text_list, duration_list,time_adjustments):
    """Write an SRT subtitle file from texts and audio durations.

    Entry *i* (1-based) pairs ``text_list[i-1]`` with ``duration_list[i-1]``.
    Each entry starts where the previous one ended, unless
    *time_adjustments* holds an explicit start time for that index.

    Args:
        output_srt_file: Path of the SRT file to write (UTF-8).
        text_list: Subtitle texts, one per entry.
        duration_list: ``(filepath, duration_in_seconds)`` tuples, one per
            entry.
        time_adjustments: Dict mapping a 1-based entry index to a start
            time in seconds that overrides the running position.
    """
    if len(text_list) != len(duration_list):
        # zip() stops at the shorter input; report the mismatch rather than
        # dropping the surplus entries silently.
        print(
            f"Warning: {len(text_list)} text lines but {len(duration_list)} "
            "wav files; the extra entries are ignored.",
            file=sys.stderr,
        )

    with open(output_srt_file, "w", encoding='utf-8') as f:
        end_time = 0
        for index, (text, (_, duration)) in enumerate(zip(text_list, duration_list), start=1):
            # Apply the configured start time if there is one; otherwise
            # continue from the end of the previous entry.
            start_time = time_adjustments.get(index, end_time)
            end_time = start_time + duration
            f.write(f"{index}\n")
            f.write(f"{convert_time(start_time)} --> {convert_time(end_time)}\n")
            f.write(f"{text}\n")
            f.write("\n")
def _build_arg_parser():
    """Create the command-line argument parser for this script."""
    cli = argparse.ArgumentParser(
        description="Process VOICEVOX text and .wav files to output srt file."
    )
    cli.add_argument("text_file", type=str, help="VOICEVOX text file")
    cli.add_argument("wav_dir", type=str, help="VOICEVOX .wav files directory")
    cli.add_argument("output_srt_file", type=str, help="output srt file")
    cli.add_argument("-s", "--setting", help="setting.yaml")
    return cli


def main():
    """Parse the command line and generate the SRT file.

    Reads the text file, probes the wav directory, loads the optional
    settings file, then writes the SRT output, in that order.
    """
    options = _build_arg_parser().parse_args()

    texts = create_text_list(options.text_file)
    durations = create_duration_list(options.wav_dir)
    adjustments = read_setting_yaml(options.setting)

    create_srt(options.output_srt_file, texts, durations, adjustments)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment