Created
January 19, 2025 17:17
-
-
Save hollerith/2eff5a473c23f3f188c74fafbceb6b3f to your computer and use it in GitHub Desktop.
Stacking two tensors from different packages, Kokoro and OuteTTS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import outetts | |
import wave | |
import os | |
import torch | |
import numpy as np | |
# Set up the OuteTTS 0.2 (500M, English) Hugging Face model configuration
# and the interface object that the rest of this script calls into.
model_config = outetts.HFModelConfig_v1(
    model_path="OuteAI/OuteTTS-0.2-500M",
    language="en",
)
interface = outetts.InterfaceHF(
    model_version="0.2",
    cfg=model_config,
)

# Ask the interface to print its default speakers.
interface.print_default_speakers()
def create_speaker(sample="output.m4a"):
    """Build a speaker profile from an audio sample.

    Delegates to ``interface.create_speaker`` with no transcript supplied,
    the ``"turbo"`` Whisper model and no explicit Whisper device.

    Args:
        sample: Path of the reference audio file.

    Returns:
        Whatever ``interface.create_speaker`` returns for that sample.
    """
    return interface.create_speaker(
        audio_path=sample,
        transcript=None,
        whisper_model="turbo",
        whisper_device=None,
    )
def load_speaker(speaker_file):
    """Load a saved embedding with torch and turn it into a speaker.

    Args:
        speaker_file: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        The result of ``interface.create_speaker_from_embedding`` for the
        loaded embedding.
    """
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered file cannot execute arbitrary code on load. This matches
    # the other torch.load calls in this script.
    embedding = torch.load(speaker_file, weights_only=True)
    # NOTE(review): confirm that the installed outetts interface actually
    # exposes create_speaker_from_embedding; nothing in this file calls
    # load_speaker, so this path is untested here.
    return interface.create_speaker_from_embedding(embedding)
def save_tensor(speaker_dict, output_file, *, dtype=None):
    """Pack a speaker's per-word codes into a fixed-size tensor and save it.

    Each entry of ``speaker_dict['words']`` contributes one row built from
    its ``'codes'`` sequence. Rows are right-padded with code ``0`` to the
    longest row, mapped to floats via ``code / 2000.0 - 0.5``, and copied
    into the top-left corner of a zero-filled ``(511, 1, 256)`` buffer;
    anything beyond 511 rows or 256 columns is dropped.

    Note that padding inside a row is normalised (code 0 becomes -0.5)
    while buffer cells outside the copied region stay 0.0.

    Args:
        speaker_dict: Mapping with a ``'words'`` list whose items each have
            a ``'codes'`` sequence of numbers.
        output_file: Path or file-like object handed to ``torch.save``.
        dtype: Torch dtype of the saved tensor. When omitted, the dtype of
            a module-level tensor named ``emma`` is used if one exists
            (the historical behaviour of this script), else float32.

    Raises:
        ValueError: If ``speaker_dict['words']`` is empty.
    """
    # NOTE(review): the target shape presumably mirrors the reference voice
    # tensor this output is later stacked with -- confirm against that file.
    max_rows, max_cols = 511, 256
    code_scale, code_offset = 2000.0, 0.5

    codes_list = [word['codes'] for word in speaker_dict['words']]
    if not codes_list:
        raise ValueError("speaker_dict['words'] is empty; no codes to save")

    # Pad every row to the longest one so the list forms a rectangular array.
    max_len = max(len(codes) for codes in codes_list)
    padded_codes = [
        list(codes) + [0] * (max_len - len(codes)) for codes in codes_list
    ]
    codes = np.array(padded_codes).astype(np.float32) / code_scale - code_offset

    rows = min(max_rows, codes.shape[0])
    cols = min(max_cols, codes.shape[1])
    codes_reshaped = np.zeros((max_rows, 1, max_cols), dtype=np.float32)
    codes_reshaped[:rows, 0, :cols] = codes[:rows, :cols]

    if dtype is None:
        # Backward compatibility: earlier versions read the dtype from the
        # module-level ``emma`` tensor. Keep honouring it when present, but
        # do not fail with NameError when it is not defined yet.
        reference = globals().get('emma')
        dtype = getattr(reference, 'dtype', torch.float32)

    codes_tensor = torch.from_numpy(codes_reshaped).to(dtype)
    torch.save(codes_tensor, output_file)
# Reuse the cached speaker profile when it exists; otherwise build one from
# the default audio sample and cache it for later runs.
speaker_file = "bf_hannah.txt"
if not os.path.exists(speaker_file):
    interface.save_speaker(create_speaker(), speaker_file)
speaker = interface.load_speaker(speaker_file)

# Reference tensor. save_tensor() consults this module-level name for the
# output dtype, so it must be assigned before save_tensor() is called.
emma = torch.load('bf_emma.pt', weights_only=True)

# Convert the speaker's codes to a tensor file, then read it back.
save_tensor(speaker, 'bf_hannah.pt')
hannah = torch.load('bf_hannah.pt', weights_only=True)

# Quick sanity output: dtypes and Python types of both tensors.
print(emma.dtype, hannah.dtype)
print(type(emma), type(hannah))

# Element-wise mean of the two tensors, written out as a new file.
averaged_tensor = torch.stack((emma, hannah)).mean(dim=0)
torch.save(averaged_tensor, "hammah.pth")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment