Created
November 13, 2023 02:47
-
-
Save enginebai/56e69de182b2849dcde1c8277bef6a31 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Set up environment variables to speed up generation.
# These assignments deliberately come BEFORE the bark imports below:
# bark.generation reads its SUNO_* switches when the module is imported, so
# values placed in the environment after that import are not picked up.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["SUNO_OFFLOAD_CPU"] = "1"
# Uncomment to additionally request the smaller model checkpoints.
# os.environ["SUNO_USE_SMALL_MODELS"] = "1"

import argparse  # noqa: E402  (imports follow the environment setup on purpose)

import nltk  # noqa: E402
import numpy as np  # noqa: E402
from bark import SAMPLE_RATE, generate_audio  # noqa: E402
from bark.generation import (  # noqa: E402
    preload_models
)
from scipy.io.wavfile import write as write_wav  # noqa: E402

# Voice preset used when the caller does not pass a speaker.
default_speaker = "v2/en_speaker_6"
# Fallback text used when the caller does not pass a script.
test_script = """
"""
# Extension given to the file produced by the ffmpeg conversion step.
audio_file_extension = ".mp4"
def __generate_voice(script, speaker):
    """Generate speech for `script` one sentence at a time.

    Newlines in `script` are replaced by spaces, the text is split into
    sentences with ``nltk.sent_tokenize``, and each sentence is passed to
    ``generate_audio`` with `speaker` as the ``history_prompt``. A quarter
    second of silence is appended after every sentence.

    Args:
        script: Text to speak.
        speaker: Value forwarded to ``generate_audio`` as ``history_prompt``.

    Returns:
        A single NumPy array holding all sentences and the pauses between
        them, concatenated in order.

    Raises:
        ValueError: If `script` contains no sentences (for example, it is
            empty or whitespace only).
    """
    sentences = nltk.sent_tokenize(script.replace("\n", " ").strip())
    if not sentences:
        # np.concatenate([]) would fail with an unhelpful numpy error;
        # report the real cause instead.
        raise ValueError("Cannot generate audio: the script contains no sentences")

    silence = np.zeros(int(0.25 * SAMPLE_RATE))  # quarter second of silence
    pieces = []
    for sentence in sentences:
        print(f'\nGenerating audio for: "{sentence}"')
        pieces.append(generate_audio(sentence, history_prompt=speaker))
        # np.concatenate copies its inputs, so one shared silence buffer is safe.
        pieces.append(silence)
    return np.concatenate(pieces)
def _default_output_name(script):
    """Return a file-system-safe ``.wav`` name built from the start of `script`."""
    head = script[0:35].replace("\n", " ").strip()
    # Keep letters, digits, spaces, '-' and '_'; replace anything else
    # (path separators, quotes, punctuation) so the name is valid everywhere.
    safe = "".join(ch if ch.isalnum() or ch in " -_" else "_" for ch in head).strip()
    return f"{safe or 'output'}.wav"


def english_gpt(script, speaker, output_file):
    """Generate speech for `script` and write it to a WAV file.

    Args:
        script: Text to speak. When falsy, the module-level ``test_script``
            is used instead.
        speaker: Speaker passed on to the voice generator. When falsy, the
            module-level ``default_speaker`` is used instead.
        output_file: Path of the WAV file to write. When falsy, a name is
            derived from the first 35 characters of the text that is
            actually spoken, falling back to ``output.wav``.

    Returns:
        The path of the WAV file that was written.
    """
    # Download the models and sample data
    nltk.download('punkt')
    preload_models()

    effective_script = script if script else test_script
    effective_speaker = speaker if speaker else default_speaker
    audio_array = __generate_voice(effective_script, effective_speaker)

    if not output_file:
        # Name the file after the text that was spoken, not after the
        # fallback test script.
        output_file = _default_output_name(effective_script)
    write_wav(output_file, SAMPLE_RATE, audio_array)
    return output_file
def convert_to_mp4(wav_file, mp4_file):
    """Convert `wav_file` to `mp4_file` by running ``ffmpeg``.

    The command is passed to ffmpeg as an argument list, not through a
    shell, so file names containing quotes, spaces or shell metacharacters
    are handled literally. The equivalent shell command is printed before
    it runs. Failures are reported on stdout and never raised.

    Args:
        wav_file: Path of the input audio file.
        mp4_file: Path of the output file to create.

    Returns:
        None.
    """
    # Local imports keep this function self-contained.
    import shlex
    import subprocess

    command = [
        "ffmpeg",
        "-i", str(wav_file),
        "-c:v", "libx264",
        "-c:a", "aac",
        "-strict", "-2",
        str(mp4_file),
    ]
    print(" ".join(shlex.quote(part) for part in command))
    try:
        result = subprocess.run(command, check=False)
    except OSError as err:
        # Typically ffmpeg is not installed or not on PATH.
        print(f"Could not run ffmpeg: {err}")
        return
    if result.returncode != 0:
        print(f"ffmpeg exited with status {result.returncode}")
def start_cli():
    """Parse command-line arguments, generate the audio and convert it.

    Options:
        --script   Text to generate audio from.
        --input    Path of a file containing the text; takes precedence
                   over --script when both are given.
        --speaker  Speaker to use.
        --output   Path of the WAV file to write.

    The generated WAV path is printed, then converted with ffmpeg to a file
    with the same base name and the ``audio_file_extension`` suffix.
    """
    parser = argparse.ArgumentParser(description='Generate audio from text')
    parser.add_argument('--script', help='Script to generate audio from')
    parser.add_argument('--input', help='Input file with script')
    parser.add_argument('--speaker', help='Speaker to use')
    parser.add_argument('--output', help='Output file')
    args = parser.parse_args()

    if args.input:
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's default encoding.
        with open(args.input, 'r', encoding='utf-8') as file:
            my_script = file.read()
    else:
        my_script = args.script

    generated_audio = english_gpt(my_script, args.speaker, args.output)
    print(generated_audio)
    target = os.path.splitext(generated_audio)[0] + audio_file_extension
    convert_to_mp4(generated_audio, target)
# Run the CLI only when executed as a script, so that importing this module
# (for example to reuse english_gpt) does not parse sys.argv or start
# generating audio.
if __name__ == "__main__":
    start_cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment