Working example of Mozilla TTS tacotron2+wavernn
#following instructions from https://github.com/mozilla/TTS/blob/master/notebooks/Benchmark.ipynb
#%load_ext autoreload
#%autoreload 2
import os
import sys
import io
import time
import numpy as np
#pip3 install --user numpy
from collections import OrderedDict
#from matplotlib import pylab as plt
import torch
#To install with CUDA 9.2. This worked for me:
#https://developer.nvidia.com/cuda-92-download-archive?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exenetwork
#pip3 install --user --no-cache-dir torch==1.2.0+cu92 torchvision==0.4.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html
#For trying CUDA 10.0. This didn't work for me:
#https://developer.nvidia.com/cuda-10.0-download-archive?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exelocal
#pip3 install --no-cache-dir --user torch==1.2.0 torchvision==0.4.0 -f https://download.pytorch.org/whl/cu100/torch_stable.html
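
# TTS_PATH should point at a local clone of the Mozilla TTS repo, with the
# WaveRNN repo checked out inside it as 'WaveRNN'; both directories are added
# to sys.path below so their modules can be imported. Adjust these Windows
# paths for your own machine.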
TTS_PATH = os.path.join(r'C:\Users\sokka\Documents\tts\TTS')
WAVERNN_PATH = os.path.join(TTS_PATH, 'WaveRNN')

#%pylab inline
#rcParams["figure.figsize"] = (16,5)

# add libraries into environment
#import importlib
#importlib.import_module('TTS')
sys.path.append(TTS_PATH)  # set this if TTS is not installed globally
sys.path.append(WAVERNN_PATH)  # set this if WaveRNN is not installed globally
import librosa
import librosa.display

from models.tacotron import Tacotron
from layers import *
from utils.data import *
from utils.audio import AudioProcessor
from utils.generic_utils import load_config, setup_model
from utils.text import text_to_sequence, cleaners
from utils.synthesis import synthesis
#from utils.visual import visualize
#import IPython
#from IPython.display import Audio
#pip3 install --user ipython
import re
#os.environ['CUDA_VISIBLE_DEVICES']='1'
#os.environ['OMP_NUM_THREADS']='1'

iscuda = torch.cuda.is_available()
print('torch.cuda.is_available()=' + str(iscuda))

runcounter = 0
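
# tts() synthesizes one sentence: Tacotron produces the post-net spectrogram,
# which is either inverted with Griffin-Lim (use_gl=True) or passed to the
# WaveRNN vocoder, and the resulting waveform is written to OUT_FOLDER.
# runcounter keeps successive output file names unique.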
def tts(model, text, CONFIG, use_cuda, ap, use_gl, speaker_id=None, figures=True):
    global runcounter
    t_1 = time.time()
    submatch = re.sub(r'\s+', ' ', text)
    file_namematch = re.search(r'([^\s]+\s?\d+)', submatch)
    if file_namematch:
        file_name = file_namematch.group(0) + '_' + str(runcounter) + '.wav'
    else:
        file_name = 'tempout_' + str(runcounter) + '.wav'
    runcounter += 1
    waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens = synthesis(model, text, CONFIG, use_cuda, ap, truncated=False)
    if CONFIG.model == "Tacotron" and not use_gl:
        mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T
    if not use_gl:
        waveform = wavernn.generate(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0).cuda(), batched=batched_wavernn, target=11000, overlap=550)
    print(" > Run-time: {}".format(time.time() - t_1))
    #if figures:
    #    visualize(alignment, mel_postnet_spec, stop_tokens, text, ap.hop_length, CONFIG, mel_spec)
    #IPython.display.display(Audio(waveform, rate=CONFIG.audio['sample_rate']))
    os.makedirs(OUT_FOLDER, exist_ok=True)
    out_path = os.path.join(OUT_FOLDER, file_name)
    ap.save_wav(waveform, out_path)
    return alignment, mel_postnet_spec, stop_tokens, waveform

# Set constants
ROOT_PATH = TTS_PATH
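# MODEL_PATH / CONFIG_PATH point at the Tacotron2 TTS checkpoint and its config,
# VOCODER_MODEL_PATH / VOCODER_CONFIG_PATH at the WaveRNN vocoder checkpoint and
# its config (pretrained LJSpeech models here; adjust the paths to wherever you
# extracted your downloads).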
MODEL_PATH = os.path.join(r'C:\Users\sokka\Documents\tts\wavernn_mold\ljspeech_8a47b46', 'checkpoint_261000.pth.tar')
CONFIG_PATH = os.path.join(r'C:\Users\sokka\Documents\tts\wavernn_mold\ljspeech_8a47b46', 'config.json')
OUT_FOLDER = os.path.join(ROOT_PATH, 'AudioSamples/benchmark_samples/')
CONFIG = load_config(CONFIG_PATH)
VOCODER_MODEL_PATH = os.path.join(r'C:\Users\sokka\Documents\tts\wavernn_mold\wavernn_mold_8a1c152', 'checkpoint_433000.pth.tar')
VOCODER_CONFIG_PATH = os.path.join(r'C:\Users\sokka\Documents\tts\wavernn_mold\wavernn_mold_8a1c152', 'config.json')
VOCODER_CONFIG = load_config(VOCODER_CONFIG_PATH)
use_cuda = True

# Set some config fields manually for testing
# CONFIG.windowing = False
# CONFIG.prenet_dropout = False
# CONFIG.separate_stopnet = True
# CONFIG.stopnet = True

# Set the vocoder
use_gl = False          # use Griffin-Lim instead of WaveRNN if True
batched_wavernn = True  # use batched WaveRNN inference if True
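# With use_gl=True the spectrogram is inverted with Griffin-Lim (fast, lower
# quality) and the WaveRNN section below is skipped. Batched WaveRNN inference
# splits the mel spectrogram into overlapping segments (the target/overlap
# arguments passed to wavernn.generate() in tts() above) so generation runs
# much faster at a small cost in quality.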

# LOAD TTS MODEL
from utils.text.symbols import symbols, phonemes

# load the model
num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)
model = setup_model(num_chars, CONFIG)

# load the audio processor
ap = AudioProcessor(**CONFIG.audio)

# load model state
if use_cuda:
    cp = torch.load(MODEL_PATH)
else:
    cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)

# load the model
model.load_state_dict(cp['model'])
if use_cuda:
    model.cuda()
model.eval()
print(cp['step'])
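# cp['step'] is the training step the checkpoint was saved at (261000 for the
# checkpoint_261000.pth.tar file used here).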

# LOAD WAVERNN
if not use_gl:
    from WaveRNN.models.wavernn import Model
    bits = 10
    wavernn = Model(
        rnn_dims=512,
        fc_dims=512,
        mode="mold",
        pad=2,
        upsample_factors=VOCODER_CONFIG.upsample_factors,  # set this depending on dataset
        feat_dims=VOCODER_CONFIG.audio["num_mels"],
        compute_dims=128,
        res_out_dims=128,
        res_blocks=10,
        hop_length=ap.hop_length,
        sample_rate=ap.sample_rate,
    ).cuda()
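    # mode="mold" selects the mixture-of-logistics output head, matching the
    # "wavernn_mold" checkpoint loaded below.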
    check = torch.load(VOCODER_MODEL_PATH)
    wavernn.load_state_dict(check['model'])
    if use_cuda:
        wavernn.cuda()
    wavernn.eval()
    print(check['step'])

illegalchars_exclusive = re.compile(r'[^\w\d\.\,\;\!\?\s]')
repetition = re.compile(r'\s{2,}')

def custom_text_fix(sentence):
    global illegalchars_exclusive
    global repetition
    newsentence = illegalchars_exclusive.sub(' ', sentence)
    newsentence = repetition.sub(' ', newsentence)
    return newsentence
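
# Illustrative example (not from the original gist): characters outside
# [\w\d.,;!?\s] are replaced with spaces and runs of whitespace are collapsed,
# so the curly quotes in the sentence below are stripped before synthesis:
#   custom_text_fix('He said “yes!”')  ->  'He said yes! '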

model.eval()
model.decoder.max_decoder_steps = 2000
speaker_id = 0
sentences = ["Bill got in the habit of asking himself “Is that thought true?” And if he wasn’t absolutely certain it was, he just let it go."]

for sentence in sentences:
    sentence = custom_text_fix(sentence)
    sentence = cleaners.english_cleaners(sentence)
    align, spec, stop_tokens, wav = tts(model, sentence, CONFIG, use_cuda, ap, speaker_id=speaker_id, use_gl=use_gl, figures=True)
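
# To synthesize more text, add more entries to the `sentences` list above; each
# sentence is cleaned, synthesized, and written as a .wav file into OUT_FOLDER
# (AudioSamples/benchmark_samples/ under the TTS repo). Illustrative example:
#   sentences = [
#       "Bill got in the habit of asking himself ...",
#       "The quick brown fox jumps over the lazy dog.",
#   ]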