Created
May 2, 2018 13:12
-
-
Save erogol/92cdeca0e12c9ea3e79e518111b354c7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import time\n", | |
"from shutil import rmtree\n", | |
"import numpy as np\n", | |
"import matplotlib # Remove this line if you don't need them\n", | |
"matplotlib.use('Agg') # Remove this line if you don't need them\n", | |
"import matplotlib.pyplot as plt\n", | |
"import soundfile as sf\n", | |
"# import librosa\n", | |
"import pyworld as pw\n", | |
"\n", | |
"speed = 1.0\n", | |
"frame_period = 5.0\n", | |
"\n", | |
"EPSILON = 1e-8\n", | |
"\n", | |
"def savefig(filename, figlist, log=True):\n", | |
" #h = 10\n", | |
" n = len(figlist)\n", | |
" # peek into instances\n", | |
" f = figlist[0]\n", | |
" if len(f.shape) == 1:\n", | |
" plt.figure()\n", | |
" for i, f in enumerate(figlist):\n", | |
" plt.subplot(n, 1, i+1)\n", | |
" if len(f.shape) == 1:\n", | |
" plt.plot(f)\n", | |
" plt.xlim([0, len(f)])\n", | |
" elif len(f.shape) == 2:\n", | |
" Nsmp, dim = figlist[0].shape\n", | |
" #figsize=(h * float(Nsmp) / dim, len(figlist) * h)\n", | |
" #plt.figure(figsize=figsize)\n", | |
" plt.figure()\n", | |
" for i, f in enumerate(figlist):\n", | |
" plt.subplot(n, 1, i+1)\n", | |
" if log:\n", | |
" x = np.log(f + EPSILON)\n", | |
" else:\n", | |
" x = f + EPSILON\n", | |
" plt.imshow(x.T, origin='lower', interpolation='none', aspect='auto', extent=(0, x.shape[0], 0, x.shape[1]))\n", | |
" else:\n", | |
" raise ValueError('Input dimension must < 3.')\n", | |
" plt.savefig(filename)\n", | |
" # plt.close() " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"FILE_NAME = '/data/shared/KeithIto/LJSpeech-1.0/wavs/LJ007-0141.wav'\n", | |
"# FILE_NAME = '/data/shared/CommonVoice/cv_corpus/cv-other-test/sample-000645.wav'\n", | |
"# FILE_NAME = '/data/shared/OpenSLR/LibriSpeech/train-other-500/1006/135212/1006-135212-0040.flac'\n", | |
"x, fs = sf.read(FILE_NAME)\n", | |
"# x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64)\n", | |
"\n", | |
"# 1. A convient way\n", | |
"f0, sp, ap = pw.wav2world(x, fs) # use default options\n", | |
"start = time.time()\n", | |
"y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)\n", | |
"print(\" > Default - {}\".format(time.time() - start))\n", | |
"\n", | |
"# 2. Step by step\n", | |
"# 2-1 Without F0 refinement\n", | |
"start = time.time()\n", | |
"_f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,\n", | |
" channels_in_octave=2,\n", | |
" frame_period=frame_period,\n", | |
" speed=speed)\n", | |
"_sp = pw.cheaptrick(x, _f0, t, fs)\n", | |
"_ap = pw.d4c(x, _f0, t, fs)\n", | |
"start = time.time()\n", | |
"_y = pw.synthesize(_f0, _sp, _ap, fs, frame_period)\n", | |
"print(\" > No refinement - {}\".format(time.time() - start))\n", | |
"# librosa.output.write_wav('test/y_without_f0_refinement.wav', _y, fs)\n", | |
"# sf.write('y_without_f0_refinement.wav', _y, fs)\n", | |
"\n", | |
"# 2-2 DIO with F0 refinement (using Stonemask)\n", | |
"f0 = pw.stonemask(x, _f0, t, fs,)\n", | |
"sp = pw.cheaptrick(x, f0, t, fs,)\n", | |
"ap = pw.d4c(x, f0, t, fs)\n", | |
"start = time.time()\n", | |
"y_dio = pw.synthesize(f0, sp, ap, fs, frame_period)\n", | |
"print(\" > DIO - {}\".format(time.time() - start))\n", | |
"# librosa.output.write_wav('test/y_with_f0_refinement.wav', y, fs)\n", | |
"# sf.write('y_with_f0_refinement.wav', y_dio, fs)\n", | |
"\n", | |
"# 2-3 Harvest with F0 refinement (using Stonemask)\n", | |
"_f0_h, t_h = pw.harvest(x, fs)\n", | |
"f0_h = pw.stonemask(x, _f0_h, t_h, fs)\n", | |
"sp_h = pw.cheaptrick(x, f0_h, t_h, fs)\n", | |
"ap_h = pw.d4c(x, f0_h, t_h, fs)\n", | |
"start = time.time()\n", | |
"y_h_harvest = pw.synthesize(f0_h, sp_h, ap_h, fs, pw.default_frame_period)\n", | |
"print(\" > Harvest - {}\".format(time.time() - start))\n", | |
"# librosa.output.write_wav('test/y_harvest_with_f0_refinement.wav', y_h, fs)\n", | |
"# sf.write('y_harvest_with_f0_refinement.wav', y_h_harvest, fs)\n", | |
"\n", | |
"# Reaper F0\n", | |
"import pyreaper\n", | |
"from scipy.io import wavfile\n", | |
"import numpy as np\n", | |
"# fs, x = wavfile.read(FILE_NAME, )\n", | |
"x, fs = sf.read(FILE_NAME, dtype=np.int16)\n", | |
"pm_times, pm, t_h, f0_h, corr = pyreaper.reaper(x, fs, do_high_pass=True, do_hilbert_transform=False, )\n", | |
"x = x.astype('double')\n", | |
"f0_h = f0_h.astype('double')\n", | |
"t_h = t_h.astype('double')\n", | |
"sp_h = pw.cheaptrick(x, f0_h, t_h, fs)\n", | |
"ap_h = pw.d4c(x, f0_h, t_h, fs)\n", | |
"start = time.time()\n", | |
"y_h_reaper = pw.synthesize(f0_h, sp_h, ap_h, fs, pw.default_frame_period)\n", | |
"print(\" > Reaper - {}\".format(time.time() - start))\n", | |
"# librosa.output.write_wav('test/y_harvest_with_f0_refinement.wav', y_h, fs)\n", | |
"# sf.write('y_reaper_with_f0_refinement.wav', y_h_reaper, fs)\n", | |
"\n", | |
"# Comparison\n", | |
"# savefig('test/wavform.png', [x, _y, y])\n", | |
"# savefig('test/sp.png', [_sp, sp])\n", | |
"# savefig('test/ap.png', [_ap, ap], log=False)\n", | |
"# savefig('test/f0.png', [_f0, f0])\n", | |
"\n", | |
"import IPython\n", | |
"def play_audio(audio, sr):\n", | |
" IPython.display.display(IPython.display.Audio(audio, rate=sr))\n", | |
" \n", | |
"print(\"-- Default\")\n", | |
"play_audio(y, fs)\n", | |
" \n", | |
"print(\"-- No refinement\")\n", | |
"play_audio(_y, fs)\n", | |
"\n", | |
"print(\"-- DIO f0 refinement\")\n", | |
"play_audio(y_dio, fs)\n", | |
"\n", | |
"print(\"-- Harverst f0 refinement\")\n", | |
"play_audio(y_h_harvest, fs)\n", | |
"\n", | |
"print(\"-- Reaper f0 refinement\")\n", | |
"play_audio(y_h_reaper, fs)\n", | |
"\n", | |
"print(f0.shape)\n", | |
"print(_sp.shape)\n", | |
"print(_ap.shape)\n", | |
"print(fs)\n", | |
"\n", | |
"import IPython\n", | |
"IPython.display.Audio(FILE_NAME, rate=fs)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment