erogol · May 2, 2018 13:12
diff --git a/pyworld.ipynb b/pyworld.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from shutil import rmtree\n",
    "import numpy as np\n",
    "import matplotlib      # Remove this line if you don't need them\n",
    "matplotlib.use('Agg')  # Remove this line if you don't need them\n",
    "import matplotlib.pyplot as plt\n",
    "import soundfile as sf\n",
    "# import librosa\n",
    "import pyworld as pw\n",
    "\n",
    "speed = 1.0\n",
    "frame_period = 5.0\n",
    "\n",
    "EPSILON = 1e-8\n",
    "\n",
    "def savefig(filename, figlist, log=True, close=True):\n",
    "    \"\"\"Plot each array in ``figlist`` as one stacked subplot and save the figure.\n",
    "\n",
    "    The first array picks the layout: 1-D arrays become line plots, 2-D arrays\n",
    "    become images (transposed, origin at the bottom).\n",
    "\n",
    "    Args:\n",
    "        filename: Output path passed to ``Figure.savefig``.\n",
    "        figlist: Non-empty sequence of arrays, all 1-D or all 2-D.\n",
    "        log: For 2-D input, show ``log(f + EPSILON)`` instead of ``f + EPSILON``.\n",
    "        close: Close the figure once it is saved (default) so repeated calls do\n",
    "            not accumulate open figures; pass ``False`` to keep it open.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``figlist`` is empty or its first array is not 1-D or 2-D.\n",
    "    \"\"\"\n",
    "    n = len(figlist)\n",
    "    if n == 0:\n",
    "        raise ValueError('figlist must contain at least one array.')\n",
    "    # The first entry decides how every panel is drawn.\n",
    "    ndim = len(figlist[0].shape)\n",
    "    if ndim not in (1, 2):\n",
    "        raise ValueError('Input dimension must be 1 or 2.')\n",
    "    fig = plt.figure()\n",
    "    try:\n",
    "        for i, arr in enumerate(figlist):\n",
    "            ax = fig.add_subplot(n, 1, i + 1)\n",
    "            if ndim == 1:\n",
    "                # Entries that are not 1-D leave their panel empty.\n",
    "                if len(arr.shape) == 1:\n",
    "                    ax.plot(arr)\n",
    "                    ax.set_xlim([0, len(arr)])\n",
    "            else:\n",
    "                # EPSILON keeps log() finite where the array is zero.\n",
    "                img = np.log(arr + EPSILON) if log else arr + EPSILON\n",
    "                ax.imshow(img.T, origin='lower', interpolation='none', aspect='auto',\n",
    "                          extent=(0, img.shape[0], 0, img.shape[1]))\n",
    "        fig.savefig(filename)\n",
    "    finally:\n",
    "        # Release the figure even when plotting or saving fails.\n",
    "        if close:\n",
    "            plt.close(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports used only by this cell, grouped first so a missing optional\n",
    "# dependency fails before any analysis is run.\n",
    "import IPython\n",
    "import numpy as np\n",
    "import pyreaper\n",
    "from scipy.io import wavfile\n",
    "\n",
    "FILE_NAME = '/data/shared/KeithIto/LJSpeech-1.0/wavs/LJ007-0141.wav'\n",
    "# FILE_NAME = '/data/shared/CommonVoice/cv_corpus/cv-other-test/sample-000645.wav'\n",
    "# FILE_NAME = '/data/shared/OpenSLR/LibriSpeech/train-other-500/1006/135212/1006-135212-0040.flac'\n",
    "x, fs = sf.read(FILE_NAME)\n",
    "# x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64)\n",
    "\n",
    "\n",
    "def resynthesize(label, wav, f0_track, times, sr, period):\n",
    "    \"\"\"Run CheapTrick and D4C for an F0 track, then resynthesize the waveform.\n",
    "\n",
    "    Only the synthesis call is timed; the elapsed seconds are printed under\n",
    "    ``label``. Returns ``(spectral_envelope, aperiodicity, waveform)``.\n",
    "    \"\"\"\n",
    "    envelope = pw.cheaptrick(wav, f0_track, times, sr)\n",
    "    aperiodicity = pw.d4c(wav, f0_track, times, sr)\n",
    "    start = time.time()\n",
    "    waveform = pw.synthesize(f0_track, envelope, aperiodicity, sr, period)\n",
    "    print(' > {} - {}'.format(label, time.time() - start))\n",
    "    return envelope, aperiodicity, waveform\n",
    "\n",
    "\n",
    "def play_audio(audio, sr):\n",
    "    \"\"\"Show an inline audio player for ``audio`` sampled at ``sr`` Hz.\"\"\"\n",
    "    IPython.display.display(IPython.display.Audio(audio, rate=sr))\n",
    "\n",
    "\n",
    "# 1. A convenient way\n",
    "f0, sp, ap = pw.wav2world(x, fs)    # use default options\n",
    "start = time.time()\n",
    "y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)\n",
    "print(' > Default - {}'.format(time.time() - start))\n",
    "\n",
    "# 2. Step by step\n",
    "# 2-1 Without F0 refinement\n",
    "_f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,\n",
    "                channels_in_octave=2,\n",
    "                frame_period=frame_period,\n",
    "                speed=speed)\n",
    "_sp, _ap, _y = resynthesize('No refinement', x, _f0, t, fs, frame_period)\n",
    "# sf.write('y_without_f0_refinement.wav', _y, fs)\n",
    "\n",
    "# 2-2 DIO with F0 refinement (using Stonemask)\n",
    "f0 = pw.stonemask(x, _f0, t, fs)\n",
    "sp, ap, y_dio = resynthesize('DIO', x, f0, t, fs, frame_period)\n",
    "# sf.write('y_with_f0_refinement.wav', y_dio, fs)\n",
    "\n",
    "# 2-3 Harvest with F0 refinement (using Stonemask)\n",
    "_f0_h, t_h = pw.harvest(x, fs)\n",
    "f0_h = pw.stonemask(x, _f0_h, t_h, fs)\n",
    "sp_h, ap_h, y_h_harvest = resynthesize('Harvest', x, f0_h, t_h, fs, pw.default_frame_period)\n",
    "# sf.write('y_harvest_with_f0_refinement.wav', y_h_harvest, fs)\n",
    "\n",
    "# 2-4 Reaper F0\n",
    "# REAPER is fed int16 PCM. It gets its own variable names so the float\n",
    "# waveform `x` and the Harvest results above are not overwritten.\n",
    "x_pcm, fs_pcm = sf.read(FILE_NAME, dtype=np.int16)\n",
    "pm_times, pm, t_r, f0_r, corr = pyreaper.reaper(x_pcm, fs_pcm, do_high_pass=True, do_hilbert_transform=False)\n",
    "t_r = t_r.astype('double')\n",
    "f0_r = f0_r.astype('double')\n",
    "# REAPER reports unvoiced frames as -1 while WORLD uses 0 for unvoiced.\n",
    "f0_r = np.where(f0_r > 0, f0_r, 0.0)\n",
    "# Analyse the float waveform so y_h_reaper has the same amplitude scale as\n",
    "# the other outputs.\n",
    "sp_r, ap_r, y_h_reaper = resynthesize('Reaper', x, f0_r, t_r, fs, pw.default_frame_period)\n",
    "# sf.write('y_reaper_with_f0_refinement.wav', y_h_reaper, fs)\n",
    "\n",
    "# Comparison\n",
    "# savefig('test/wavform.png', [x, _y, y])\n",
    "# savefig('test/sp.png', [_sp, sp])\n",
    "# savefig('test/ap.png', [_ap, ap], log=False)\n",
    "# savefig('test/f0.png', [_f0, f0])\n",
    "\n",
    "for title, audio in [\n",
    "    ('-- Default', y),\n",
    "    ('-- No refinement', _y),\n",
    "    ('-- DIO f0 refinement', y_dio),\n",
    "    ('-- Harvest f0 refinement', y_h_harvest),\n",
    "    ('-- Reaper f0 refinement', y_h_reaper),\n",
    "]:\n",
    "    print(title)\n",
    "    play_audio(audio, fs)\n",
    "\n",
    "print(f0.shape)\n",
    "print(_sp.shape)\n",
    "print(_ap.shape)\n",
    "print(fs)\n",
    "\n",
    "# Last expression of the cell: player for the original recording.\n",
    "IPython.display.Audio(FILE_NAME, rate=fs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"import time\n",
	"from shutil import rmtree\n",
	"import numpy as np\n",
	"import matplotlib # Remove this line if you don't need them\n",
	"matplotlib.use('Agg') # Remove this line if you don't need them\n",
	"import matplotlib.pyplot as plt\n",
	"import soundfile as sf\n",
	"# import librosa\n",
	"import pyworld as pw\n",
	"\n",
	"speed = 1.0\n",
	"frame_period = 5.0\n",
	"\n",
	"EPSILON = 1e-8\n",
	"\n",
	"def savefig(filename, figlist, log=True, close=True):\n",
	"    \"\"\"Plot each array in ``figlist`` as one stacked subplot and save the figure.\n",
	"\n",
	"    The first array picks the layout: 1-D arrays become line plots, 2-D arrays\n",
	"    become images (transposed, origin at the bottom).\n",
	"\n",
	"    Args:\n",
	"        filename: Output path passed to ``Figure.savefig``.\n",
	"        figlist: Non-empty sequence of arrays, all 1-D or all 2-D.\n",
	"        log: For 2-D input, show ``log(f + EPSILON)`` instead of ``f + EPSILON``.\n",
	"        close: Close the figure once it is saved (default) so repeated calls do\n",
	"            not accumulate open figures; pass ``False`` to keep it open.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: If ``figlist`` is empty or its first array is not 1-D or 2-D.\n",
	"    \"\"\"\n",
	"    n = len(figlist)\n",
	"    if n == 0:\n",
	"        raise ValueError('figlist must contain at least one array.')\n",
	"    # The first entry decides how every panel is drawn.\n",
	"    ndim = len(figlist[0].shape)\n",
	"    if ndim not in (1, 2):\n",
	"        raise ValueError('Input dimension must be 1 or 2.')\n",
	"    fig = plt.figure()\n",
	"    try:\n",
	"        for i, arr in enumerate(figlist):\n",
	"            ax = fig.add_subplot(n, 1, i + 1)\n",
	"            if ndim == 1:\n",
	"                # Entries that are not 1-D leave their panel empty.\n",
	"                if len(arr.shape) == 1:\n",
	"                    ax.plot(arr)\n",
	"                    ax.set_xlim([0, len(arr)])\n",
	"            else:\n",
	"                # EPSILON keeps log() finite where the array is zero.\n",
	"                img = np.log(arr + EPSILON) if log else arr + EPSILON\n",
	"                ax.imshow(img.T, origin='lower', interpolation='none', aspect='auto',\n",
	"                          extent=(0, img.shape[0], 0, img.shape[1]))\n",
	"        fig.savefig(filename)\n",
	"    finally:\n",
	"        # Release the figure even when plotting or saving fails.\n",
	"        if close:\n",
	"            plt.close(fig)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Imports used only by this cell, grouped first so a missing optional\n",
	"# dependency fails before any analysis is run.\n",
	"import IPython\n",
	"import numpy as np\n",
	"import pyreaper\n",
	"from scipy.io import wavfile\n",
	"\n",
	"FILE_NAME = '/data/shared/KeithIto/LJSpeech-1.0/wavs/LJ007-0141.wav'\n",
	"# FILE_NAME = '/data/shared/CommonVoice/cv_corpus/cv-other-test/sample-000645.wav'\n",
	"# FILE_NAME = '/data/shared/OpenSLR/LibriSpeech/train-other-500/1006/135212/1006-135212-0040.flac'\n",
	"x, fs = sf.read(FILE_NAME)\n",
	"# x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64)\n",
	"\n",
	"\n",
	"def resynthesize(label, wav, f0_track, times, sr, period):\n",
	"    \"\"\"Run CheapTrick and D4C for an F0 track, then resynthesize the waveform.\n",
	"\n",
	"    Only the synthesis call is timed; the elapsed seconds are printed under\n",
	"    ``label``. Returns ``(spectral_envelope, aperiodicity, waveform)``.\n",
	"    \"\"\"\n",
	"    envelope = pw.cheaptrick(wav, f0_track, times, sr)\n",
	"    aperiodicity = pw.d4c(wav, f0_track, times, sr)\n",
	"    start = time.time()\n",
	"    waveform = pw.synthesize(f0_track, envelope, aperiodicity, sr, period)\n",
	"    print(' > {} - {}'.format(label, time.time() - start))\n",
	"    return envelope, aperiodicity, waveform\n",
	"\n",
	"\n",
	"def play_audio(audio, sr):\n",
	"    \"\"\"Show an inline audio player for ``audio`` sampled at ``sr`` Hz.\"\"\"\n",
	"    IPython.display.display(IPython.display.Audio(audio, rate=sr))\n",
	"\n",
	"\n",
	"# 1. A convenient way\n",
	"f0, sp, ap = pw.wav2world(x, fs)    # use default options\n",
	"start = time.time()\n",
	"y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)\n",
	"print(' > Default - {}'.format(time.time() - start))\n",
	"\n",
	"# 2. Step by step\n",
	"# 2-1 Without F0 refinement\n",
	"_f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,\n",
	"                channels_in_octave=2,\n",
	"                frame_period=frame_period,\n",
	"                speed=speed)\n",
	"_sp, _ap, _y = resynthesize('No refinement', x, _f0, t, fs, frame_period)\n",
	"# sf.write('y_without_f0_refinement.wav', _y, fs)\n",
	"\n",
	"# 2-2 DIO with F0 refinement (using Stonemask)\n",
	"f0 = pw.stonemask(x, _f0, t, fs)\n",
	"sp, ap, y_dio = resynthesize('DIO', x, f0, t, fs, frame_period)\n",
	"# sf.write('y_with_f0_refinement.wav', y_dio, fs)\n",
	"\n",
	"# 2-3 Harvest with F0 refinement (using Stonemask)\n",
	"_f0_h, t_h = pw.harvest(x, fs)\n",
	"f0_h = pw.stonemask(x, _f0_h, t_h, fs)\n",
	"sp_h, ap_h, y_h_harvest = resynthesize('Harvest', x, f0_h, t_h, fs, pw.default_frame_period)\n",
	"# sf.write('y_harvest_with_f0_refinement.wav', y_h_harvest, fs)\n",
	"\n",
	"# 2-4 Reaper F0\n",
	"# REAPER is fed int16 PCM. It gets its own variable names so the float\n",
	"# waveform `x` and the Harvest results above are not overwritten.\n",
	"x_pcm, fs_pcm = sf.read(FILE_NAME, dtype=np.int16)\n",
	"pm_times, pm, t_r, f0_r, corr = pyreaper.reaper(x_pcm, fs_pcm, do_high_pass=True, do_hilbert_transform=False)\n",
	"t_r = t_r.astype('double')\n",
	"f0_r = f0_r.astype('double')\n",
	"# REAPER reports unvoiced frames as -1 while WORLD uses 0 for unvoiced.\n",
	"f0_r = np.where(f0_r > 0, f0_r, 0.0)\n",
	"# Analyse the float waveform so y_h_reaper has the same amplitude scale as\n",
	"# the other outputs.\n",
	"sp_r, ap_r, y_h_reaper = resynthesize('Reaper', x, f0_r, t_r, fs, pw.default_frame_period)\n",
	"# sf.write('y_reaper_with_f0_refinement.wav', y_h_reaper, fs)\n",
	"\n",
	"# Comparison\n",
	"# savefig('test/wavform.png', [x, _y, y])\n",
	"# savefig('test/sp.png', [_sp, sp])\n",
	"# savefig('test/ap.png', [_ap, ap], log=False)\n",
	"# savefig('test/f0.png', [_f0, f0])\n",
	"\n",
	"for title, audio in [\n",
	"    ('-- Default', y),\n",
	"    ('-- No refinement', _y),\n",
	"    ('-- DIO f0 refinement', y_dio),\n",
	"    ('-- Harvest f0 refinement', y_h_harvest),\n",
	"    ('-- Reaper f0 refinement', y_h_reaper),\n",
	"]:\n",
	"    print(title)\n",
	"    play_audio(audio, fs)\n",
	"\n",
	"print(f0.shape)\n",
	"print(_sp.shape)\n",
	"print(_ap.shape)\n",
	"print(fs)\n",
	"\n",
	"# Last expression of the cell: player for the original recording.\n",
	"IPython.display.Audio(FILE_NAME, rate=fs)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}