Skip to content

Instantly share code, notes, and snippets.

@cobanov
Created September 20, 2022 08:24
Show Gist options
  • Save cobanov/00522b2de32a6bce6f2060749d61997e to your computer and use it in GitHub Desktop.
Save cobanov/00522b2de32a6bce6f2060749d61997e to your computer and use it in GitHub Desktop.
Convert a mel spectrogram image to an audio file.
import librosa
import numpy as np
from PIL import Image
import soundfile as sf
def image_to_audio(image, sr=22050, n_fft=2048, hop_length=512, top_db=80):
    """Convert a grayscale mel spectrogram image back into raw audio.

    Pixel values 0..255 are mapped linearly onto the decibel range
    [-top_db, 0], converted from dB to power, and the resulting mel
    spectrogram is inverted with ``librosa.feature.inverse.mel_to_audio``.

    Args:
        image (PIL Image): 8-bit single-channel ("L") image. Each image row
            is used as one mel band and each column as one time frame, so
            the image height determines the number of mel bands.
        sr (int): sampling rate passed to the mel inversion.
        n_fft (int): FFT window size passed to the mel inversion.
        hop_length (int): hop length passed to the mel inversion.
        top_db (float): dynamic range in dB spanned by the 0..255 pixel
            range (pixel 255 -> 0 dB, pixel 0 -> -top_db dB).

    Returns:
        audio (array): raw audio
    """
    # PIL serialises pixels row by row, so the flat buffer holds `height`
    # rows of `width` bytes each. Reshaping as (width, height) only works for
    # square images and scrambles the axes for any other size.
    pixels = np.frombuffer(image.tobytes(), dtype="uint8").reshape(
        (image.height, image.width)
    )

    # Undo the 8-bit quantisation: 0..255 -> -top_db..0 dB.
    log_S = pixels.astype("float") * top_db / 255 - top_db

    # dB -> power mel spectrogram, then invert it to a waveform.
    S = librosa.db_to_power(log_S)
    audio = librosa.feature.inverse.mel_to_audio(
        S, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    return audio
# Read the spectrogram picture and collapse it to a single 8-bit channel.
image = Image.open("./unnamed.png")
image = image.convert("L")

# Anything that is not already 256x256 is rescaled to that size first.
if image.size != (256, 256):
    image = image.resize((256, 256))
    print("Image resized")

# Reconstruct the waveform and store it as a WAV file sampled at 22050 Hz.
sound = image_to_audio(image)
sf.write("example.wav", sound, samplerate=22050)
print("Audio file saved")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment