Created
September 20, 2022 08:24
-
-
Save cobanov/00522b2de32a6bce6f2060749d61997e to your computer and use it in GitHub Desktop.
mel spectrogram image to audio file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa | |
import numpy as np | |
from PIL import Image | |
import soundfile as sf | |
def image_to_audio(image, sr=22050, n_fft=2048, hop_length=512, top_db=80):
    """Convert a grayscale mel-spectrogram image back into audio.

    Pixel values 0..255 are mapped linearly onto the decibel range
    [-top_db, 0], converted from dB to power, and handed to
    ``librosa.feature.inverse.mel_to_audio``.

    Args:
        image (PIL Image): spectrogram image. Image rows become the first
            axis of the array passed to librosa and image columns the
            second. Images that are not in mode "L" are converted to
            8-bit grayscale first.
        sr (int): sample rate forwarded to the mel inversion.
        n_fft (int): FFT window size forwarded to the mel inversion.
        hop_length (int): hop length forwarded to the mel inversion.
        top_db (float): dynamic range in dB that the 0..255 pixel scale
            represents (255 -> 0 dB, 0 -> -top_db dB).

    Returns:
        audio (array): raw audio
    """
    # One byte per pixel is required for the reshape below; multi-channel
    # or 16-bit images would otherwise produce a buffer of the wrong length.
    if image.mode != "L":
        image = image.convert("L")

    # PIL serialises pixels row by row, so the flat buffer holds `height`
    # rows of `width` pixels each. Reshaping to (width, height) only happens
    # to work for square images and scrambles everything else.
    bytedata = np.frombuffer(image.tobytes(), dtype="uint8").reshape(
        (image.height, image.width)
    )

    # Undo the 8-bit quantisation: 0..255 -> -top_db..0 dB.
    log_S = bytedata.astype("float") * top_db / 255 - top_db
    S = librosa.db_to_power(log_S)
    audio = librosa.feature.inverse.mel_to_audio(
        S, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    return audio
# Load the spectrogram picture and collapse it to 8-bit grayscale.
image = Image.open("./unnamed.png")
image = image.convert("L")

# The conversion below is run on a 256x256 image; rescale anything else.
expected_size = (256, 256)
if image.size != expected_size:
    image = image.resize(expected_size)
    print("Image resized")

# Invert the spectrogram and store the waveform at 22050 Hz.
sound = image_to_audio(image)
sf.write("example.wav", sound, 22050)
print("Audio file saved")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment