Skip to content

Instantly share code, notes, and snippets.

@hirocarma
Created September 6, 2024 15:40
Show Gist options
  • Save hirocarma/08a8089f57e5eea1a291a4e76f537d12 to your computer and use it in GitHub Desktop.
Save hirocarma/08a8089f57e5eea1a291a4e76f537d12 to your computer and use it in GitHub Desktop.
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import cv2
def wav_to_image_with_rgb(wav_file, image_file):
    """Save a spectrogram of a WAV file and stamp its average colour on it.

    The audio is read with ``scipy.io.wavfile``, reduced to its first channel
    if it has more than one, peak-normalized, and rendered with
    ``plt.specgram`` (NFFT=1024, noverlap=512, ``viridis`` colormap, axes
    hidden). The saved image is then re-read with OpenCV, its per-channel
    mean is computed, and the text ``Average RGB: r, g, b`` is drawn in the
    top-left corner before the file is overwritten.

    Args:
        wav_file: Path of the WAV file to read.
        image_file: Path of the image to write; it is written twice (once by
            matplotlib, once by OpenCV with the text overlay).

    Raises:
        ValueError: If the WAV file contains no samples.
        OSError: If the rendered image cannot be read back by OpenCV.
    """
    sample_rate, data = wavfile.read(wav_file)

    # If stereo (or multi-channel), keep only the first channel.
    if data.ndim == 2:
        data = data[:, 0]

    if data.size == 0:
        raise ValueError(f"no audio samples in {wav_file!r}")

    # Convert to float before abs(): on integer PCM, abs() of the most
    # negative value wraps around and would corrupt the peak.
    data = data.astype(np.float64)
    peak = np.max(np.abs(data))
    # Leave an all-zero (silent) signal unscaled instead of dividing by zero.
    if peak > 0:
        data = data / peak

    # STFT, drawn on a dedicated figure that is always closed afterwards.
    fig = plt.figure()
    try:
        plt.specgram(data, Fs=sample_rate, NFFT=1024, noverlap=512,
                     cmap='viridis')
        plt.axis('off')
        plt.savefig(image_file, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise OSError(f"could not read rendered image {image_file!r}")

    # Average over rows and columns, leaving one value per colour channel.
    avg_color = np.mean(img, axis=(0, 1))
    avg_rgb = avg_color.astype(int)  # Convert to integer for display

    # OpenCV loads images in BGR order, hence the reversed indices.
    img_with_rgb = cv2.putText(
        img,
        f'Average RGB: {avg_rgb[2]}, {avg_rgb[1]}, {avg_rgb[0]}',
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.imwrite(image_file, img_with_rgb)
if __name__ == "__main__":
    # The first positional argument is the WAV file; anything after it is
    # ignored.
    if len(sys.argv) <= 1:
        print("Usage: python script_name.py wavfile")
        sys.exit(1)

    wav_file = sys.argv[1]
    # The image lands in the current directory, named after the WAV file
    # with ".png" appended to its full file name.
    img_file = f"{os.path.basename(wav_file)}.png"
    wav_to_image_with_rgb(wav_file, img_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment