Skip to content

Instantly share code, notes, and snippets.

@hirocarma
Created April 18, 2021 09:27
Show Gist options
  • Save hirocarma/4621b4e459305baa3f6c61190c16b592 to your computer and use it in GitHub Desktop.
Save hirocarma/4621b4e459305baa3f6c61190c16b592 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
import wave
import scipy.signal as signal
import colorsys
def downsampling(conversion_rate, data, fr):
    """Low-pass filter ``data`` and keep every ``conversion_rate``-th sample.

    Args:
        conversion_rate: Integer decimation factor (>= 1).
        data: 1-D sequence of samples recorded at ``fr`` Hz.
        fr: Sampling rate of ``data`` in Hz.

    Returns:
        A tuple ``(down_data, down_fr)``: the decimated samples as a list and
        the new sampling rate ``int(fr / conversion_rate)``.

    Raises:
        ValueError: If ``conversion_rate`` is below 1, or (from
            ``scipy.signal.firwin``) if the target Nyquist frequency is not
            above the 500 Hz guard band.
    """
    if conversion_rate < 1:
        raise ValueError("conversion_rate must be a positive integer")

    # Pass band ends 500 Hz below the Nyquist frequency of the *output* rate.
    target_nyquist = (fr / conversion_rate) / 2.0
    cutoff_hz = target_nyquist - 500.0
    # firwin expects the cutoff as a fraction of the Nyquist frequency of the
    # signal being filtered (fr / 2), not of the decimated signal; dividing
    # by the output Nyquist would leave the filter almost all-pass and let
    # aliasing through.
    cutoff = cutoff_hz / (fr / 2.0)

    taps = 511
    coeffs = signal.firwin(taps, cutoff)
    filtered = signal.lfilter(coeffs, 1, data)

    down_data = list(filtered[::conversion_rate])
    return (down_data, int(fr / conversion_rate))
def smoothing(input, window):
    """Centered moving average with windows truncated at both edges.

    Each output sample ``i`` is the mean of ``input[i-window : i+window+1]``,
    clipped to the valid index range.  The tail is truncated the same way as
    the head, so the last samples are still averaged with their ``window``
    predecessors instead of collapsing to a single-sample mean.

    Args:
        input: 1-D array-like of samples (the name shadows the builtin but is
            kept for call compatibility).
        window: Number of neighbours taken on each side.

    Returns:
        ``numpy.ndarray`` with the same length as ``input``.
    """
    samples = np.asarray(input)
    count = samples.shape[0]
    # Slices past the end are clamped by NumPy; only the start needs max().
    return np.array([
        np.mean(samples[max(0, i - window):i + window + 1])
        for i in range(count)
    ])
def to_db(data, N):
    """Return the sliding-window RMS level of ``data`` in decibels.

    The signal is zero-padded with ``N // 2`` samples on both sides and, for
    every input sample ``i``, the RMS of ``padded[i : i + N]`` is computed as
    ``sqrt(mean(x ** 2))`` and converted with ``20 * log10``.

    Args:
        data: 1-D array-like of samples.
        N: Window length in samples (>= 1).

    Returns:
        ``numpy.ndarray`` of ``len(data)`` dB values; all-zero windows yield
        ``-inf``.

    Raises:
        ValueError: If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be a positive window length")

    # Cast to float first so squaring integer PCM samples cannot overflow.
    samples = np.asarray(data, dtype=np.float64)
    if samples.size == 0:
        return np.array([])

    pad = np.zeros(N // 2)
    padded = np.concatenate([pad, samples, pad])

    # Sum of squares over each window (not the square of the sum, which
    # cancels for oscillating signals).  A box-kernel convolution in 'valid'
    # mode yields exactly the window sums for padded[i:i+N].
    window_energy = np.convolve(padded ** 2, np.ones(N), mode='valid')
    rms = np.sqrt(window_energy[:samples.size] / N)

    with np.errstate(divide='ignore'):
        db = 20 * np.log10(rms)
    return db
def hsv_av(img):
    """Return the HSV of an image's mean colour, each component scaled to 0-255.

    Args:
        img: ``H x W x 3`` array in OpenCV channel order (blue, green, red),
            which is what the ``cv2.imread`` call in this file produces.

    Returns:
        Tuple ``(h, s, v)`` of ints obtained by truncating
        ``colorsys.rgb_to_hsv`` results multiplied by 255.
    """
    # OpenCV stores pixels as BGR: channel 0 is blue and channel 2 is red.
    # Reading channel 0 as red leaves S and V intact but mirrors the hue.
    blue = int(np.mean(img[:, :, 0]))
    green = int(np.mean(img[:, :, 1]))
    red = int(np.mean(img[:, :, 2]))

    hue, sat, val = colorsys.rgb_to_hsv(red / 255.0, green / 255.0, blue / 255.0)
    return (int(hue * 255), int(sat * 255), int(val * 255))
def _moving_average(values, width):
    """Box-filter ``values`` with at most ``width`` taps, preserving its length."""
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        # np.convolve rejects empty input; nothing to smooth anyway.
        return values
    # With mode='same' the result has max(len(values), taps) points, so the
    # kernel is capped to keep the output aligned with the time axis.
    taps = min(width, values.size)
    return np.convolve(values, np.ones(taps) / taps, mode='same')


def hsv_plt(IMG_DIR):
    """Sample frame images from ``IMG_DIR`` and return smoothed HSV series.

    Files are processed in sorted name order and are assumed to be
    consecutive video frames at 60 fps (hard-coded) -- TODO confirm against
    how the frames were extracted.

    Args:
        IMG_DIR: Directory containing the frame images.

    Returns:
        Tuple ``(Lt, Lh2, Ls2, Lv2)``: sample times in minutes (list) and the
        moving averages of hue, saturation and value (arrays of equal length).
        Files that OpenCV cannot decode are skipped.
    """
    fps = 60
    files = sorted(os.listdir(IMG_DIR))

    Lt, Lh, Ls, Lv = [], [], [], []
    for i, file in enumerate(files):
        # fps / 8 is 7.5, so the float modulo is zero only for multiples of
        # 15: one frame in fifteen is sampled.
        if i % (fps / 8) != 0:
            continue
        img = cv2.imread(os.path.join(IMG_DIR, file))
        if img is None:
            # cv2.imread signals an unreadable / non-image file with None.
            continue
        (h, s, v) = hsv_av(img)
        Lt.append(i / fps / 60)
        Lh.append(h)
        Ls.append(s)
        Lv.append(v)

    num = fps
    Lh2 = _moving_average(Lh, num)
    Ls2 = _moving_average(Ls, num)
    Lv2 = _moving_average(Lv, num)
    return (Lt, Lh2, Ls2, Lv2)
def sound_plt(wav_fname):
    """Compute a smoothed loudness curve and summary statistics for a WAV file.

    The samples are read as 16-bit integers, decimated with ``downsampling``,
    converted to a dB envelope with ``to_db`` and smoothed with ``smoothing``.

    Args:
        wav_fname: Path of the WAV file to analyse.

    Returns:
        Tuple ``(time, sm_db2, db_mean, db_max, db_max_time, silent_time)``:
        time axis in minutes, moving-averaged dB curve, mean and maximum of
        the smoothed level over samples at or above 20 dB (``nan`` when there
        are none), time of the overall maximum, and the duration in seconds
        spent below 20 dB.
    """
    # Context manager guarantees the file handle is released even on error.
    with wave.open(wav_fname, "rb") as wave_file:
        fr = wave_file.getframerate()
        raw = wave_file.readframes(wave_file.getnframes())
    data = np.frombuffer(raw, dtype="int16")

    # NOTE(review): down_rate both scales the decimation factor and halves
    # the time axis; presumably it compensates for 2-channel interleaved
    # samples -- confirm before using mono input.
    down_rate = 2
    down_fr = int(fr / (down_rate * 1000))
    down_data, down_fr = downsampling(down_fr, data, fr)

    N = int(fr / 42)
    db = to_db(down_data, N)

    # Integer-based arange always yields exactly one time stamp per dB
    # sample; a float step can come out one element long or short.
    time = np.arange(db.shape[0]) / down_fr / 60 / down_rate

    sm_db = smoothing(db, 1000)

    threshold_db = 20
    loud = sm_db[sm_db >= threshold_db]
    if loud.size:
        db_mean = np.mean(loud)
        db_max = np.max(loud)
    else:
        # No sample reaches the threshold: report "no data" rather than
        # letting np.max fail on an empty selection.
        db_mean = float('nan')
        db_max = float('nan')
    db_max_time = time[np.argmax(sm_db)]

    silent_count = int(np.count_nonzero(sm_db < threshold_db))
    silent_time = silent_count / down_fr / down_rate

    # Cap the kernel at the signal length so mode='same' keeps the curve the
    # same length as the time axis for short recordings.
    num = min(fr, sm_db.shape[0])
    b = np.ones(num) / num
    sm_db2 = np.convolve(sm_db, b, mode='same')
    return (time, sm_db2, db_mean, db_max, db_max_time, silent_time)
def sound_hsv_plt(wav_fname, IMG_DIR):
    """Draw loudness, saturation and value over time on a single chart.

    The curves come from ``sound_plt(wav_fname)`` and ``hsv_plt(IMG_DIR)``.
    The figure is written to 'sound-hsv.png' in the working directory and
    then displayed.
    """
    # Fonts listed here are needed to render the Japanese labels below.
    plt.rcParams.update({
        'font.family': 'sans-serif',
        'font.sans-serif': ['IPAPGothic', 'VL PGothic'],
    })

    figure = plt.figure(
        figsize=(16, 8),
        dpi=100,
        facecolor='tan',
        tight_layout=True,
    )
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title("スーパーカブ 1話 音量/彩度 推移")
    axes.set_xlabel("分(min)")
    axes.set_ylabel("音量(dB)/彩度")

    # Only the time axis and the smoothed level are drawn; the summary
    # statistics returned alongside them are not used here.
    (minutes, loudness, _mean, _peak, _peak_time, _silent) = sound_plt(wav_fname)
    axes.plot(minutes, loudness, 'r', label='Sound:moving average')

    # Hue is returned as well but intentionally left off the chart.
    (frame_minutes, _hue, saturation, value) = hsv_plt(IMG_DIR)
    axes.plot(frame_minutes, saturation, 'b', label='Saturation:moving average')
    axes.plot(frame_minutes, value, 'k', label='Value:moving average')

    axes.grid()
    axes.legend()

    # Pass the face colour explicitly so the saved file keeps the background.
    figure.savefig('sound-hsv.png', facecolor=figure.get_facecolor())
    plt.show()
if __name__ == '__main__':
    # Both inputs are required; fail with a usage hint instead of a bare
    # IndexError when one is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: {} WAV_FILE IMG_DIR".format(sys.argv[0]))
    wav_fname = sys.argv[1]
    IMG_DIR = sys.argv[2]
    sound_hsv_plt(wav_fname, IMG_DIR)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment