Skip to content

Instantly share code, notes, and snippets.

@RicherMans
Last active April 11, 2019 05:56
Show Gist options
  • Save RicherMans/a4169785aaf12a888632d5464fff5334 to your computer and use it in GitHub Desktop.
Save RicherMans/a4169785aaf12a888632d5464fff5334 to your computer and use it in GitHub Desktop.
Audio_video extract
import numpy as np
import pandas as pd
import os
import argparse
from mimetypes import MimeTypes
import librosa
import deepdish as dd
from tqdm import tqdm
# Command-line interface: a list of input paths plus options that control
# where the extracted features, video links and the filelist are written.
parser = argparse.ArgumentParser(
    description="Extract log-mel features from audio files, symlink video "
                "files, and write a CSV filelist pairing them by basename.")
parser.add_argument('files', nargs="+", type=str,
                    help="input files; anything that is not recognised as "
                         "audio or video is ignored")
parser.add_argument('--filelist', '-f', default='filelist.csv',
                    help="name of output filelist")
parser.add_argument('-o', default='features', type=str,
                    help='Output root dir (also receives the filelist)')
parser.add_argument('-ao', default='audio', type=str,
                    help='Audio root dir (in -o)')
parser.add_argument('-vo', default='video', type=str,
                    help='Video root dir (in -o)')
args = parser.parse_args()
# Records collected while processing the inputs; each one is a dict holding
# a 'filename' plus either an 'audiopath' or a 'videopath'.
all_features = []
mime_guesser = MimeTypes()
# Tiny positive offset added to the mel energies before taking the log,
# so that silent bins do not produce log(0).
eps = np.spacing(1)

# Output layout: audio features go to <-o>/<-ao>, video links to <-o>/<-vo>.
audio_root = os.path.join(args.o, args.ao)
video_root = os.path.join(args.o, args.vo)
for out_dir in (audio_root, video_root):
    os.makedirs(out_dir, exist_ok=True)
# Walk every input path. Audio files are turned into log-mel feature
# matrices saved under audio_root; video files are symlinked into
# video_root. Each handled file adds one record to all_features.
with tqdm(total=len(args.files), unit='file') as t:
    for f in args.files:
        # guess_type() yields None for folders and unknown extensions;
        # such entries match neither branch and are simply skipped.
        mime_type = mime_guesser.guess_type(f)[0] or ''
        filename = os.path.splitext(os.path.basename(f))[0]
        if 'audio' in mime_type:
            y, sr = librosa.load(f, sr=16000)
            # 40ms window, 20ms shift
            S = np.abs(librosa.core.stft(y, n_fft=2048, win_length=640,
                                         hop_length=320))
            # sr must be passed explicitly: melspectrogram otherwise builds
            # its mel filterbank for the 22050 Hz default, which does not
            # match audio resampled to 16 kHz above.
            feat = np.log(librosa.feature.melspectrogram(
                S=S, sr=sr, n_mels=128) + eps).transpose().astype(np.float32)
            saved_filename = os.path.join(audio_root, filename + '.dd')
            all_features.append(
                {'filename': filename, 'audiopath': saved_filename})
            t.set_postfix(file=saved_filename, size=feat.shape)
            dd.io.save(saved_filename, feat)
        elif 'video' in mime_type:
            saved_filename = os.path.join(video_root, filename + '.mp4')
            # Replace a link left behind by an earlier run so re-running the
            # script does not die with FileExistsError. A regular file at
            # that path is left alone and still raises.
            if os.path.islink(saved_filename):
                os.remove(saved_filename)
            os.symlink(os.path.abspath(f), saved_filename)
            all_features.append(
                {'filename': filename, 'videopath': saved_filename})
            # No feature matrix exists for videos, so only the path is shown.
            t.set_postfix(file=f)
        # Advance for skipped entries too, so the bar reaches its total.
        t.update()
# Pair audio features with video links by basename and write the filelist.
# The columns are declared up front so the selections below stay valid even
# when no audio (or no video, or nothing at all) was processed; without
# this the missing column raises KeyError.
df = pd.DataFrame(all_features,
                  columns=['filename', 'audiopath', 'videopath'])
# Each record carries only one of the two paths, so a basename seen as both
# audio and video occupies two rows. The outer self-merge on 'filename'
# brings them together.
df = pd.merge(df[['audiopath', 'filename']],
              df[['videopath', 'filename']],
              on='filename', how='outer').dropna(axis=0)
# Dropping rows with NaN leaves only basenames that have both an audio and
# a video path: columns audiopath, filename, videopath.
df.to_csv(
    os.path.join(args.o, args.filelist), index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment