Skip to content

Instantly share code, notes, and snippets.

@eldrin
Last active December 6, 2018 22:36
Show Gist options
  • Save eldrin/45378348bc0237071717fabfa4a364c2 to your computer and use it in GitHub Desktop.
Save eldrin/45378348bc0237071717fabfa4a364c2 to your computer and use it in GitHub Desktop.
Short script to process `.json` outputs from Essentia's music extractor program.
from os.path import join, dirname, basename
import glob
import json
import argparse
from multiprocessing import Pool
import pandas as pd
from flatten_json import flatten_json
from tqdm import tqdm
def parmap(func, iterable, n_workers=2, verbose=False):
    """Apply ``func`` to every item of ``iterable``, optionally in parallel.

    Parameters
    ----------
    func : callable
        Function applied to each item. With ``n_workers != 1`` it is sent to
        worker processes, so it must be picklable (e.g. a module-level
        function).
    iterable : iterable
        Items to process. It may be a generator; the progress bar then has
        no known total.
    n_workers : int
        ``1`` runs everything in the current process; any other value is
        passed to ``multiprocessing.Pool`` as the number of processes.
    verbose : bool
        Show a ``tqdm`` progress bar while processing.

    Returns
    -------
    list
        The results. Input order is kept when ``n_workers == 1``; with a
        pool the results arrive in completion order (``imap_unordered``).
    """
    # Generators and other unsized iterables have no ``len``; tqdm accepts
    # ``total=None`` and simply shows a running count in that case.
    total = len(iterable) if hasattr(iterable, '__len__') else None

    if n_workers == 1:
        if verbose:
            iterable = tqdm(iterable, total=total, ncols=80)
        return list(map(func, iterable))

    with Pool(processes=n_workers) as pool:
        results = pool.imap_unordered(func, iterable)
        if verbose:
            results = tqdm(results, total=total, ncols=80)
        # Drain the iterator *before* leaving the ``with`` block: exiting it
        # terminates the pool, and a lazy iterator handed back to the caller
        # would then wait forever for results that never arrive.
        return list(results)
def _process(fn):
    """ Load and process a single json output from Essentia

    Parameters
    ----------
    fn : str
        Path of the ``.json`` file to read.

    Returns
    -------
    The result of ``flatten_json`` applied to the parsed document
    (presumably one flat mapping of feature name to value -- confirm
    against the ``flatten_json`` package).
    """
    # JSON is UTF-8 by specification; decode explicitly rather than relying
    # on the platform's locale encoding, which can break or mis-decode
    # non-ASCII text (e.g. metadata tags) on non-UTF-8 locales.
    with open(fn, encoding='utf-8') as f:
        data = json.load(f)
    return flatten_json(data)
def main(fns, out_path, ext='.pkl.gz', n_jobs=1):
    """Collate all the Essentia output files into one pickled table.

    Parameters
    ----------
    fns : list of str
        Paths of the Essentia ``.json`` files to load.
    out_path : str
        Directory the feature file is written into.
    ext : str
        Extension appended to the fixed base name
        ``essentia_music_extract``.
    n_jobs : int
        Number of worker processes handed to ``parmap``.
    """
    print('> Loading files...')
    # one flattened record per input file, loaded with a progress bar
    records = [*parmap(_process, fns, n_workers=n_jobs, verbose=True)]

    print('> Dumping output...')
    table = pd.DataFrame(records)
    destination = join(out_path, 'essentia_music_extract' + ext)
    table.to_pickle(destination)
if __name__ == "__main__":
    # command-line interface: two positional paths and an optional
    # worker count
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "input_path",
        type=str,
        help='path where the .json files located',
    )
    cli.add_argument(
        "output_path",
        help='path for the feature file saved',
    )
    cli.add_argument(
        "--n-workers",
        type=int,
        default=1,
        help='number of processes to load the data',
    )
    opts = cli.parse_args()

    # gather the .json files sitting directly inside the input directory
    pattern = join(opts.input_path, '*.json')
    json_files = glob.glob(pattern)

    # collate them into a single feature file
    main(json_files, opts.output_path, n_jobs=opts.n_workers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment