Last active
December 6, 2018 22:36
-
-
Save eldrin/45378348bc0237071717fabfa4a364c2 to your computer and use it in GitHub Desktop.
short script to process `.json` outputs from Essentia's music extractor program
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import join, dirname, basename | |
import glob | |
import json | |
import argparse | |
from multiprocessing import Pool | |
import pandas as pd | |
from flatten_json import flatten_json | |
from tqdm import tqdm | |
def parmap(func, iterable, n_workers=2, verbose=False):
    """ Simple Implementation for Parallel Map

    Apply `func` to every item of `iterable`, optionally using a process
    pool and/or a tqdm progress bar.

    Parameters
    ----------
    func : callable
        function applied per item (must be picklable when n_workers > 1)
    iterable : sequence
        items to process; must support `len()` when verbose=True
    n_workers : int
        number of worker processes; 1 runs everything in-process
    verbose : bool
        if True, display a tqdm progress bar

    Returns
    -------
    list or map
        results of `func` per item. NOTE: order is not guaranteed when
        n_workers > 1, since `imap_unordered` is used.
    """
    if n_workers == 1:
        if verbose:
            iterable = tqdm(iterable, total=len(iterable), ncols=80)
        return map(func, iterable)
    else:
        with Pool(processes=n_workers) as p:
            if verbose:
                with tqdm(total=len(iterable), ncols=80) as pbar:
                    output = []
                    for o in p.imap_unordered(func, iterable):
                        output.append(o)
                        pbar.update()
                    return output
            else:
                # BUG FIX: the original returned the lazy `imap_unordered`
                # iterator directly; exiting the `with` block terminates the
                # pool before the caller consumes it, which hangs/fails.
                # Materialize the results while the pool is still alive.
                return list(p.imap_unordered(func, iterable))
def _process(fn):
    """Read one Essentia `.json` output file and return its contents
    flattened into a single-level dict (nested keys joined by
    `flatten_json`'s separator).
    """
    with open(fn) as fp:
        parsed = json.load(fp)
    return flatten_json(parsed)
def main(fns, out_path, ext='.pkl.gz', n_jobs=1):
    """Collate flattened Essentia feature dicts into one pickled DataFrame.

    Parameters
    ----------
    fns : list of str
        paths of the `.json` files produced by Essentia's music extractor
    out_path : str
        directory where the collated feature file is written
    ext : str
        extension appended to the output filename (default gzip pickle)
    n_jobs : int
        number of worker processes used while loading
    """
    print('> Loading files...')
    rows = list(parmap(_process, fns, n_workers=n_jobs, verbose=True))

    print('> Dumping output...')
    target = join(out_path, 'essentia_music_extract' + ext)
    pd.DataFrame(rows).to_pickle(target)
if __name__ == "__main__": | |
# setting up the argparser | |
parser = argparse.ArgumentParser() | |
parser.add_argument("input_path", type=str, | |
help='path where the .json files located') | |
parser.add_argument("output_path", | |
help='path for the feature file saved') | |
parser.add_argument("--n-workers", type=int, default=1, | |
help='number of processes to load the data') | |
args = parser.parse_args() | |
# load the fns | |
fns = glob.glob(join(args.input_path, '*.json')) | |
# process | |
main(fns, args.output_path, n_jobs=args.n_workers) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment