gvyshnya · September 2, 2020 19:20
diff --git a/Parallel Audio Feature Extraction with MP.py b/Parallel Audio Feature Extraction with MP.py
    # For every species: extract features from each of its audio files in
    # parallel, merge the per-file results, and write one CSV per species.
    for ebird in final_data:
        print("Starting to process a new species: ", ebird)
        ebird_data = train_csv[train_csv['species'] == ebird]

        # The first distinct 'ebird_code' among the matching rows names the
        # output file. NOTE(review): this raises IndexError when no row
        # matches the species - confirm final_data is derived from train_csv.
        short_file_name = ebird_data['ebird_code'].unique()[0]
        print("Short file name: ", short_file_name)

        # The pool size comes from configuration. Managing the pool with
        # ``with`` terminates the workers on every exit path, so a timed-out
        # or failing task no longer leaves worker processes behind (the
        # original only cleaned up when everything succeeded).
        with mp.Pool(c.NUMBER_OF_CPU_IN_POOL) as pool:
            # Submit one asynchronous extraction task per audio file.
            pending = [
                pool.apply_async(extract_feautres, [full_path])
                for full_path in ebird_data['full_path']
            ]

            # Collect the results in submission order; each ``get`` waits at
            # most 600 seconds (10 minutes) before raising a timeout error.
            # All results are fetched before the pool is torn down.
            result = [task.get(timeout=600) for task in pending]

        # Combine the per-file chunks into a single training set.
        # presumably each chunk is a DataFrame - verify against
        # extract_feautres.
        extracted_features = pd.concat(result)

        # Save the extracted features to CSV. The path is built by plain
        # concatenation, so TRANSFORMED_DATA_PATH must already end with a
        # path separator.
        output_path = "".join([c.TRANSFORMED_DATA_PATH, short_file_name, ".csv"])
        extracted_features.to_csv(output_path, index=False)

        print("Finished processing: ", ebird)
	# For every species: extract features from each of its audio files in
	# parallel, merge the per-file results, and write one CSV per species.
	# (The nesting below was restored from a paste whose indentation had been
	# collapsed to a single tab, which left the loops without bodies.)
	for ebird in final_data:
		print("Starting to process a new species: ", ebird)
		ebird_data = train_csv[train_csv['species'] == ebird]

		# The first distinct 'ebird_code' among the matching rows names the
		# output file. NOTE(review): this raises IndexError when no row
		# matches the species - confirm final_data is derived from train_csv.
		short_file_name = ebird_data['ebird_code'].unique()[0]
		print("Short file name: ", short_file_name)

		# The pool size comes from configuration. Managing the pool with
		# ``with`` terminates the workers on every exit path, so a timed-out
		# or failing task does not leave worker processes behind.
		with mp.Pool(c.NUMBER_OF_CPU_IN_POOL) as pool:
			# Submit one asynchronous extraction task per audio file.
			pending = [
				pool.apply_async(extract_feautres, [full_path])
				for full_path in ebird_data['full_path']
			]

			# Collect the results in submission order; each ``get`` waits at
			# most 600 seconds (10 minutes) before raising a timeout error.
			# All results are fetched before the pool is torn down.
			result = [task.get(timeout=600) for task in pending]

		# Combine the per-file chunks into a single training set.
		# presumably each chunk is a DataFrame - verify against
		# extract_feautres.
		extracted_features = pd.concat(result)

		# Save the extracted features to CSV. The path is built by plain
		# concatenation, so TRANSFORMED_DATA_PATH must already end with a
		# path separator.
		output_path = "".join([c.TRANSFORMED_DATA_PATH, short_file_name, ".csv"])
		extracted_features.to_csv(output_path, index=False)

		print("Finished processing: ", ebird)