Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save gvyshnya/92995935c2b222530fd34915c49f4927 to your computer and use it in GitHub Desktop.
Save gvyshnya/92995935c2b222530fd34915c49f4927 to your computer and use it in GitHub Desktop.
Parallel Audio Feature Extraction with Ray
@ray.remote
def extract_feautres(trial_audio_file_path):
    """Extract feature means from a single audio file as a Ray remote task.

    The extraction itself is delegated to ``u.extract_feature_means``; this
    wrapper only adds start/finish/elapsed-time logging to stdout.

    Args:
        trial_audio_file_path: Location of the audio file; forwarded unchanged
            to ``u.extract_feature_means``.

    Returns:
        Whatever ``u.extract_feature_means`` returns (presumably a pandas
        DataFrame, since the driver concatenates the results -- confirm
        against the helper's definition).
    """
    started_at = dt.datetime.now()
    print("Started a file processing at ", started_at)

    features = u.extract_feature_means(trial_audio_file_path)

    finished_at = dt.datetime.now()
    print("Fininished the file processing at ", finished_at)
    print(
        "Processed the file: ",
        trial_audio_file_path,
        "; processing time: ",
        finished_at - started_at,
    )
    return features
# Driver: for every species, extract features from all of its audio files in
# parallel with Ray and write the combined result to one CSV per species.
ray.init()
try:
    for ebird in final_data:
        print("Starting to process a new species: ", ebird)
        ebird_data = train_csv[train_csv['species'] == ebird]
        if ebird_data.empty:
            # No rows for this species: ``unique()[0]`` and ``pd.concat([])``
            # would both raise, so skip it instead of aborting the whole run.
            print("No audio files found for species: ", ebird)
            continue

        short_file_name = ebird_data['ebird_code'].unique()[0]
        print("Short file name: ", short_file_name)

        # Submit every file first so the tasks can actually run concurrently;
        # calling ray.get() per file would block on each task in turn.
        pending = [
            extract_feautres.remote(full_path)
            for full_path in ebird_data['full_path']
        ]
        # One blocking call for the whole batch. ray.get() on a list returns a
        # flat list of results in submission order, which is the shape
        # pd.concat() needs (not a list of one-element lists).
        result = ray.get(pending)

        # combine chunks with transformed data into a single training set
        extracted_features = pd.concat(result)

        # save extracted features to CSV
        output_path = "".join([c.TRANSFORMED_DATA_PATH, short_file_name, ".csv"])
        extracted_features.to_csv(output_path, index=False)
        print("Finished processing: ", ebird)
finally:
    # Always release the Ray runtime, even if a species fails mid-run.
    ray.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment