p4p_scores.py
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob

def get_scores(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Calculates the inter-onset interval (IOI), pitch, and combo scores of a
    generated continuation against the true continuation over increasing
    time windows.

    References
    ----------
    https://www.music-ir.org/mirex/wiki/2018:Patterns_for_Prediction#IOI
    https://github.com/BeritJanssen/PatternsForPrediction/blob/master/evaluate_prediction.py

    Parameters
    ----------
    true_df: DataFrame
        A dataframe containing the onset times (measured in number of
        quarter notes since the start) and pitches of the true continuation.
        The dataframe should have columns 'onset' and 'pitch' holding the
        onset times and the MIDI note numbers respectively.
    gen_df: DataFrame
        As above, but for the generated continuation.
    start_onset: float
        The onset time of the final note played in the prime associated with
        these continuations.
    increment: float
        Number of quarter notes to increment by in each iteration.
    max_t: float, optional
        The maximum time in crotchets to get the score for. If None, scores
        are computed until all onsets in both arrays have been assessed.

    Returns
    -------
    scores: dict
        Containing:
        ioi: dict
            Dicts of IOI 'precision', 'recall', and 'f1' scores, each keyed
            by the time offset tt, i.e. increment, 2*increment, ..., max_t.
        pitch: dict
            As above, but for the pitch scores.
        combo: dict
            As above, but for the combined (onset, pitch) scores.
    """
    if max_t is None:
        # TODO: get the max_t
        max_onset = max(true_df.onset.max(), gen_df.onset.max())
        max_t = int(max_onset - start_onset)
    score_names = ['ioi', 'pitch', 'combo']
    scores = {name: {'precision': {}, 'recall': {}, 'f1': {}}
              for name in score_names}
    # clean data
    true_df_copy = true_df.copy(deep=True)
    gen_df_copy = gen_df.copy(deep=True)
    for df in [true_df_copy, gen_df_copy]:
        # These roundings are done in the original evaluation code
        # which is linked in docstring references
        df.loc[:, 'pitch'] = df.loc[:, 'pitch'].astype(int)
        df.loc[:, 'onset'] = df.loc[:, 'onset'].round(2)
    nr_steps = int(max_t / increment)  # range is only for ints, not floats
    for ii in range(1, nr_steps+1):
        tt = ii*increment
        max_onset = start_onset + tt
        true_notes = [(row.onset, row.pitch) for idx, row in true_df_copy.iterrows()
                      if row.onset <= max_onset]
        gen_notes = [(row.onset, row.pitch) for idx, row in gen_df_copy.iterrows()
                     if row.onset <= max_onset]
        true_size = len(true_notes)
        gen_size = len(gen_notes)
        if true_size == 0 and gen_size == 0:
            for name in score_names:
                scores[name]['precision'][tt] = 1
                scores[name]['recall'][tt] = 1
                scores[name]['f1'][tt] = 1
        elif true_size == 0:  # and gen_size != 0:
            for name in score_names:
                scores[name]['precision'][tt] = 0
                scores[name]['recall'][tt] = 0  # arguably this should be NaN,
                                                # but shouldn't be 1 because there
                                                # is an implicit 'rest' note
                scores[name]['f1'][tt] = 0
        elif gen_size == 0:  # and true_size != 0:
            for name in score_names:
                scores[name]['precision'][tt] = 0  # arguably this should be NaN,
                                                   # but shouldn't be 1 because there
                                                   # is an implicit 'rest' note
                scores[name]['recall'][tt] = 0
                scores[name]['f1'][tt] = 0
        else:
            true_onsets, true_pitches = [list(x) for x in zip(*true_notes)]
            gen_onsets, gen_pitches = [list(x) for x in zip(*gen_notes)]
            # TODO: dedup polyphonic submissions? For now, assuming input is
            # monophonic
            # https://github.com/BeritJanssen/PatternsForPrediction/issues/5
            # IOI score =======
            # TODO: This needs to be adapted for polyphonic case
            intersection_size = len(
                set(true_onsets).intersection(set(gen_onsets))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['ioi']['precision'][tt] = precision
            scores['ioi']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['ioi']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['ioi']['f1'][tt] = 0
            # Pitch score =======
            ptr1 = 0
            ptr2 = 0
            true_pitches.sort()
            gen_pitches.sort()
            nr_matches = 0
            while ptr1 < true_size and ptr2 < gen_size:
                if true_pitches[ptr1] == gen_pitches[ptr2]:
                    nr_matches += 1
                    ptr1 += 1
                    ptr2 += 1
                elif true_pitches[ptr1] < gen_pitches[ptr2]:
                    ptr1 += 1
                else:
                    ptr2 += 1
            precision = nr_matches / gen_size
            recall = nr_matches / true_size
            scores['pitch']['precision'][tt] = precision
            scores['pitch']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['pitch']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['pitch']['f1'][tt] = 0
            # Combo score =======
            intersection_size = len(
                set(true_notes).intersection(set(gen_notes))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['combo']['precision'][tt] = precision
            scores['combo']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['combo']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['combo']['f1'][tt] = 0
    return scores
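

# ---------------------------------------------------------------------------
# Illustration (added, not part of the original evaluation code): a minimal
# sketch of how get_scores expects its inputs, using hypothetical toy data.
# Onsets are in quarter notes, pitches are MIDI note numbers, and the
# generated continuation diverges from the truth after the second note.
def _toy_get_scores_example():
    true_df = pd.DataFrame({'onset': [4.0, 5.0, 6.0], 'pitch': [60, 62, 64]})
    gen_df = pd.DataFrame({'onset': [4.0, 5.0, 6.5], 'pitch': [60, 62, 65]})
    # Assume the final note of the prime fell on beat 3
    toy_scores = get_scores(true_df, gen_df, start_onset=3.0,
                            increment=1.0, max_t=3.0)
    # The continuations agree for the first two crotchets, then diverge, so
    # e.g. toy_scores['combo']['f1'] is roughly {1.0: 1.0, 2.0: 1.0, 3.0: 0.8}
    return toy_scores
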
def get_scores_old(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Same as get_scores, but as implemented in the competition. It has issues
    handling the NaN cases for precision, recall, and F1. See the notes in
    get_scores for details.
    """
    if max_t is None:
        # TODO: get the max_t
        max_onset = max(true_df.onset.max(), gen_df.onset.max())
        max_t = int(max_onset - start_onset)
    score_names = ['ioi', 'pitch', 'combo']
    scores = {name: {'precision': {}, 'recall': {}, 'f1': {}}
              for name in score_names}
    # clean data
    true_df_copy = true_df.copy(deep=True)
    gen_df_copy = gen_df.copy(deep=True)
    for df in [true_df_copy, gen_df_copy]:
        # These roundings are done in the original evaluation code
        # which is linked in docstring references
        df.loc[:, 'pitch'] = df.loc[:, 'pitch'].astype(int)
        df.loc[:, 'onset'] = df.loc[:, 'onset'].round(2)
    nr_steps = int(max_t / increment)  # range is only for ints, not floats
    for ii in range(1, nr_steps+1):
        tt = ii*increment
        max_onset = start_onset + tt
        true_notes = [(row.onset, row.pitch) for idx, row in true_df_copy.iterrows()
                      if row.onset <= max_onset]
        gen_notes = [(row.onset, row.pitch) for idx, row in gen_df_copy.iterrows()
                     if row.onset <= max_onset]
        true_size = len(true_notes)
        gen_size = len(gen_notes)
        # This is one of the issues - we should handle size zero cases
        if true_size == 0 or gen_size == 0:
            for name in score_names:
                scores[name]['precision'][tt] = np.nan
                scores[name]['recall'][tt] = np.nan
                scores[name]['f1'][tt] = np.nan
        else:
            true_onsets, true_pitches = [list(x) for x in zip(*true_notes)]
            gen_onsets, gen_pitches = [list(x) for x in zip(*gen_notes)]
            # TODO: dedup polyphonic submissions? For now, assuming input is
            # monophonic
            # https://github.com/BeritJanssen/PatternsForPrediction/issues/5
            # IOI score =======
            # TODO: This needs to be adapted for polyphonic case
            intersection_size = len(
                set(true_onsets).intersection(set(gen_onsets))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['ioi']['precision'][tt] = precision
            scores['ioi']['recall'][tt] = recall
            # This is one of the issues
            if (precision + recall) != 0:
                scores['ioi']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['ioi']['f1'][tt] = np.nan
            # Pitch score =======
            ptr1 = 0
            ptr2 = 0
            true_pitches.sort()
            gen_pitches.sort()
            nr_matches = 0
            while ptr1 < true_size and ptr2 < gen_size:
                if true_pitches[ptr1] == gen_pitches[ptr2]:
                    nr_matches += 1
                    ptr1 += 1
                    ptr2 += 1
                elif true_pitches[ptr1] < gen_pitches[ptr2]:
                    ptr1 += 1
                else:
                    ptr2 += 1
            precision = nr_matches / gen_size
            recall = nr_matches / true_size
            scores['pitch']['precision'][tt] = precision
            scores['pitch']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['pitch']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['pitch']['f1'][tt] = np.nan
            # Combo score =======
            intersection_size = len(
                set(true_notes).intersection(set(gen_notes))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['combo']['precision'][tt] = precision
            scores['combo']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['combo']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['combo']['f1'][tt] = np.nan
    return scores
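

# ---------------------------------------------------------------------------
# Illustration (added, not part of the original evaluation code): the
# behavioural difference between the two scoring functions on a degenerate
# case. With an empty generated continuation, get_scores returns 0 for every
# metric, whereas get_scores_old returns NaN, which is typically dropped when
# the scores are later averaged or plotted.
def _empty_continuation_example():
    true_df = pd.DataFrame({'onset': [4.0, 5.0], 'pitch': [60, 62]})
    empty_df = pd.DataFrame({'onset': pd.Series(dtype=float),
                             'pitch': pd.Series(dtype=float)})
    new = get_scores(true_df, empty_df, start_onset=3.0,
                     increment=1.0, max_t=2.0)
    old = get_scores_old(true_df, empty_df, start_onset=3.0,
                         increment=1.0, max_t=2.0)
    # new['ioi']['f1'] == {1.0: 0, 2.0: 0}, while old['ioi']['f1'] is all NaN
    return new, old
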
if __name__ == '__main__':
    # Change to point towards a folder containing the unzipped data
    DATA_LOC = './data'
    # Change to run on different sizes
    SIZE = 'small'
    SIZE_PATH = f'{DATA_LOC}/PPDD-Jul2018_aud_mono_{SIZE}'
    DESCRIPTOR_PATH = f'{DATA_LOC}/PPDD-Jul2018_aud_mono_{SIZE}/descriptor'
    COLNAMES = ['onset', 'pitch', 'morph', 'dur', 'ch']

    def get_fn(path):
        return path.split('/')[-1].split('.')[0]

    print('Reading csv files')
    part = 'prime'
    midi_path = f'{SIZE_PATH}/{part}_midi'
    prime = {get_fn(path): pd.read_csv(path, names=COLNAMES)
             for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    part = 'cont_foil'
    cont_foil = {get_fn(path): pd.read_csv(path, names=COLNAMES)
                 for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    part = 'cont_true'
    cont_true = {get_fn(path): pd.read_csv(path, names=COLNAMES)
                 for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    fn_list = list(prime.keys())
    fn = fn_list[0]
    print('Scoring compositions with new scores')
    scores = {}
    for fn in tqdm(fn_list):
        scores[fn] = get_scores(cont_true[fn], cont_foil[fn],
                                prime[fn].onset.iloc[-1],
                                increment=0.5, max_t=10.0)
    print('Scoring compositions with old scores')
    old_scores = {}
    for fn in tqdm(fn_list):
        old_scores[fn] = get_scores_old(cont_true[fn], cont_foil[fn],
                                        prime[fn].onset.iloc[-1],
                                        increment=0.5, max_t=10.0)
    for score_type in ['pitch', 'ioi', 'combo']:
        for metric in ['recall', 'precision', 'f1']:
            data = {fn: scores[fn][score_type][metric] for fn in fn_list}
            df = (pd.DataFrame
                  .from_dict(data, orient='index')
                  .reset_index()
                  .rename(columns={'index': 'fn'})
                  .melt(id_vars=['fn'], var_name='t', value_name='score')
                  )
            df['score_type'] = 'revised_scores'
            data2 = {fn: old_scores[fn][score_type][metric] for fn in fn_list}
            df2 = (pd.DataFrame
                   .from_dict(data2, orient='index')
                   .reset_index()
                   .rename(columns={'index': 'fn'})
                   .melt(id_vars=['fn'], var_name='t', value_name='score')
                   )
            df2['score_type'] = 'published_scores'
            plt.figure()
            sns.lineplot(x='t', y='score', hue='score_type',
                         data=pd.concat((df, df2), axis=0))
            plt.title(f'{score_type} score, {metric} metric')
            # plt.ylim([0, 1])
            plt.show()