p4p_scores.py
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob

def get_scores(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Calculates the inter-onset interval (IOI), pitch, and combo scores of a
    generated continuation against the true continuation over increasing
    time windows.

    References
    ----------
    https://www.music-ir.org/mirex/wiki/2018:Patterns_for_Prediction#IOI
    https://github.com/BeritJanssen/PatternsForPrediction/blob/master/evaluate_prediction.py

    Parameters
    ----------
    true_df: DataFrame
        A dataframe containing the onset times (measured in number of
        quarter notes since the start) and pitches of the true continuation.
        The dataframe should have columns 'onset' and 'pitch' holding the
        onset times and the MIDI note numbers respectively.
    gen_df: DataFrame
        As above, but for the generated continuation.
    start_onset: float
        The onset time of the final note played in the prime associated with
        these continuations.
    increment: float
        Number of quarter notes to increment by in each iteration.
    max_t: float, optional
        The maximum time in crotchets to get the score for. If None, scores
        are computed until all onsets in both arrays have been assessed.

    Returns
    -------
    scores: dict
        Containing:
        ioi: dict
            Dicts of IOI 'precision', 'recall', and 'f1' scores, each keyed
            by the time offset tt, i.e. increment, 2*increment, ..., max_t.
        pitch: dict
            As above, but for the pitch scores.
        combo: dict
            As above, but for the combined (onset, pitch) scores.
    """
    if max_t is None:
        # TODO: get the max_t
        max_onset = max(true_df.onset.max(), gen_df.onset.max())
        max_t = int(max_onset - start_onset)
    score_names = ['ioi', 'pitch', 'combo']
    scores = {name: {'precision': {}, 'recall': {}, 'f1': {}}
              for name in score_names}
    # clean data
    true_df_copy = true_df.copy(deep=True)
    gen_df_copy = gen_df.copy(deep=True)
    for df in [true_df_copy, gen_df_copy]:
        # These roundings are done in the original evaluation code
        # which is linked in docstring references
        df.loc[:, 'pitch'] = df.loc[:, 'pitch'].astype(int)
        df.loc[:, 'onset'] = df.loc[:, 'onset'].round(2)
    nr_steps = int(max_t / increment)  # range is only for ints, not floats
    for ii in range(1, nr_steps+1):
        tt = ii*increment
        max_onset = start_onset + tt
        true_notes = [(row.onset, row.pitch) for idx, row in true_df_copy.iterrows()
                      if row.onset <= max_onset]
        gen_notes = [(row.onset, row.pitch) for idx, row in gen_df_copy.iterrows()
                     if row.onset <= max_onset]
        true_size = len(true_notes)
        gen_size = len(gen_notes)
        if true_size == 0 and gen_size == 0:
            for name in score_names:
                scores[name]['precision'][tt] = 1
                scores[name]['recall'][tt] = 1
                scores[name]['f1'][tt] = 1
        elif true_size == 0:  # and gen_size != 0:
            for name in score_names:
                scores[name]['precision'][tt] = 0
                scores[name]['recall'][tt] = 0  # arguably this should be NaN,
                                                # but shouldn't be 1 because there
                                                # is an implicit 'rest' note
                scores[name]['f1'][tt] = 0
        elif gen_size == 0:  # and true_size != 0:
            for name in score_names:
                scores[name]['precision'][tt] = 0  # arguably this should be NaN,
                                                   # but shouldn't be 1 because there
                                                   # is an implicit 'rest' note
                scores[name]['recall'][tt] = 0
                scores[name]['f1'][tt] = 0
        else:
            true_onsets, true_pitches = [list(x) for x in zip(*true_notes)]
            gen_onsets, gen_pitches = [list(x) for x in zip(*gen_notes)]
            # TODO: dedup polyphonic submissions? For now, assuming input is
            # monophonic
            # https://github.com/BeritJanssen/PatternsForPrediction/issues/5
            # IOI score =======
            # TODO: This needs to be adapted for polyphonic case
            intersection_size = len(
                set(true_onsets).intersection(set(gen_onsets))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['ioi']['precision'][tt] = precision
            scores['ioi']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['ioi']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['ioi']['f1'][tt] = 0
            # Pitch score =======
            ptr1 = 0
            ptr2 = 0
            true_pitches.sort()
            gen_pitches.sort()
            nr_matches = 0
            while ptr1 < true_size and ptr2 < gen_size:
                if true_pitches[ptr1] == gen_pitches[ptr2]:
                    nr_matches += 1
                    ptr1 += 1
                    ptr2 += 1
                elif true_pitches[ptr1] < gen_pitches[ptr2]:
                    ptr1 += 1
                else:
                    ptr2 += 1
            precision = nr_matches / gen_size
            recall = nr_matches / true_size
            scores['pitch']['precision'][tt] = precision
            scores['pitch']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['pitch']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['pitch']['f1'][tt] = 0
            # Combo score =======
            intersection_size = len(
                set(true_notes).intersection(set(gen_notes))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['combo']['precision'][tt] = precision
            scores['combo']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['combo']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['combo']['f1'][tt] = 0
    return scores
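

# ---------------------------------------------------------------------------
# Illustration (added, not part of the original evaluation code): a minimal
# sketch of how get_scores expects its inputs, using hypothetical toy data.
# Onsets are in quarter notes, pitches are MIDI note numbers, and the
# generated continuation diverges from the truth after the second note.
def _toy_get_scores_example():
    true_df = pd.DataFrame({'onset': [4.0, 5.0, 6.0], 'pitch': [60, 62, 64]})
    gen_df = pd.DataFrame({'onset': [4.0, 5.0, 6.5], 'pitch': [60, 62, 65]})
    # Assume the final note of the prime fell on beat 3
    toy_scores = get_scores(true_df, gen_df, start_onset=3.0,
                            increment=1.0, max_t=3.0)
    # The continuations agree for the first two crotchets, then diverge, so
    # e.g. toy_scores['combo']['f1'] is roughly {1.0: 1.0, 2.0: 1.0, 3.0: 0.8}
    return toy_scores
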
def get_scores_old(true_df, gen_df, start_onset, increment=1.0, max_t=None):
    """
    Same as get_scores, but as implemented in the competition. It has issues
    handling the NaN cases for precision, recall, and F1. See the notes in
    get_scores for details.
    """
    if max_t is None:
        # TODO: get the max_t
        max_onset = max(true_df.onset.max(), gen_df.onset.max())
        max_t = int(max_onset - start_onset)
    score_names = ['ioi', 'pitch', 'combo']
    scores = {name: {'precision': {}, 'recall': {}, 'f1': {}}
              for name in score_names}
    # clean data
    true_df_copy = true_df.copy(deep=True)
    gen_df_copy = gen_df.copy(deep=True)
    for df in [true_df_copy, gen_df_copy]:
        # These roundings are done in the original evaluation code
        # which is linked in docstring references
        df.loc[:, 'pitch'] = df.loc[:, 'pitch'].astype(int)
        df.loc[:, 'onset'] = df.loc[:, 'onset'].round(2)
    nr_steps = int(max_t / increment)  # range is only for ints, not floats
    for ii in range(1, nr_steps+1):
        tt = ii*increment
        max_onset = start_onset + tt
        true_notes = [(row.onset, row.pitch) for idx, row in true_df_copy.iterrows()
                      if row.onset <= max_onset]
        gen_notes = [(row.onset, row.pitch) for idx, row in gen_df_copy.iterrows()
                     if row.onset <= max_onset]
        true_size = len(true_notes)
        gen_size = len(gen_notes)
        # This is one of the issues - we should handle size zero cases
        if true_size == 0 or gen_size == 0:
            for name in score_names:
                scores[name]['precision'][tt] = np.nan
                scores[name]['recall'][tt] = np.nan
                scores[name]['f1'][tt] = np.nan
        else:
            true_onsets, true_pitches = [list(x) for x in zip(*true_notes)]
            gen_onsets, gen_pitches = [list(x) for x in zip(*gen_notes)]
            # TODO: dedup polyphonic submissions? For now, assuming input is
            # monophonic
            # https://github.com/BeritJanssen/PatternsForPrediction/issues/5
            # IOI score =======
            # TODO: This needs to be adapted for polyphonic case
            intersection_size = len(
                set(true_onsets).intersection(set(gen_onsets))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['ioi']['precision'][tt] = precision
            scores['ioi']['recall'][tt] = recall
            # This is one of the issues
            if (precision + recall) != 0:
                scores['ioi']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['ioi']['f1'][tt] = np.nan
            # Pitch score =======
            ptr1 = 0
            ptr2 = 0
            true_pitches.sort()
            gen_pitches.sort()
            nr_matches = 0
            while ptr1 < true_size and ptr2 < gen_size:
                if true_pitches[ptr1] == gen_pitches[ptr2]:
                    nr_matches += 1
                    ptr1 += 1
                    ptr2 += 1
                elif true_pitches[ptr1] < gen_pitches[ptr2]:
                    ptr1 += 1
                else:
                    ptr2 += 1
            precision = nr_matches / gen_size
            recall = nr_matches / true_size
            scores['pitch']['precision'][tt] = precision
            scores['pitch']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['pitch']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['pitch']['f1'][tt] = np.nan
            # Combo score =======
            intersection_size = len(
                set(true_notes).intersection(set(gen_notes))
            )
            precision = intersection_size / gen_size
            recall = intersection_size / true_size
            scores['combo']['precision'][tt] = precision
            scores['combo']['recall'][tt] = recall
            if (precision + recall) != 0:
                scores['combo']['f1'][tt] = (2*precision*recall)/(precision + recall)
            else:
                scores['combo']['f1'][tt] = np.nan
    return scores
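

# ---------------------------------------------------------------------------
# Illustration (added, not part of the original evaluation code): the
# behavioural difference between the two scoring functions on a degenerate
# case. With an empty generated continuation, get_scores returns 0 for every
# metric, whereas get_scores_old returns NaN, which is typically dropped when
# the scores are later averaged or plotted.
def _empty_continuation_example():
    true_df = pd.DataFrame({'onset': [4.0, 5.0], 'pitch': [60, 62]})
    empty_df = pd.DataFrame({'onset': pd.Series(dtype=float),
                             'pitch': pd.Series(dtype=float)})
    new = get_scores(true_df, empty_df, start_onset=3.0,
                     increment=1.0, max_t=2.0)
    old = get_scores_old(true_df, empty_df, start_onset=3.0,
                         increment=1.0, max_t=2.0)
    # new['ioi']['f1'] == {1.0: 0, 2.0: 0}, while old['ioi']['f1'] is all NaN
    return new, old
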
if __name__ == '__main__':
    # Change to point towards a folder containing the unzipped data
    DATA_LOC = './data'
    # Change to run on different sizes
    SIZE = 'small'
    SIZE_PATH = f'{DATA_LOC}/PPDD-Jul2018_aud_mono_{SIZE}'
    DESCRIPTOR_PATH = f'{DATA_LOC}/PPDD-Jul2018_aud_mono_{SIZE}/descriptor'
    COLNAMES = ['onset', 'pitch', 'morph', 'dur', 'ch']

    def get_fn(path):
        return path.split('/')[-1].split('.')[0]

    print('Reading csv files')
    part = 'prime'
    midi_path = f'{SIZE_PATH}/{part}_midi'
    prime = {get_fn(path): pd.read_csv(path, names=COLNAMES)
             for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    part = 'cont_foil'
    cont_foil = {get_fn(path): pd.read_csv(path, names=COLNAMES)
                 for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    part = 'cont_true'
    cont_true = {get_fn(path): pd.read_csv(path, names=COLNAMES)
                 for path in tqdm(glob(f'{SIZE_PATH}/{part}_csv/*'))}
    fn_list = list(prime.keys())
    fn = fn_list[0]
    print('Scoring compositions with new scores')
    scores = {}
    for fn in tqdm(fn_list):
        scores[fn] = get_scores(cont_true[fn], cont_foil[fn],
                                prime[fn].onset.iloc[-1],
                                increment=0.5, max_t=10.0)
    print('Scoring compositions with old scores')
    old_scores = {}
    for fn in tqdm(fn_list):
        old_scores[fn] = get_scores_old(cont_true[fn], cont_foil[fn],
                                        prime[fn].onset.iloc[-1],
                                        increment=0.5, max_t=10.0)
    for score_type in ['pitch', 'ioi', 'combo']:
        for metric in ['recall', 'precision', 'f1']:
            data = {fn: scores[fn][score_type][metric] for fn in fn_list}
            df = (pd.DataFrame
                  .from_dict(data, orient='index')
                  .reset_index()
                  .rename(columns={'index': 'fn'})
                  .melt(id_vars=['fn'], var_name='t', value_name='score')
                  )
            df['score_type'] = 'revised_scores'
            data2 = {fn: old_scores[fn][score_type][metric] for fn in fn_list}
            df2 = (pd.DataFrame
                   .from_dict(data2, orient='index')
                   .reset_index()
                   .rename(columns={'index': 'fn'})
                   .melt(id_vars=['fn'], var_name='t', value_name='score')
                   )
            df2['score_type'] = 'published_scores'
            plt.figure()
            sns.lineplot(x='t', y='score', hue='score_type',
                         data=pd.concat((df, df2), axis=0))
            plt.title(f'{score_type} score, {metric} metric')
            # plt.ylim([0, 1])
            plt.show()