jeanmidevacc · January 24, 2020 03:07
diff --git a/metaflow_client.py b/metaflow_client.py
 # Build one summary row per successful run of the flow, then wrap the rows
 # into a DataFrame.  `runs`, `flowname`, `Step` and `allpredictions` are not
 # defined in this section and must already be in scope.
 timestamp_format = "%Y-%m-%dT%H:%M:%S"
 rundetails_columns = [
     "runid",
     "startdate",
     "enddate",
     "timeexecution",
     "nbr_cards",
     "firstsample_training",
     "parametersRF",
     "accuracy",
 ]

 informations = []
 for i, run in enumerate(runs):
     # Guard clause: runs that did not succeed contribute no row.
     if not run.successful:
         continue

     # Common prefix of every step pathspec of this run.
     run_pathspec = f"{flowname}/{run.id}"

     # First step of the flow and final task of the run.
     step_start = Step(f"{run_pathspec}/start")
     step_end = run.end_task

     # Number of cards picked for the features computation.
     nbr_cardsselected = step_start.task.data.limittopcards

     # Start date, end date and elapsed seconds of the run.  The last four
     # characters of each timestamp are dropped before parsing.
     # NOTE(review): presumably a sub-second/timezone suffix -- confirm
     # against the Metaflow version in use.
     started_text = step_start.created_at[:-4]
     startdate = datetime.strptime(started_text, timestamp_format)
     enddate = datetime.strptime(step_end.finished_at[:-4], timestamp_format)
     timeexecution = (enddate - startdate).total_seconds()

     # Identity columns of the first row of the training set.
     step_segment_decks = Step(f"{run_pathspec}/segment_decks")
     first_training_row = step_segment_decks.task.data.df_decks_totrain.iloc[0]
     sample_details = first_training_row[["deckid", "deckname", "archetype"]].values

     # Accuracy and parameters stored by the model-selection step.
     step = Step(f"{run_pathspec}/select_and_score")
     accuracy = step.task.data.accuracy
     parameters = step.task.data.parameters

     # Progress report for every tenth run index.
     if i % 10 == 0:
         print(f"Run:{run.id}")
         print(f"Started at {started_text}")
         print(f"Run for {timeexecution} seconds")
         print("Number of cards selected :", nbr_cardsselected)
         print("First sample of the training set", sample_details)
         print(f"The best RF with {parameters} haa an accuracy of {round(accuracy,2)}")

     # Keep the details of this run; the sample is stored as its string form.
     information = [
         run.id,
         startdate,
         enddate,
         timeexecution,
         nbr_cardsselected,
         str(sample_details),
         parameters,
         accuracy,
     ]
     informations.append(information)

 # Wrap up the information collected on the runs.
 df_rundetails = pd.DataFrame(informations, columns=rundetails_columns)
 # `allpredictions` is not filled in this section; it must come from
 # elsewhere in the file.
 df_allpredictions = pd.concat(allpredictions, axis=1)
	# Collect one summary row per successful run of the flow and wrap the rows
	# into a DataFrame.  `runs`, `flowname`, `Step` and `allpredictions` are not
	# defined in this section and must already be in scope.
	informations = []
	for i,run in enumerate(runs):

		# Only successful runs are summarised; the others are skipped.
		if run.successful:
			# Collect some details on the first and last step of the flow
			step_start = Step(f"{flowname}/{run.id}/start")
			step_end = run.end_task

			# Collect the number of cards picked for the features computation
			nbr_cardsselected = step_start.task.data.limittopcards

			# Collect general information on the flow (startdate, enddate, execution time).
			# The last four characters of each timestamp are dropped before parsing.
			# NOTE(review): presumably a sub-second/timezone suffix -- confirm
			# against the Metaflow version in use.
			startdate = datetime.strptime(step_start.created_at[:-4], "%Y-%m-%dT%H:%M:%S")
			enddate = datetime.strptime(step_end.finished_at[:-4], "%Y-%m-%dT%H:%M:%S")
			timeexecution = (enddate - startdate).total_seconds()

			# Navigate the variables produced by the flow:
			# collect the first sample of the training set
			step_segment_decks = Step(f"{flowname}/{run.id}/segment_decks")
			sample_details = step_segment_decks.task.data.df_decks_totrain.iloc[0][["deckid","deckname","archetype"]].values

			# Collect the accuracy and the parameters of the best model
			step = Step(f"{flowname}/{run.id}/select_and_score")
			accuracy = step.task.data.accuracy
			parameters = step.task.data.parameters

			# Print a progress report for every tenth run index
			if i%10 == 0:
				print(f"Run:{run.id}")
				print(f"Started at {step_start.created_at[:-4]}")
				print(f"Run for {timeexecution} seconds")
				print("Number of cards selected :", nbr_cardsselected)
				print("First sample of the training set", sample_details)
				print(f"The best RF with {parameters} haa an accuracy of {round(accuracy,2)}")

			# Store the details on the run (the sample is stored as its string form)
			information = [run.id, startdate, enddate, timeexecution, nbr_cardsselected, str(sample_details), parameters, accuracy]
			informations.append(information)

	# Wrap up the information collected on the runs
	df_rundetails = pd.DataFrame(informations, columns = ["runid","startdate","enddate","timeexecution","nbr_cards","firstsample_training","parametersRF","accuracy"])
	# `allpredictions` is not filled in this section; it must come from
	# elsewhere in the file.
	df_allpredictions = pd.concat(allpredictions, axis = 1)