Last active
February 11, 2021 16:29
-
-
Save dataders/afa78a32a327e6e562d1821313819cc2 to your computer and use it in GitHub Desktop.
fetching best model from HyperDriveStep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
from pprint import pprint | |
import pandas as pd | |
from azureml.core import Run | |
def df2csv(df, dir, filename, **kwargs):
    """Write ``df`` to ``dir/filename`` as CSV (index omitted) and return it.

    Args:
        df: DataFrame to persist.
        dir: Destination directory; it is created when it does not exist.
            An empty string means the current working directory.
        filename: Name of the CSV file inside ``dir``.
        **kwargs: Forwarded unchanged to ``DataFrame.to_csv``.

    Returns:
        The same ``df`` object that was passed in.
    """
    # to_csv does not create missing parent directories, so make sure the
    # target exists instead of relying on every caller to do it first.
    if dir:
        os.makedirs(dir, exist_ok=True)
    path = os.path.join(dir, filename)
    print("saving {} to {}".format(filename, dir))
    df.to_csv(path, index=False, **kwargs)
    return df
def main(input_json):
    """Flatten the per-run metrics table and pick the best run.

    Args:
        input_json: DataFrame indexed by run id whose cells are single-item
            lists holding a metric value. It must contain a
            ``"geometric mean"`` column.

    Returns:
        A tuple ``(df, best_run, best_run_id, top3_runs)`` where

        * ``df`` is the flattened table with ``run_id`` and
          ``geometric_mean`` columns,
        * ``best_run`` holds the row(s) with the maximum ``geometric_mean``,
        * ``best_run_id`` is the ``run_id`` of the first such row,
        * ``top3_runs`` is the mean ``geometric_mean`` of the three
          highest-scoring runs.

    Raises:
        ValueError: If no run has a usable ``geometric_mean`` value.
    """
    def _unwrap(cell):
        # Cells are expected to be single-item lists, but a run that never
        # logged a metric shows up as a scalar NaN; indexing that would
        # raise TypeError, so scalars are passed through untouched.
        if isinstance(cell, (list, tuple)):
            return cell[0] if cell else float("nan")
        return cell

    df = (
        input_json
        # convert columns to floats from single-item lists
        .apply(lambda column: column.map(_unwrap))
        .reset_index()
        .rename(columns={
            "index": "run_id",
            "geometric mean": "geometric_mean",
        })
    )
    print(df.head())

    scores = df["geometric_mean"]
    # max() skips NaN, and NaN never compares equal, so runs without a
    # score can never be selected as the best run.
    best_run = df[scores == scores.max()]
    if best_run.empty:
        raise ValueError(
            "no run reported a geometric_mean value; cannot pick a best run")
    best_run_id = best_run["run_id"].values[0]

    top3_runs = df.nlargest(3, "geometric_mean")["geometric_mean"].mean()
    return df, best_run, best_run_id, top3_runs
if __name__ == "__main__":
    # Script entry point: read the metrics JSON, pick the best run, write the
    # metrics table as CSV, log the headline numbers to the current run and
    # download the best run's model file.

    # parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', dest="input_file",
                        default="outputs/hyperdrive_json")
    parser.add_argument('--output_dir', dest="output_dir",
                        default="outputs")
    # NOTE(review): --exp_name is accepted but not used below.
    parser.add_argument('--exp_name', dest="exp_name",
                        default="ret-reproduceerrormsgs")
    args = parser.parse_args()
    print("all args: ")
    pprint(args)

    # INPUTS
    cwd = os.getcwd()
    print("cwd:", cwd)
    print("dir of cwd", os.listdir(cwd))
    # dirname() returns "" for a bare filename and os.listdir("") raises,
    # so fall back to the current directory for the diagnostic listing.
    parent = os.path.dirname(args.input_file) or "."
    print("input_dir_parent:", parent)
    print("dir of input_dir_parent:", os.listdir(parent))
    print("input file:", args.input_file)
    input_json = pd.read_json(args.input_file, orient='index')

    # MAIN
    df, best_run, best_run_id, top3_runs = main(input_json)

    # OUTPUTS
    # Results are written twice: to the requested output directory and to
    # ./outputs (presumably so they are also attached to this run - confirm).
    # Both directories must exist before anything is written into them.
    local_outputs = "./outputs"
    for out_dir in (args.output_dir, local_outputs):
        os.makedirs(out_dir, exist_ok=True)

    df2csv(df, args.output_dir, "hyperdrive_metrics.csv")
    df2csv(df, local_outputs, "hyperdrive_metrics.csv")

    run = Run.get_context()
    run.log("run_id", best_run_id)
    run.log("geometric_mean", best_run['geometric_mean'].values[0])
    run.log("f1", best_run['f1'].values[0])
    run.log("geo_mean (top 3 avg)", top3_runs)

    # download model of best run
    exp = run.experiment
    run_best = Run(experiment=exp, run_id=best_run_id)
    model_artifact = 'outputs/Attrition.pkl'
    model_filename = os.path.basename(model_artifact)
    # output_file_path is the local path of the downloaded file, so pass an
    # explicit file path rather than a bare directory.
    run_best.download_file(
        model_artifact,
        output_file_path=os.path.join(local_outputs, model_filename))
    run_best.download_file(
        model_artifact,
        output_file_path=os.path.join(args.output_dir, model_filename))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Intermediate data handed from one pipeline step to the next; all of it
# lives on the pipeline datastore.
split_data = PipelineData('split_data', datastore=ds_pipeline)
hyperdrive_json = PipelineData(
    'hyperdrive_json',
    datastore=ds_pipeline,
    is_directory=False,
)
best_run_data = PipelineData(
    'best_run_data',
    datastore=ds_pipeline,
    is_directory=True,
)

# Step 1: run the HyperDrive sweep on the split data; its metrics output is
# bound to hyperdrive_json.
hyperdrive_step = HyperDriveStep(
    name='kickoff hyperdrive jobs',
    hyperdrive_config=hyperdrive_run_config,
    inputs=[split_data],
    estimator_entry_script_arguments=["--input_dir", split_data],
    metrics_output=hyperdrive_json,
    allow_reuse=pipeline_reuse,
)

# Step 2: run get_metrics.py on the sweep's metrics file and write its
# results into best_run_data.
best_run_step = PythonScriptStep(
    name='get best run',
    source_directory=os.path.join(os.getcwd(), 'compute', 'metrics'),
    script_name='get_metrics.py',
    arguments=[
        '--input_file', hyperdrive_json,
        '--output_dir', best_run_data,
        '--exp_name', project_name,
    ],
    inputs=[hyperdrive_json],
    outputs=[best_run_data],
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=pipeline_reuse,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment