Monitoring HPO in ClearML
##
# project: ClearML Tutorial
# date: Sep 2nd 2023
import os
import time
from datetime import datetime

import hydra
from omegaconf import DictConfig

from clearml import Task
from clearml.automation import DiscreteParameterRange
from clearml.automation import GridSearch, HyperParameterOptimizer

from utils import get_semantic_id

@hydra.main(version_base=None, config_path='conf', config_name='config-lesson-3')
def run_optimization(cfg: DictConfig):
    """Runs an optimization task"""
    # Initialize ClearML logging
    task_name = f'HPO-task-{get_semantic_id(2)}'  # e.g. HPO-task-valerie-eelgrass
    print(f'Experiment Name: {task_name}')

    # This is the task we want to optimize.
    # Replace these variables with the ID of your experiment and the
    # execution queue your ClearML agents listen on.
    base_task_id = ''  # add task id here
    queue = ''         # select queue here
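
    # A hedged alternative, assuming the base experiment lives in the same
    # ClearML project and that 'my-training-task' is a placeholder for its
    # real name: look the task up by name instead of pasting its ID.
    # base_task_id = Task.get_task(project_name=cfg.project_name,
    #                              task_name='my-training-task').id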

    # Create the HPO controller task
    hpo_task = Task.init(project_name=cfg.project_name,
                         task_name=task_name,
                         tags=['HPO', 'Hydra'])

    # Create an optimizer: a grid search over the batch size, minimizing
    # the 'loss'/'test' scalar reported by the base task
    optimizer = HyperParameterOptimizer(
        base_task_id=base_task_id,
        hyper_parameters=[
            DiscreteParameterRange('Hydra/trainer.batch_size', values=[8, 16, 32]),
        ],
        objective_metric_title='loss',
        objective_metric_series='test',
        objective_metric_sign='min',
        max_number_of_concurrent_tasks=5,
        optimizer_class=GridSearch,
        execution_queue=queue,
        optimization_time_limit=10,  # total budget, in minutes
        time_limit_per_job=3,        # per-experiment budget, in minutes
        pool_period_min=0.5)         # polling interval, in minutes
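
    # For the objective above to resolve, the base task must report a scalar
    # titled 'loss' with series 'test'. A minimal sketch of that call inside
    # the base training script (assuming it runs under a ClearML task;
    # test_loss and epoch are placeholders):
    #   Task.current_task().get_logger().report_scalar(
    #       title='loss', series='test', value=test_loss, iteration=epoch)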

    # This will start the optimizer (sub-tasks are sent to the queue)
    optimizer.start()
    # This will start the optimizer locally instead
    # optimizer.start_locally()

    # Check every 10 seconds until the time limit is reached...
    while not optimizer.reached_time_limit():
        top_exp = optimizer.get_top_experiments(top_k=2)
        tim = time.time()
        print(f'Top experiments: {[t.id for t in top_exp]}')
        print(f'Active experiments: {optimizer.get_num_active_experiments()}')
        print('')
        print(f'Start time: {datetime.fromtimestamp(optimizer.optimization_start_time)}')
        print(f'Time limit: {optimizer.get_time_limit()}')
        print('')
        print(f'Current time: {datetime.fromtimestamp(tim)}')
        print(f'Reached time limit: {optimizer.reached_time_limit()}')
        print(f'Elapsed (min): {optimizer.elapsed()}')
        print('---')
        time.sleep(10)

    # Wait until timed out... this doesn't seem to check whether or not
    # optimization has completed 🤨
    optimizer.wait()

    # Stop any jobs that are still running
    optimizer.stop()


if __name__ == '__main__':
    os.environ['HYDRA_FULL_ERROR'] = '1'
    run_optimization()
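
The script imports get_semantic_id from a local utils module that is not included in the gist. Judging by the example name HPO-task-valerie-eelgrass, it returns a given number of random words joined by hyphens. A minimal sketch of such a helper, assuming that behavior (the word lists below are placeholders, not the originals):

# utils.py -- hypothetical stand-in for the helper imported above
import random

_FIRST = ['valerie', 'oscar', 'harriet', 'felix']
_SECOND = ['eelgrass', 'maple', 'basalt', 'heron']

def get_semantic_id(n_words: int = 2) -> str:
    """Return a human-readable id: n_words random words joined by hyphens."""
    pools = [_FIRST, _SECOND]
    return '-'.join(random.choice(pools[i % len(pools)]) for i in range(n_words))

On the Hydra side, the conf/config-lesson-3.yaml referenced by the decorator only needs to supply the project_name read in run_optimization (going by the header comment, presumably project_name: ClearML Tutorial). The 'Hydra/trainer.batch_size' parameter name, by contrast, addresses the base task's own configuration: ClearML's Hydra integration records a task's config under a 'Hydra' hyperparameter section, which is how the optimizer overrides trainer.batch_size in each cloned experiment.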