This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
debug_rules = [ | |
Rule.sagemaker(rule_configs.overtraining()), | |
Rule.sagemaker(rule_configs.overfit()), | |
Rule.custom(name='MyCustomRule', | |
image_uri='840043622174.dkr.ecr.us-east-2.amazonaws.com/sagemaker-debugger-rule-evaluator:latest', | |
instance_type='ml.t3.medium', | |
source='rules/my_custom_rule.py', | |
rule_to_invoke='CustomGradientRule', | |
volume_size_in_gb=30, | |
rule_parameters={"threshold": "20.0"}) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training loop: 10 passes over `trainloader`, recording every batch's loss
# through the debugger hook.
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Each batch is indexed as (inputs, labels); both are moved to `device`.
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # Use hook to record tensors
        hook.record_tensor_value(tensor_name="loss", tensor_value=loss)
        # NOTE(review): the excerpt ends here -- no backward pass, optimizer
        # step, or use of `running_loss` is visible; confirm they follow in
        # the full script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import smdebug.pytorch as smd

# Model, loss function and optimizer used by the training job.
net = get_network()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

job_name = 'pytorch-debug-job'

# Debugger hook: output goes under ./smd_outputs/<job_name>, configured with
# a save interval of 10 and limited to the 'gradients' and 'biases'
# collections.
hook = smd.Hook(
    out_dir=f'./smd_outputs/{job_name}',
    save_config=smd.SaveConfig(save_interval=10),
    include_collections=['gradients', 'biases'],
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import smdebug.tensorflow as smd

job_name = 'tf-debug-job'

# Keras debugger hook: output goes under ./smd_outputs/<job_name>, with a
# TensorBoard directory under ./tb_logs/<job_name>, a save interval of 1,
# and limited to the 'gradients' and 'biases' collections.
hook = smd.KerasHook(
    out_dir=f'./smd_outputs/{job_name}',
    tensorboard_dir=f'./tb_logs/{job_name}',
    save_config=smd.SaveConfig(save_interval=1),
    include_collections=['gradients', 'biases'],
)

# Build the SGD optimizer, then pass it through the hook's wrapper; the
# wrapped optimizer replaces the original binding.
opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, name='SGD')
opt = hook.wrap_optimizer(opt)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TensorFlow estimator that runs 'tf-training-script.py' with the debugger
# hook configuration attached.
tf_estimator = TensorFlow(
    entry_point='tf-training-script.py',
    # ... remaining estimator arguments are elided in this excerpt ...
    debugger_hook_config=debugger_hook_config,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CollectionConfig is used below, so it must be imported alongside the
# other debugger names.
from sagemaker.debugger import CollectionConfig, DebuggerHookConfig, Rule

# Debugger hook configuration: a "save_interval" hook parameter of '100' and
# four tensor collections (losses, weights, gradients, biases).
debugger_hook_config = DebuggerHookConfig(
    hook_parameters={"save_interval": '100'},
    collection_configs=[
        CollectionConfig("losses"),
        CollectionConfig("weights"),
        CollectionConfig("gradients"),
        CollectionConfig("biases"),
    ],
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# S3 locations of the three CIFAR-10 dataset splits.
train_path = f's3://{bucket_name}/{dataset_folder}/cifar10-dataset/train'
val_path = f's3://{bucket_name}/{dataset_folder}/cifar10-dataset/validation'
eval_path = f's3://{bucket_name}/{dataset_folder}/cifar10-dataset/eval'

# Start the training job with one input channel per split.
# NOTE(review): wait=False presumably makes fit() return without blocking on
# job completion -- confirm against the SageMaker SDK version in use.
hvd_estimator.fit(
    {'train': train_path, 'validation': val_path, 'eval': eval_path},
    job_name=job_name,
    wait=False,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sagemaker.tensorflow import TensorFlow | |
hvd_estimator = TensorFlow(entry_point = 'cifar10-tf-horovod-sagemaker.py', | |
source_dir = 'code', | |
output_path = output_path + '/', | |
code_location = output_path, | |
role = role, | |
train_instance_count = hvd_instance_count, | |
train_instance_type = hvd_instance_type, | |
train_volume_size = 50, | |
framework_version = '1.15', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Change 8: Update script to accept hyperparameters as command line arguments
parser = argparse.ArgumentParser()

# Hyper-parameters
parser.add_argument('--epochs', type=int, default=15)
parser.add_argument('--learning-rate', type=float, default=0.001)
parser.add_argument('--batch-size', type=int, default=256)
parser.add_argument('--weight-decay', type=float, default=2e-4)
# The default is a real float (not the string '0.9') so it matches the
# declared type and the other numeric options.
parser.add_argument('--momentum', type=float, default=0.9)
parser.add_argument('--optimizer', type=str, default='adam')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train the model. Step counts are the number of whole batches in each image
# set, floor-divided again by `size`; progress output (verbose=1) is enabled
# only where hvd.rank() == 0.
history = model.fit(
    train_dataset,
    steps_per_epoch=(NUM_TRAIN_IMAGES // batch_size) // size,
    validation_data=val_dataset,
    validation_steps=(NUM_VALID_IMAGES // batch_size) // size,
    verbose=1 if hvd.rank() == 0 else 0,
    epochs=epochs,
    callbacks=callbacks,
)