Skip to content

Instantly share code, notes, and snippets.

@marcossilva
Created August 6, 2020 17:29
Show Gist options
  • Save marcossilva/99fd1db9cbea2dd542230c6e7c4fd6a2 to your computer and use it in GitHub Desktop.
Save marcossilva/99fd1db9cbea2dd542230c6e7c4fd6a2 to your computer and use it in GitHub Desktop.
import boto3
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
import json
import glob
# Shared SageMaker session; its region is used below to resolve the training image.
sess = sagemaker.Session()

bucket = 'workspace-foguetes'
dataset_name = 'VOC2007'

# S3 prefixes for the four input channels plus the job outputs and checkpoints.
train_data = f's3://{bucket}/datasets/{dataset_name}/train_images'
validation_data = f's3://{bucket}/datasets/{dataset_name}/val_images'
train_annotation = f's3://{bucket}/datasets/{dataset_name}/train_labels'
validation_annotation = f's3://{bucket}/datasets/{dataset_name}/val_labels'
output_location = f's3://{bucket}/datasets/{dataset_name}/output'
checkpoint_location = f's3://{bucket}/datasets/{dataset_name}/model_checkpoints'

# Sample counts come from the LOCAL copy of the dataset, not from S3.
# NOTE(review): presumably ./data/<dataset_name>/ mirrors the S3 prefixes above -- confirm.
train_data_len = len(glob.glob(f'./data/{dataset_name}/train_images/*'))
val_data_len = len(glob.glob(f'./data/{dataset_name}/val_images/*'))

# Load the class definitions with a context manager so the file handle is
# closed deterministically (the previous bare open() leaked it).
with open('classes.json') as classes_file:
    annotations_dict = json.load(classes_file)

print(f'{train_data_len} training samples')
print(f'{val_data_len} validation samples')

# Container image of the built-in 'object-detection' algorithm for the session's region.
training_image = get_image_uri(sess.boto_region_name, 'object-detection', repo_version="latest")
# Estimator wrapping the object-detection training image resolved above.
# Positional arguments are the image URI and the IAM execution role ARN.
od_model = sagemaker.estimator.Estimator(
    training_image,
    'arn:aws:iam::493093903190:role/service-role/AmazonSageMaker-ExecutionRole-20190415T182908',
    # Session, job naming and artifact destination.
    sagemaker_session=sess,
    base_job_name='treino-adam-eva-14k',
    output_path=output_location,
    model_uri=None,
    # Training hardware and how input data is delivered to the container.
    train_instance_type='ml.p2.xlarge',
    train_instance_count=1,
    train_volume_size=30,
    input_mode='File',
    # Spot training: the wait limit is set equal to the run limit, and
    # checkpoints are written to S3.
    # NOTE(review): both limits are presumably in seconds (18000 s = 5 h) --
    # confirm against the Estimator API reference.
    train_use_spot_instances=True,
    train_max_run=18000,
    train_max_wait=18000,
    checkpoint_s3_uri=checkpoint_location,
    enable_sagemaker_metrics=True,
)
# Hyperparameters for the object-detection training job.
od_model.set_hyperparameters(
    # Backbone network and initialisation.
    base_network='vgg-16',
    use_pretrained_model=1,
    # The class count is taken from classes.json. The original referenced the
    # undefined name `annotation_dict`, which raised NameError; the loaded
    # object is `annotations_dict`.
    # NOTE(review): assumes classes.json holds exactly one entry per class -- confirm.
    num_classes=len(annotations_dict),
    # Optimisation schedule.
    mini_batch_size=8,
    epochs=200,
    learning_rate=0.001,
    lr_scheduler_step='10',
    lr_scheduler_factor=0.1,
    optimizer='adam',
    momentum=0.9,
    weight_decay=0.0005,
    # Detection thresholds and input geometry.
    overlap_threshold=0.5,
    nms_threshold=0.45,
    image_shape=512,
    label_width=12,
    # Must match the size of the training set; counted from the local copy.
    num_training_samples=train_data_len,
    # Early stopping configuration.
    early_stopping=True,
    early_stopping_min_epochs=30,
    early_stopping_patience=30,
    # Regex of layer names to freeze: everything starting with conv1_ .. conv4_.
    freeze_layer_pattern="^(conv1_|conv2_|conv3_|conv4_).*"
)
def initialize_channes(x):
    """Build the SageMaker input-channel description for the S3 location ``x``.

    Args:
        x: S3 URI (prefix) that the channel should read from.

    Returns:
        The ``sagemaker.session.s3_input`` object created for ``x``, using
        fully replicated distribution, the ``image/png`` content type and the
        ``S3Prefix`` data type.
    """
    # Every channel shares the same settings; only the S3 location differs.
    channel_options = {
        'distribution': 'FullyReplicated',
        'content_type': 'image/png',
        's3_data_type': 'S3Prefix',
    }
    return sagemaker.session.s3_input(x, **channel_options)
# Channel name -> S3 prefix, in the order the channels are created.
channel_sources = (
    ('train', train_data),
    ('validation', validation_data),
    ('train_annotation', train_annotation),
    ('validation_annotation', validation_annotation),
)

# Wrap each prefix in an input-channel description keyed by its channel name.
data_channels = {
    channel_name: initialize_channes(s3_prefix)
    for channel_name, s3_prefix in channel_sources
}

# Launch the training job with the four channels.
od_model.fit(inputs=data_channels)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment