Created
February 17, 2022 08:27
-
-
Save fclesio/b56bb1e8aff7b79f5be3bb6fa901500b to your computer and use it in GitHub Desktop.
SageMaker Processes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from time import strftime, gmtime

import sagemaker
from sagemaker.network import NetworkConfig
from sagemaker.processing import Processor
from sagemaker.session import Session
# Session object shared by the processor and the final status lookup below.
sagemaker_session = Session()
# --- Processing job settings -------------------------------------------------
# IAM role passed to the processor as the job's execution role.
iam_role = 'arn:aws:iam::xxxxx:role/xxxxx-production'
# ECR image that contains main.py and its dependencies.
container_uri = 'xxxxx.dkr.ecr.us-east-1.amazonaws.com/xxxxx:latest'
# Compute resources requested for the job.
instance_count = 1
instance_type = 'ml.m5.12xlarge'
volume_size_in_gb = 1024
# Command executed inside the container.
entrypoint = ['python', 'main.py']
# Network-isolation flag for the job (False leaves network access enabled).
enable_network_isolation = False
# A UTC timestamp with one-second resolution is appended to the prefix so
# that each launch of this script gets a different job name.
base_job_name = 'xxxxxx' + strftime("%Y%m%d%H%M%S", gmtime())
# Explicit name handed to processor.run() and used to look the job up later.
processing_job_name = base_job_name + 'Processing'
# VPC placement for the processing job. The isolation flag declared in the
# settings is passed explicitly so that changing it there actually takes
# effect (it was previously defined but never applied).
network_config = NetworkConfig(
    enable_network_isolation=enable_network_isolation,
    security_group_ids=["sg-xxxxxxxx", "sg-xxxxxxxx", "sg-xxxxxxxx"],
    subnets=[
        "subnet-xxxxxxxx", "subnet-xxxxxxxx",
        "subnet-xxxxxxxx", "subnet-xxxxxxxx",
        "subnet-xxxxxxxx", "subnet-xxxxxxxx",
    ],
)
# Names of the local environment variables forwarded to the processing
# container, in the order they are added to the job environment.
_FORWARDED_ENV_VARS = (
    "DB_REDSHIFT_USER",
    "DB_REDSHIFT_HOST",
    "DB_REDSHIFT_PASS",
    "DB_REDSHIFT_PORT",
    "DB_REDSHIFT_NAME",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_ACCESS_KEY_ID",
    "AWS_DEFAULT_REGION",
)

# Only variables that are actually set locally are forwarded. Formatting a
# missing value through an f-string would hand the container the literal
# text "None", which looks like a real setting and hides the misconfiguration.
# NOTE(review): these values become part of the job definition in plain text;
# consider relying on the job's IAM role or a secrets store for credentials.
env_var_dict = {
    name: os.environ[name]
    for name in _FORWARDED_ENV_VARS
    if name in os.environ
}
# Processor that runs `entrypoint` inside the custom image on the requested
# instances, inside the configured VPC, with the forwarded environment.
processor = Processor(
    role=iam_role,
    entrypoint=entrypoint,
    image_uri=container_uri,
    instance_count=instance_count,
    instance_type=instance_type,
    # Prefix for auto-generated job names; an explicit job_name given to
    # run() takes precedence over it.
    base_job_name=base_job_name,
    volume_size_in_gb=volume_size_in_gb,
    network_config=network_config,
    sagemaker_session=sagemaker_session,
    env=env_var_dict,
)
# Launch the job under the explicit name. No inputs or outputs are declared
# here; the call blocks until the job ends (wait=True) and streams the
# container logs while it runs (logs=True).
processor.run(
    inputs=None,
    outputs=None,
    wait=True,
    logs=True,
    job_name=processing_job_name,
)

# run(wait=True) has already blocked until completion, so this second wait is
# expected to return promptly; it is kept to obtain the final job description.
job_result = sagemaker_session.wait_for_processing_job(processing_job_name)
# NOTE(review): the job description presumably echoes the job's environment
# map, so this print may expose the forwarded credentials in logs - confirm
# and redact if so.
print(job_result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment