@GarrettMooney · Created October 7, 2024
Create a SageMaker endpoint from a local artifact and inference.py
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "boto3",
# "sagemaker",
# ]
# ///
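# NB: the block above is PEP 723 inline script metadata, so a PEP 723-aware
# runner (e.g. `uv run <this-file>`) can resolve boto3/sagemaker automatically.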
import subprocess
import time
from time import gmtime, strftime

import boto3
import sagemaker

# Setup
client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")
boto_session = boto3.session.Session()
s3 = boto_session.resource("s3")
region = boto_session.region_name
print(region)
sagemaker_session = sagemaker.Session()
numeric = ...  # NB: AWS account ID; check the AWS Console under the SageMaker domain
role = f"arn:aws:iam::{numeric}:role/SageMakerExecutionRole"  # NB: the role may be named differently; check the AWS Console
prefix = ...  # change per project; assumes the bucket `{prefix}-dev-etl-integration-bucket` already exists


def main():
    # Build tar file with model data + inference code
    subprocess.run(
        ["tar", "-cvpzf", "model.tar.gz", "artifacts/model.joblib", "inference.py"],
        check=True,  # Popen + communicate() silently ignored failures; fail loudly instead
    )

    # Retrieve the prebuilt sklearn serving image for this region
    image_uri = sagemaker.image_uris.retrieve(
        framework="sklearn",
        region=region,
        version="0.23-1",
        py_version="py3",
        instance_type="ml.m5.xlarge",
    )

    # Bucket for model artifacts
    default_bucket = (
        f"{prefix}-dev-etl-integration-bucket"  # or sagemaker_session.default_bucket()
    )
    print(default_bucket)

    # Upload tar.gz to bucket
    model_artifacts = f"s3://{default_bucket}/model.tar.gz"
    s3.meta.client.upload_file("model.tar.gz", default_bucket, "model.tar.gz")

    # Step 1: Model creation
    model_name = "sklearn-test" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    print("Model name: " + model_name)
    create_model_response = client.create_model(
        ModelName=model_name,
        Containers=[
            {
                "Image": image_uri,
                "Mode": "SingleModel",
                "ModelDataUrl": model_artifacts,
                "Environment": {
                    "SAGEMAKER_SUBMIT_DIRECTORY": model_artifacts,
                    "SAGEMAKER_PROGRAM": "inference.py",
                },
            }
        ],
        ExecutionRoleArn=role,
    )
    print("Model Arn: " + create_model_response["ModelArn"])

    # Step 2: Endpoint config (EPC) creation
    sklearn_epc_name = "sklearn-epc" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    endpoint_config_response = client.create_endpoint_config(
        EndpointConfigName=sklearn_epc_name,
        ProductionVariants=[
            {
                "VariantName": "sklearnvariant",
                "ModelName": model_name,
                "InstanceType": "ml.c5.large",
                "InitialInstanceCount": 1,
            },
        ],
    )
    print("Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"])

    # Step 3: Endpoint (EP) creation
    endpoint_name = "sklearn-local-ep" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
    create_endpoint_response = client.create_endpoint(
        EndpointName=endpoint_name,
        EndpointConfigName=sklearn_epc_name,
    )
    print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

    # Poll until the endpoint leaves the "Creating" state
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
    while describe_endpoint_response["EndpointStatus"] == "Creating":
        describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
        print(describe_endpoint_response["EndpointStatus"])
        time.sleep(15)
    print(describe_endpoint_response)
    return endpoint_name


if __name__ == "__main__":
    endpoint = main()
    with open("artifacts/endpoint.txt", "w") as f:
        f.write(endpoint)
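
The script builds a `sagemaker-runtime` client (`runtime`) but never calls it. A minimal invocation sketch using that client, assuming the model's inference.py handler accepts CSV input; the payload and the filename `invoke.py` are illustrative, not part of the gist:

# invoke.py
import boto3

runtime = boto3.client(service_name="sagemaker-runtime")

# Read back the endpoint name the deployment script saved
with open("artifacts/endpoint.txt") as f:
    endpoint_name = f.read().strip()

# Adjust ContentType and Body to whatever your input_fn expects
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="text/csv",
    Body="1.0,2.0,3.0,4.0",
)
print(response["Body"].read().decode("utf-8"))

Two usage notes: the polling loop in main() can be replaced with the built-in waiter, client.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name); and when you are done, tear resources down in reverse order (client.delete_endpoint, client.delete_endpoint_config, client.delete_model) to avoid paying for an idle instance.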