GarrettMooney · October 7, 2024 15:29
diff --git a/create_sagemaker_endpoint.py b/create_sagemaker_endpoint.py
 # /// script
 # requires-python = ">=3.12"
 # dependencies = [
 #     "boto3",
 #     "sagemaker",
 # ]
 # ///
 import subprocess
 import time
 from time import gmtime, strftime

 import boto3
 import sagemaker

 # Setup: AWS clients and sessions shared by the rest of the script.
 client = boto3.client(service_name="sagemaker")
 runtime = boto3.client(service_name="sagemaker-runtime")
 boto_session = boto3.session.Session()
 s3 = boto_session.resource("s3")
 region = boto_session.region_name
 print(region)
 sagemaker_session = sagemaker.Session()

 # Placeholders (`...`) below must be replaced with real values before running.
 numeric = ...  # NB: fills the account field of the role ARN; check this from the AWS Console under Sagemaker domain
 # Must be an f-string: without the prefix the literal text "{numeric}" ends up in the ARN.
 role = f"arn:aws:iam::{numeric}:role/SageMakerExecutionRole"  # NB: could be named differently; check from AWS Console
 prefix = ...  # should change this based on the project; assumes a bucket has been made for `{prefix}-dev-etl-integration-bucket`


 def _build_model_archive(archive_name="model.tar.gz"):
     """Bundle the model artifact and inference script into a gzipped tarball.

     Raises:
         subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
     """
     tar_command = [
         "tar",
         "-cvpzf",
         archive_name,
         "artifacts/model.joblib",
         "inference.py",
     ]
     # stdout is captured only to keep tar's verbose listing off the console;
     # check=True stops the run before a missing or stale archive is uploaded.
     subprocess.run(tar_command, stdout=subprocess.PIPE, check=True)


 def _wait_for_endpoint(endpoint_name, poll_seconds=15):
     """Poll the endpoint until it leaves "Creating"; return the last description."""
     description = client.describe_endpoint(EndpointName=endpoint_name)
     while description["EndpointStatus"] == "Creating":
         print(description["EndpointStatus"])
         # Sleep *before* re-polling so no time is wasted once creation is over.
         time.sleep(poll_seconds)
         description = client.describe_endpoint(EndpointName=endpoint_name)
     return description


 def main():
     """Package the model, register it with SageMaker, and deploy an endpoint.

     Steps: build ``model.tar.gz``, upload it to the project bucket, create a
     SageMaker model, an endpoint configuration, and an endpoint, then poll
     until the endpoint is no longer being created.

     Returns:
         str: Name of the endpoint that was created.

     Raises:
         subprocess.CalledProcessError: If building the tar archive fails.
         RuntimeError: If the endpoint finishes in any status other than
             ``InService``.
     """
     archive_name = "model.tar.gz"

     # Build tar file with model data + inference code
     _build_model_archive(archive_name)

     # retrieve sklearn image
     image_uri = sagemaker.image_uris.retrieve(
         framework="sklearn",
         region=region,
         version="0.23-1",
         py_version="py3",
         instance_type="ml.m5.xlarge",
     )

     # Bucket for model artifacts (alternative: sagemaker_session.default_bucket())
     default_bucket = f"{prefix}-dev-etl-integration-bucket"
     print(default_bucket)

     # Upload tar.gz to bucket
     model_artifacts = f"s3://{default_bucket}/{archive_name}"
     s3.meta.client.upload_file(archive_name, default_bucket, archive_name)

     # One timestamp per run so the model, config, and endpoint share a suffix.
     timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

     # Step 1: Model Creation
     model_name = "sklearn-test" + timestamp
     print("Model name: " + model_name)
     create_model_response = client.create_model(
         ModelName=model_name,
         Containers=[
             {
                 "Image": image_uri,
                 "Mode": "SingleModel",
                 "ModelDataUrl": model_artifacts,
                 "Environment": {
                     "SAGEMAKER_SUBMIT_DIRECTORY": model_artifacts,
                     "SAGEMAKER_PROGRAM": "inference.py",
                 },
             }
         ],
         ExecutionRoleArn=role,
     )
     print("Model Arn: " + create_model_response["ModelArn"])

     # Step 2: EPC Creation
     sklearn_epc_name = "sklearn-epc" + timestamp
     endpoint_config_response = client.create_endpoint_config(
         EndpointConfigName=sklearn_epc_name,
         ProductionVariants=[
             {
                 "VariantName": "sklearnvariant",
                 "ModelName": model_name,
                 "InstanceType": "ml.c5.large",
                 "InitialInstanceCount": 1,
             },
         ],
     )
     print(
         "Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"]
     )

     # Step 3: EP Creation
     endpoint_name = "sklearn-local-ep" + timestamp
     create_endpoint_response = client.create_endpoint(
         EndpointName=endpoint_name,
         EndpointConfigName=sklearn_epc_name,
     )
     print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

     # Monitor creation
     description = _wait_for_endpoint(endpoint_name)
     print(description)

     # Do not report an endpoint that never came up as if it were usable.
     status = description["EndpointStatus"]
     if status != "InService":
         reason = description.get("FailureReason", "no failure reason reported")
         raise RuntimeError(
             f"Endpoint {endpoint_name} ended in status {status!r}: {reason}"
         )

     return endpoint_name


 if __name__ == "__main__":
     # Deploy the endpoint, then record its name in artifacts/endpoint.txt.
     endpoint = main()
     with open("artifacts/endpoint.txt", "w") as endpoint_file:
         endpoint_file.write(endpoint)
	# /// script
	# requires-python = ">=3.12"
	# dependencies = [
	# "boto3",
	# "sagemaker",
	# ]
	# ///
	import subprocess
	import time
	from time import gmtime, strftime

	import boto3
	import sagemaker

	# Setup: AWS clients and sessions shared by the rest of the script.
	client = boto3.client(service_name="sagemaker")
	runtime = boto3.client(service_name="sagemaker-runtime")
	boto_session = boto3.session.Session()
	s3 = boto_session.resource("s3")
	region = boto_session.region_name
	print(region)
	sagemaker_session = sagemaker.Session()

	# Placeholders (`...`) below must be replaced with real values before running.
	numeric = ... # NB: fills the account field of the role ARN; check this from the AWS Console under Sagemaker domain
	# Must be an f-string: without the prefix the literal text "{numeric}" ends up in the ARN.
	role = f"arn:aws:iam::{numeric}:role/SageMakerExecutionRole" # NB: could be named differently; check from AWS Console
	prefix = ... # should change this based on the project; assumes a bucket has been made for `{prefix}-dev-etl-integration-bucket`


	def _build_model_archive(archive_name="model.tar.gz"):
	    """Bundle the model artifact and inference script into a gzipped tarball.

	    Raises:
	        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
	    """
	    tar_command = [
	        "tar",
	        "-cvpzf",
	        archive_name,
	        "artifacts/model.joblib",
	        "inference.py",
	    ]
	    # stdout is captured only to keep tar's verbose listing off the console;
	    # check=True stops the run before a missing or stale archive is uploaded.
	    subprocess.run(tar_command, stdout=subprocess.PIPE, check=True)


	def _wait_for_endpoint(endpoint_name, poll_seconds=15):
	    """Poll the endpoint until it leaves "Creating"; return the last description."""
	    description = client.describe_endpoint(EndpointName=endpoint_name)
	    while description["EndpointStatus"] == "Creating":
	        print(description["EndpointStatus"])
	        # Sleep *before* re-polling so no time is wasted once creation is over.
	        time.sleep(poll_seconds)
	        description = client.describe_endpoint(EndpointName=endpoint_name)
	    return description


	def main():
	    """Package the model, register it with SageMaker, and deploy an endpoint.

	    Steps: build ``model.tar.gz``, upload it to the project bucket, create a
	    SageMaker model, an endpoint configuration, and an endpoint, then poll
	    until the endpoint is no longer being created.

	    Returns:
	        str: Name of the endpoint that was created.

	    Raises:
	        subprocess.CalledProcessError: If building the tar archive fails.
	        RuntimeError: If the endpoint finishes in any status other than
	            ``InService``.
	    """
	    archive_name = "model.tar.gz"

	    # Build tar file with model data + inference code
	    _build_model_archive(archive_name)

	    # retrieve sklearn image
	    image_uri = sagemaker.image_uris.retrieve(
	        framework="sklearn",
	        region=region,
	        version="0.23-1",
	        py_version="py3",
	        instance_type="ml.m5.xlarge",
	    )

	    # Bucket for model artifacts (alternative: sagemaker_session.default_bucket())
	    default_bucket = f"{prefix}-dev-etl-integration-bucket"
	    print(default_bucket)

	    # Upload tar.gz to bucket
	    model_artifacts = f"s3://{default_bucket}/{archive_name}"
	    s3.meta.client.upload_file(archive_name, default_bucket, archive_name)

	    # One timestamp per run so the model, config, and endpoint share a suffix.
	    timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

	    # Step 1: Model Creation
	    model_name = "sklearn-test" + timestamp
	    print("Model name: " + model_name)
	    create_model_response = client.create_model(
	        ModelName=model_name,
	        Containers=[
	            {
	                "Image": image_uri,
	                "Mode": "SingleModel",
	                "ModelDataUrl": model_artifacts,
	                "Environment": {
	                    "SAGEMAKER_SUBMIT_DIRECTORY": model_artifacts,
	                    "SAGEMAKER_PROGRAM": "inference.py",
	                },
	            }
	        ],
	        ExecutionRoleArn=role,
	    )
	    print("Model Arn: " + create_model_response["ModelArn"])

	    # Step 2: EPC Creation
	    sklearn_epc_name = "sklearn-epc" + timestamp
	    endpoint_config_response = client.create_endpoint_config(
	        EndpointConfigName=sklearn_epc_name,
	        ProductionVariants=[
	            {
	                "VariantName": "sklearnvariant",
	                "ModelName": model_name,
	                "InstanceType": "ml.c5.large",
	                "InitialInstanceCount": 1,
	            },
	        ],
	    )
	    print(
	        "Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"]
	    )

	    # Step 3: EP Creation
	    endpoint_name = "sklearn-local-ep" + timestamp
	    create_endpoint_response = client.create_endpoint(
	        EndpointName=endpoint_name,
	        EndpointConfigName=sklearn_epc_name,
	    )
	    print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

	    # Monitor creation
	    description = _wait_for_endpoint(endpoint_name)
	    print(description)

	    # Do not report an endpoint that never came up as if it were usable.
	    status = description["EndpointStatus"]
	    if status != "InService":
	        reason = description.get("FailureReason", "no failure reason reported")
	        raise RuntimeError(
	            f"Endpoint {endpoint_name} ended in status {status!r}: {reason}"
	        )

	    return endpoint_name


	if __name__ == "__main__":
	    # Deploy the endpoint, then record its name in artifacts/endpoint.txt.
	    endpoint = main()
	    with open("artifacts/endpoint.txt", "w") as endpoint_file:
	        endpoint_file.write(endpoint)