Sofian Hamiti (sofianhamiti)
import boto3


class LambdaFunction:
    def __init__(self, name, container, model_s3_uri, memory, role, region):
        self.name = name
        self.container = container
        self.model_s3_uri = model_s3_uri
        self.memory = memory
        self.role = role
        self.region = region
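A minimal sketch of how these fields could map onto a container-image Lambda with boto3; the helper name and the MODEL_S3_URI environment variable are assumptions, not part of the original gist:

import boto3


def create_container_lambda(fn):
    # create a Lambda function from an ECR container image and point it at the model in S3
    client = boto3.client('lambda', region_name=fn.region)
    return client.create_function(
        FunctionName=fn.name,
        PackageType='Image',
        Code={'ImageUri': fn.container},
        Role=fn.role,
        MemorySize=int(fn.memory),  # MB
        Environment={'Variables': {'MODEL_S3_URI': fn.model_s3_uri}},
    )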
import os
import json
import logging
import argparse

import invokust

from load_test.api_user import ApiUser
from stack.api_gateway import ApiGateway
from stack.lambda_function import LambdaFunction

logging.basicConfig(level=logging.INFO)

try:
    logging.info('LOAD TESTING THE API')
    stats = run_load_test(host=api.api_endpoint)
    # get the 95th percentile response time from the load test stats
    response_time_percentile = stats['requests']['POST_/']['response_time_percentiles'][95]
    logging.info(f'RESPONSE TIME PERCENTILES: {response_time_percentile}')
    logging.info(f'LAMBDA MEMORY: {args.lambda_memory}')
    # we compute an aggregate score to optimize both latency and Lambda memory allocation (cost)
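    # sketch only (assumption, not the original formula): add a tunable memory
    # penalty to the p95 latency so that a lower score means faster and cheaper
    memory_weight = 0.01
    score = response_time_percentile + memory_weight * float(args.lambda_memory)
    logging.info(f'AGGREGATE SCORE: {score}')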
version: 0.2
phases:
  pre_build:
    commands:
      - IMAGE_TAG=$(echo $CODEBUILD_RESOLVED_SOURCE_VERSION | cut -c 1-8)
      - sh build_and_push.sh ${IMAGE_NAME} ${IMAGE_TAG}
      - echo "IMAGE PUSHED TO ECR"
  build:
{
  "DomainId": "<domain-id>",
  "DefaultUserSettings": {
    "KernelGatewayAppSettings": {
      "CustomImages": [
        {
          "ImageName": "tf25",
          "AppImageConfigName": "tf25-config"
        }
      ]
    }
  }
}
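A hedged sketch of applying these settings to the Studio domain with boto3; the input file name is hypothetical:

import json

import boto3

# load the settings shown above and apply them to the Studio domain
with open('update_domain.json') as f:
    settings = json.load(f)

sagemaker = boto3.client('sagemaker')
sagemaker.update_domain(
    DomainId=settings['DomainId'],
    DefaultUserSettings=settings['DefaultUserSettings'],
)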
FROM nvcr.io/nvidia/tritonserver:21.05-py3

# INSTALL AWS CLI
RUN apt-get update && apt-get install -y \
    unzip \
    && rm -rf /var/lib/apt/lists/*

RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
    && unzip awscliv2.zip \
    && ./aws/install
from aws_cdk import core

from stacks.iam_stack import IamStack
from stacks.vpc_stack import VpcStack
from stacks.ecs_stack import EcsStack


class InferenceStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
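A sketch of a CDK app entrypoint that could deploy these stacks; the stack IDs and the assumption that each constructor takes only a scope and an id are mine, not from the original gist:

from aws_cdk import core

from stacks.iam_stack import IamStack
from stacks.vpc_stack import VpcStack
from stacks.ecs_stack import EcsStack

# wire the stacks into a single CDK app and synthesize the CloudFormation templates
app = core.App()
IamStack(app, 'iam-stack')
VpcStack(app, 'vpc-stack')
EcsStack(app, 'ecs-stack')
app.synth()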
#!/bin/bash
## GET MODELS FROM S3 INTO THE /TMP FOLDER
aws s3 sync ${MODEL_REPOSITORY} /tmp/model_repository
# RUN TRITON
/opt/tritonserver/bin/tritonserver --model-repository=/tmp/model_repository
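Once the container has synced the models and started Triton this way, its readiness can be checked over the server's HTTP endpoints. A sketch, assuming the default HTTP port 8000 and a placeholder model name 'my_model':

import requests

# server-level readiness (v2 inference protocol exposed by Triton)
ready = requests.get('http://localhost:8000/v2/health/ready', timeout=5)
print('server ready:', ready.status_code == 200)

# per-model readiness for a model in the synced repository
model_ready = requests.get('http://localhost:8000/v2/models/my_model/ready', timeout=5)
print('model ready:', model_ready.status_code == 200)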
{
  "Action": [
    "sagemaker:CreateApp"
  ],
  "Resource": [
    "*"
  ],
  "Effect": "Deny",
  "Sid": "BlockSagemakerOtherThanT3",
  "Condition": {