AWS SageMaker AI
import json
import boto3

# Name of the deployed SageMaker endpoint (truncated here; fill in the full name)
ENDPOINT = "huggingface-pytorch-tgi-inference-"

sagemaker_runtime = boto3.client("sagemaker-runtime", region_name='us-east-1')

def lambda_handler(event, context):
    # Read the prompt from the ?query=... query-string parameter
    query_params = event['queryStringParameters']
    query = query_params['query']

    # TGI-style generation payload
    payload = {
        "inputs": query,
        "parameters": {
            "max_new_tokens": 256,
            "top_p": 0.9,
            "temperature": 0.6,
            "top_k": 50,
            "repetition_penalty": 1.03,
            "do_sample": True
        }
    }

    # Invoke the SageMaker endpoint and decode the JSON response
    response = sagemaker_runtime.invoke_endpoint(
        EndpointName=ENDPOINT,
        ContentType="application/json",
        Body=json.dumps(payload)
    )
    predictions = json.loads(response['Body'].read().decode('utf-8'))
    final_result = predictions[0]['generated_text']

    return {
        'statusCode': 200,
        'body': json.dumps(final_result)
    }
##### LAMBDA API TEST ######
{
    "httpMethod": "GET",
    "path": "/example",
    "queryStringParameters": {
        "query": "Write an article on Computer Vision"
    }
}
#### BROWSER/POSTMAN API TEST ###
### Example URL -> www.example_function_url.com/?query=What is Deep Learning
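# A minimal sketch of calling the Lambda Function URL from Python instead of
# the browser (the URL below is a placeholder -- substitute your own):
import requests

FUNCTION_URL = "https://<your-function-url>.lambda-url.us-east-1.on.aws/"  # placeholder
resp = requests.get(FUNCTION_URL, params={"query": "What is Deep Learning"})
print(resp.status_code, resp.json())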
#### Delete Every AWS Resource Created #####
# Delete Notebook Instance -> Model -> Model Endpoints
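# A hedged cleanup sketch using boto3; the names below are placeholders and
# should be replaced with the resources actually created above. The notebook
# instance itself can be deleted from the SageMaker console.
import boto3

sm = boto3.client("sagemaker", region_name="us-east-1")
endpoint_name = "huggingface-pytorch-tgi-inference-XXXXXXX"  # placeholder

# Deleting the endpoint stops the per-hour instance billing
sm.delete_endpoint(EndpointName=endpoint_name)
sm.delete_endpoint_config(EndpointConfigName=endpoint_name)
# For SDK-deployed models the model name often matches the endpoint name;
# verify in the SageMaker console before deleting
sm.delete_model(ModelName=endpoint_name)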
# Shared as a Python file; please convert it into a notebook
!pip install transformers einops accelerate bitsandbytes
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load the LaMini-T5 checkpoint locally
checkpoint = "MBZUAI/LaMini-T5-738M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
!pip install langchain langchain-community langchain-huggingface
from langchain_huggingface import HuggingFacePipeline

def slm_pipeline():
    # Wrap the local text2text-generation pipeline for use with LangChain
    pipe = pipeline(
        "text2text-generation",
        model=base_model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95
    )
    local_slm = HuggingFacePipeline(pipeline=pipe)
    return local_slm
input_prompt = "Write an article about Blockchain and its benefits"
model = slm_pipeline()
gen_text = model.invoke(input_prompt)
gen_text
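# Optional: since langchain is installed above, the local pipeline can also be
# wired into a simple prompt-template chain -- a sketch, not required:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("Write an article about {topic} and its benefits")
chain = prompt | model
print(chain.invoke({"topic": "Blockchain"}))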
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the SageMaker execution role (falls back to a named IAM role
# when running outside a SageMaker notebook)
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
# Hub Model configuration. https://huggingface.co/models
hub = {
    'HF_MODEL_ID': 'MBZUAI/LaMini-T5-738M',
    'HF_TASK': 'text2text-generation',
    'device_map': 'auto',
    'torch_dtype': 'torch.float32'
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="3.2.3"),
    env=hub,
    role=role,
)
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    container_startup_health_check_timeout=300,
)
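# The auto-generated endpoint name is needed later for invoke_endpoint and for
# the Lambda's ENDPOINT constant; it can be read off the predictor object:
print(predictor.endpoint_name)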
# send request
predictor.predict({
    "inputs": "Write an article about Cyber Security",
})
ENDPOINT = "huggingface-pytorch-tgi-inference-XXXXXXX"
import boto3

sagemaker_runtime = boto3.client("sagemaker-runtime", region_name='us-east-1')
endpoint_name = ENDPOINT

# API Payload
prompt = "Write an article on Deep learning"
payload = {
    'inputs': prompt,
    'parameters': {
        'max_new_tokens': 256,
        'do_sample': True,
        'temperature': 0.3,
        'top_p': 0.7,
        'top_k': 50,
        'repetition_penalty': 1.03
    }
}

# Invoke the deployed endpoint directly with the runtime client
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(payload)
)
predictions = json.loads(response['Body'].read().decode('utf-8'))
final_result = predictions[0]['generated_text']
print(final_result)