Skip to content

Instantly share code, notes, and snippets.

@helton
Created August 14, 2024 05:05
Show Gist options
  • Save helton/52028432618dac03637403957d9b7070 to your computer and use it in GitHub Desktop.
AWS Bedrock via Portkey inside EKS
# via aws cli
kubectl run aws-cli-pod- --rm -i --tty --image=amazon/aws-cli:2.13.9 --restart=Never --generate-name -- aws sts get-caller-identity
# via boto3
kubectl run aws-credentials-pod- --rm -i --tty --image=python:3.12-slim --restart=Never --generate-name -- bash -c "pip install boto3 && python3 -c \"import boto3; session = boto3.Session(); credentials = session.get_credentials(); print(f'AWS_ACCESS_KEY_ID: {credentials.access_key}'); print(f'AWS_SECRET_ACCESS_KEY: {credentials.secret_key}'); print(f'AWS_SESSION_TOKEN: {credentials.token}')\""
# Notes
# - default session TTL = 1h
import json
import logging
import os
import boto3
from botocore.exceptions import NoCredentialsError, ClientError
# Module-level logger for this script.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# AWS connection settings are taken from the environment
# (e.g. injected by EKS / IRSA, or exported in the shell).
aws_profile = os.getenv("AWS_PROFILE")
aws_region = os.getenv("AWS_REGION")
logger.info(f"Using AWS profile '{aws_profile}' and region '{aws_region}'")
def generate_embeddings(model_id, body, bedrock_client=None):
    """
    Generate a vector of embeddings for a text input using Amazon Titan
    Multimodal Embeddings G1 on demand.

    Args:
        model_id (str): The Bedrock model ID to invoke.
        body (str): The JSON request body to send to the model.
        bedrock_client: Optional pre-built ``bedrock-runtime`` client. When
            omitted, a new client is created in the module-configured region
            (backward compatible with the original behavior).

    Returns:
        dict: The parsed model response — the generated embeddings, token
        information, and the reason the model stopped generating embeddings.

    Raises:
        Exception: If the model response carries an error ``message`` field.
    """
    logger = logging.getLogger(__name__)
    logger.info("Generating embeddings with Amazon Titan Multimodal Embeddings G1 model %s", model_id)

    if bedrock_client is None:
        # Default: create a fresh client per call, using the env-derived region.
        bedrock_client = boto3.client(service_name='bedrock-runtime', region_name=aws_region)

    response = bedrock_client.invoke_model(
        body=body,
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response.get('body').read())

    # Titan signals failure by including a "message" field in the body.
    finish_reason = response_body.get("message")
    if finish_reason is not None:
        raise Exception(f"Embeddings generation error: {finish_reason}")

    return response_body
def generate_message(bedrock_runtime, model_id, system_prompt, messages, max_tokens):
    """
    Invoke an Anthropic Claude model on Bedrock with the Messages API payload
    and return the parsed JSON response body.

    Args:
        bedrock_runtime: A ``bedrock-runtime`` client used to invoke the model.
        model_id (str): The Bedrock model ID to invoke.
        system_prompt (str): System prompt applied to the conversation.
        messages (list): Conversation turns (``role``/``content`` dicts).
        max_tokens (int): Maximum number of tokens the model may generate.

    Returns:
        dict: The parsed model response.
    """
    request = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "system": system_prompt,
        "messages": messages,
    }
    raw = bedrock_runtime.invoke_model(body=json.dumps(request), modelId=model_id)
    return json.loads(raw.get('body').read())
def test_embedding_model():
    """
    Smoke-test Amazon Titan Multimodal Embeddings G1 via Bedrock: request a
    256-length text embedding and print the vector and token count.
    """
    model_id = "amazon.titan-embed-image-v1"
    input_text = "What are the different services that you offer?"
    output_embedding_length = 256
    body = json.dumps({
        "inputText": input_text,
        "embeddingConfig": {
            "outputEmbeddingLength": output_embedding_length
        }
    })
    try:
        response = generate_embeddings(model_id, body)
        print(f"Generated text embeddings of length {output_embedding_length}: {response['embedding']}")
        print(f"Input text token count: {response['inputTextTokenCount']}")
    except ClientError as err:
        message = err.response["Error"]["Message"]
        logger.error("A client error occurred: %s", message)
        # Fixed typo in the user-facing message: "occured" -> "occurred".
        print("A client error occurred: " + format(message))
    except Exception as err:
        logger.error(err)
        print(err)
    else:
        # Runs only when no exception was raised.
        print(f"Finished generating text embeddings with Amazon Titan Multimodal Embeddings G1 model {model_id}.")
def test_text_model():
    """
    Smoke-test Anthropic Claude 3.5 Sonnet via Bedrock: one user-only turn,
    then a turn with a prefilled assistant response (Claude continues from
    the prefilled text).
    """
    try:
        bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name=aws_region)
        model_id = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
        system_prompt = "Please respond only with emoji."
        max_tokens = 1000

        # Prompt with user turn only.
        user_message = {"role": "user", "content": "Hello World"}
        messages = [user_message]
        response = generate_message(bedrock_runtime, model_id, system_prompt, messages, max_tokens)
        print("User turn only.")
        print(json.dumps(response, indent=4))

        # Prompt with both user turn and prefilled assistant response.
        # Anthropic Claude continues by using the prefilled assistant text.
        assistant_message = {"role": "assistant", "content": "<emoji>"}
        messages = [user_message, assistant_message]
        response = generate_message(bedrock_runtime, model_id, system_prompt, messages, max_tokens)
        print("User turn and prefilled assistant response.")
        print(json.dumps(response, indent=4))
    except ClientError as err:
        message = err.response["Error"]["Message"]
        logger.error("A client error occurred: %s", message)
        # Fixed typo in the user-facing message: "occured" -> "occurred".
        print("A client error occurred: " + format(message))
    except Exception as err:
        logger.error(err)
        print(err)
if __name__ == '__main__':
    # Make every boto3 client created later honour AWS_PROFILE.
    boto3.setup_default_session(profile_name=aws_profile)
    # Exercise both model families end-to-end.
    test_embedding_model()
    test_text_model()
import logging
import os
from dotenv import load_dotenv
from portkey_ai import Portkey
from botocore.session import Session
# Pull AWS_PROFILE / AWS_REGION (and any other settings) from a local .env file.
load_dotenv()

# Module-level logger for this script.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

aws_profile = os.getenv("AWS_PROFILE")
aws_region = os.getenv("AWS_REGION")
logger.info(f"Using AWS profile '{aws_profile}' and region '{aws_region}'")
if __name__ == '__main__':
    # Resolve an immutable snapshot of the profile's credentials so the
    # access key / secret / session token can be handed to Portkey.
    session = Session(profile=aws_profile)
    credentials = session.get_credentials().get_frozen_credentials()

    # Portkey gateway running locally, forwarding requests to AWS Bedrock.
    client = Portkey(
        base_url="http://localhost:8787/v1",
        api_key="test",
        provider="bedrock",
        aws_region=aws_region,
        aws_access_key_id=credentials.access_key,
        aws_secret_access_key=credentials.secret_key,
        aws_session_token=credentials.token
    )

    # CHAT
    chat_result = client.chat.completions.create(
        messages=[{"role": 'user', "content": 'Say this is a test'}],
        model='anthropic.claude-3-5-sonnet-20240620-v1:0',
        max_tokens=250  # Required field for Anthropic
    )
    print(chat_result)

    # EMBEDDING
    embedding_result = client.embeddings.create(
        model='amazon.titan-embed-image-v1',
        input='Name the tallest buildings in Hawaii'
    )
    print(embedding_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment