Skip to content

Instantly share code, notes, and snippets.

@fsndzomga
Created February 10, 2024 19:51
Show Gist options
  • Save fsndzomga/5be8050c80f03adea0e56c139ae6e34e to your computer and use it in GitHub Desktop.
Save fsndzomga/5be8050c80f03adea0e56c139ae6e34e to your computer and use it in GitHub Desktop.
import json
import boto3
# Formatting helpers used when printing results to a terminal.
newline = "\n"
bold = "\033[1m"    # ANSI escape sequence: start bold text
unbold = "\033[0m"  # ANSI escape sequence: reset text attributes

# Name of the deployed SageMaker endpoint; replace the placeholder before running.
endpoint_name = "name_of_your_endpoint"
def query_endpoint(
    payload,
    inference_component_name="huggingface-llm-mistral-7b-20240210-171055",
    region_name="us-east-1",
):
    """Invoke the SageMaker endpoint with ``payload`` and print the result.

    The payload is serialized to UTF-8 JSON and sent to the endpoint named by
    the module-level ``endpoint_name``. The response body is parsed as JSON,
    the ``"generated_text"`` field of its first element is printed next to
    the input text, and that generated text is returned.

    Args:
        payload: JSON-serializable dict. Must contain an ``"inputs"`` key,
            which is echoed in the printed output.
        inference_component_name: Inference component hosted on the endpoint
            that should serve the request.
        region_name: AWS region used to create the runtime client.

    Returns:
        The generated text extracted from the endpoint response.

    Raises:
        KeyError, IndexError, TypeError: If the parsed response is not a
            non-empty list whose first element has a ``"generated_text"``
            key, or if ``payload`` has no ``"inputs"`` key.
    """
    client = boto3.client("runtime.sagemaker", region_name=region_name)
    response = client.invoke_endpoint(
        EndpointName=endpoint_name,
        InferenceComponentName=inference_component_name,
        ContentType="application/json",
        Body=json.dumps(payload).encode("utf-8"),
    )

    # The response body is a stream; read it fully before decoding the JSON.
    model_predictions = json.loads(response["Body"].read())
    generated_text = model_predictions[0]["generated_text"]

    print(
        f"Input Text: {payload['inputs']}{newline}"
        f"Generated Text: {bold}{generated_text}{unbold}{newline}"
    )
    # Returned in addition to being printed so callers can reuse the output.
    return generated_text
# Code generation example: ask the model for a JavaScript factorial program,
# capping the completion at 200 newly generated tokens.
payload = dict(
    inputs="Write a program to compute factorial in javascript:",
    parameters=dict(max_new_tokens=200),
)
query_endpoint(payload)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment