
@cedricvidal
Last active March 7, 2024 21:56
Consume Azure AI Pay As You Go (PAYG) Open Model endpoint (Llama 2, ...)
# These endpoints don't use the usual Azure OpenAI scheme; they use the OpenAI scheme.
# They also take the model field to route to the proper deployment, but I haven't verified this works.
# Tested with openai 1.13.3
from openai import OpenAI
import logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s:%(lineno)d - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
# Replace this with the endpoint target
endpoint_url = ''
# Replace this with the endpoint key
api_key = ''
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
base_url = endpoint_url + '/v1'
client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)
response = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",  # model = "deployment_name".
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
        {"role": "user", "content": "Do other Azure AI services support this too?"}
    ]
)
print(response.choices[0].message.content)
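If the deployment also honors the OpenAI streaming protocol (not verified here), the same client can emit tokens incrementally. A minimal sketch, assuming stream=True is supported by the endpoint:

# Hedged sketch: assumes the PAYG endpoint supports OpenAI-style streaming (stream=True).
stream = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",
    messages=[{"role": "user", "content": "Tell me a short joke."}],
    stream=True,
)
for chunk in stream:
    # Each chunk carries a delta; content can be None on role/stop chunks.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

The next snippet makes the same call with the requests library, with verbose HTTP wire logging enabled for debugging.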
import json
import os
import ssl
import requests
import logging
# These two lines enable debugging at httplib level (requests->urllib3->http.client)
# You will see the REQUEST, including HEADERS and DATA, and RESPONSE with HEADERS but without DATA.
# The only thing missing will be the response.body which is not logged.
try:
    import http.client as http_client
except ImportError:
    # Python 2
    import httplib as http_client
http_client.HTTPConnection.debuglevel = 1
# You must initialize logging, otherwise you'll not see debug output.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True
def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
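Note (my assumption, worth verifying): patching ssl._create_default_https_context affects urllib/http.client, but requests builds its own SSL context through urllib3, so the patch above likely has no effect on the requests.post call below. To skip verification with requests, you would disable it on the call itself:

# Hedged alternative for requests-based calls with a self-signed certificate:
# result = requests.post(endpoint_url, data=body, headers=headers, verify=False)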
# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
# Native Azure ML scoring payload, kept here for reference; it is immediately
# overwritten by the OpenAI-style payload below.
data = {
    "input_data": {
        "input_string": ["I believe the meaning of life is"],
        "parameters": {
            "top_p": 0.9,
            "temperature": 0.6,
            "max_new_tokens": 96,
            "do_sample": True
        }
    }
}
# OpenAI-style chat payload. Both max_new_tokens and max_tokens are set,
# presumably because deployments differ in which parameter name they accept.
data = {
    "model": "llama-2-7b-hf",
    "messages": [
        {"role": "user", "content": "Can you tell me about your jackets?"}
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}
body = json.dumps(data)
# Replace this with the endpoint target
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
model_deployment = None  # replace this with a deployment name if need be
api_type = "chat"  # chat or other
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    if api_type.lower() == "chat":
        if not endpoint_url.endswith("/v1/chat/completions"):
            return endpoint_url + "/v1/chat/completions"
    else:
        if not endpoint_url.endswith("/v1/completions"):
            return endpoint_url + "/v1/completions"
    return endpoint_url
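For illustration (the host below is hypothetical), the helper appends the OpenAI-style path only when it is missing:

# sanitize_endpoint_url("https://my-endpoint.eastus2.inference.ai.azure.com", "chat")
#   -> "https://my-endpoint.eastus2.inference.ai.azure.com/v1/chat/completions"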
# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type': 'application/json',
    'Authorization': ('Bearer ' + api_key),
}
if model_deployment is not None:
    headers['azureml-model-deployment'] = model_deployment
endpoint_url = sanitize_endpoint_url(url, api_type)
print("Calling " + endpoint_url)
try:
    result = requests.post(endpoint_url, data=body, headers=headers)
    result.raise_for_status()
    print(result.text)
except requests.exceptions.HTTPError as error:
    print("The request failed with status code: " + str(error.response.status_code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.response.headers)
    print(error.response.text)
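The final snippet makes the same chat call using only the Python standard library (urllib.request). Unlike requests, urllib does honor the patched default SSL context, so the self-signed-certificate workaround below actually applies here.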
import urllib.request
import urllib.error  # imported explicitly for the HTTPError handler below
import json
import os
import ssl
def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "Llama-2-7b-chat-gmqyf",
    "messages": [
        {"role": "system", "content": "You're a useful assistant"},
        {"role": "user", "content": "Can you tell me about your jackets?"}
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}
# Replace this with the endpoint target
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
api_type = "chat" # chat or other
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    if api_type.lower() == "chat":
        if not endpoint_url.endswith("/v1/chat/completions"):
            return endpoint_url + "/v1/chat/completions"
    else:
        if not endpoint_url.endswith("/v1/completions"):
            return endpoint_url + "/v1/completions"
    return endpoint_url
body = str.encode(json.dumps(data))
# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type': 'application/json',
    'Authorization': ('Bearer ' + api_key),
}
endpoint_url = sanitize_endpoint_url(url, api_type)
req = urllib.request.Request(endpoint_url, body, headers)
try:
    response = urllib.request.urlopen(req)
    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
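The bytes printed above should be an OpenAI-style chat completion body if the endpoint behaves as described at the top of this gist. A minimal sketch to pull out the assistant's message, assuming that response shape:

# Hedged sketch: assumes an OpenAI-style chat completion response body.
payload = json.loads(result)
print(payload["choices"][0]["message"]["content"])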