Last active
March 7, 2024 21:56
-
-
Save cedricvidal/d1598e3680cfa93dafa08669b98465f4 to your computer and use it in GitHub Desktop.
Consume Azure AI Pay As You Go (PAYG) Open Model endpoint (Llama 2, ...)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Those endpoints don't use the usual Azure OpenAI scheme, they use the OpenAI scheme.
# They also take the model field to route to the proper deployment, but I haven't verified this works
# Tested with openai 1.13.3
from openai import OpenAI
import logging

# Verbose logging so the outgoing HTTP request/response can be inspected.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s:%(lineno)d - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Replace this with the endpoint target
endpoint_url = ''
# Replace this with the endpoint key
api_key = ''

# Fail fast on missing configuration instead of sending a doomed request.
if not endpoint_url:
    raise Exception("An endpoint URL should be provided to invoke the endpoint")
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# PAYG endpoints expose an OpenAI-compatible API under the /v1 route.
base_url = endpoint_url + '/v1'

client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)

response = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",  # model = "deployment_name".
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
        {"role": "user", "content": "Do other Azure AI services support this too?"}
    ]
)

print(response.choices[0].message.content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import ssl | |
import requests | |
import logging | |
# Turn on wire-level debugging at the httplib layer (requests -> urllib3 -> http.client).
# The log shows the REQUEST (headers and data) and the RESPONSE headers; the
# response body itself is never logged by http.client.
try:
    import http.client as http_client
except ImportError:
    # Python 2 fallback
    import httplib as http_client

http_client.HTTPConnection.debuglevel = 1

# Logging must be initialized or the debug output is silently dropped.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
urllib3_logger = logging.getLogger("requests.packages.urllib3")
urllib3_logger.setLevel(logging.DEBUG)
urllib3_logger.propagate = True
def allowSelfSignedHttps(allowed):
    """Bypass client-side TLS certificate verification for this process.

    Takes effect only when *allowed* is truthy, PYTHONHTTPSVERIFY is unset
    in the environment, and this Python build exposes
    ``ssl._create_unverified_context``.
    """
    verify_forced = os.environ.get('PYTHONHTTPSVERIFY', '')
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if allowed and not verify_forced and unverified_factory:
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
# Request data goes here
# The payload below uses the OpenAI-compatible chat-completions schema.
# (An AzureML-native "input_data" payload that used to be built here was dead
# code — it was immediately overwritten by this dict — and has been removed.)
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "llama-2-7b-hf",
    "messages": [
        {"role": "user", "content": "Can you tell me about your jackets?" }
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

body = json.dumps(data)

# Replace this with the endpoint target
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
# Fixed: this was `Node`, an undefined name that raised NameError at runtime.
# None means "no pinned deployment"; set a deployment name to pin one.
model_deployment = None  # replace this if need be
api_type = "chat"  # chat or other

# Fail fast: the endpoint rejects unauthenticated calls anyway.
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    """Ensure *endpoint_url* ends with the OpenAI-style route for *api_type*.

    "chat" (case-insensitive) maps to /v1/chat/completions; any other value
    maps to /v1/completions. The suffix is appended only when missing.
    """
    suffix = ("/v1/chat/completions"
              if api_type.lower() == "chat"
              else "/v1/completions")
    return endpoint_url if endpoint_url.endswith(suffix) else endpoint_url + suffix
# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type':'application/json',
    'Authorization':('Bearer '+ api_key),
}
if model_deployment is not None:
    headers['azureml-model-deployment'] = model_deployment

endpoint_url = sanitize_endpoint_url(url, api_type)
print("Calling " + endpoint_url)
try:
    result = requests.post(endpoint_url, data=body, headers=headers)
    print(result.text)
except requests.exceptions.RequestException as error:
    # Fixed: RequestException has no .code/.info()/.read() — those are
    # urllib.error.HTTPError APIs, so the old handler itself raised
    # AttributeError. RequestException carries an optional .response instead.
    print("The request failed: " + str(error))
    if error.response is not None:
        print("Status code: " + str(error.response.status_code))
        # The headers include the request ID and the timestamp, which are
        # useful for debugging the failure.
        print(error.response.headers)
        print(error.response.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import ssl
import urllib.error
import urllib.request
def allowSelfSignedHttps(allowed):
    """Opt this process out of HTTPS certificate verification.

    No-op unless *allowed* is truthy, the PYTHONHTTPSVERIFY environment
    variable is unset/empty, and ``ssl._create_unverified_context`` exists
    on this Python build.
    """
    env_override = os.environ.get('PYTHONHTTPSVERIFY', '')
    if allowed and not env_override and getattr(ssl, '_create_unverified_context', None):
        # Route default HTTPS context creation through the unverified factory.
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
# Build the scoring request. The payload follows the OpenAI-compatible
# chat-completions schema; adjust fields to whatever your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "Llama-2-7b-chat-gmqyf",
    "messages": [
        {"role": "system", "content": "You're a useful assistant" },
        {"role": "user", "content": "Can you tell me about your jackets?" }
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

# Endpoint target URL — fill in before running.
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
# "chat" selects /v1/chat/completions; any other value selects /v1/completions.
api_type = "chat"

# Fail fast: the endpoint rejects unauthenticated calls anyway.
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    """Return *endpoint_url* with the route for *api_type* appended if missing.

    api_type "chat" (any case) targets /v1/chat/completions; everything else
    targets /v1/completions.
    """
    if api_type.lower() == "chat":
        route = "/v1/chat/completions"
    else:
        route = "/v1/completions"
    if endpoint_url.endswith(route):
        return endpoint_url
    return endpoint_url + route
# Serialize the payload once as UTF-8-encoded JSON bytes.
body = str.encode(json.dumps(data))

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-type':'application/json',
    'Authorization':('Bearer '+ api_key),
}

endpoint_url = sanitize_endpoint_url(url, api_type)
req = urllib.request.Request(endpoint_url, body, headers)
try:
    # Fixed: the response was never closed; the context manager releases the
    # underlying socket deterministically.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment