Last active
March 7, 2024 21:56
-
-
Save cedricvidal/d1598e3680cfa93dafa08669b98465f4 to your computer and use it in GitHub Desktop.
Consume Azure AI Pay As You Go (PAYG) Open Model endpoint (Llama 2, ...)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Those endpoints don't use the usual Azure OpenAI scheme, they use the OpenAI scheme.
# They also take the model field to route to the proper deployment, but I haven't verified this works
# Tested with openai 1.13.3
from openai import OpenAI
import logging

# Verbose logging so the outgoing HTTP request/response can be inspected.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s:%(lineno)d - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Replace this with the endpoint target
endpoint_url = ''
# Replace this with the endpoint key
api_key = ''

# Fail fast on missing configuration instead of sending a doomed request.
if not endpoint_url:
    raise Exception("An endpoint URL should be provided to invoke the endpoint")
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# PAYG endpoints expose an OpenAI-compatible API under the /v1 route.
base_url = endpoint_url + '/v1'

client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)

response = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",  # model = "deployment_name".
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
        {"role": "user", "content": "Do other Azure AI services support this too?"}
    ]
)

print(response.choices[0].message.content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import ssl | |
import requests | |
import logging | |
# Turn on wire-level debugging at the httplib layer (requests -> urllib3 -> http.client).
# The log shows the REQUEST (headers and data) and the RESPONSE headers; the
# response body itself is never logged by http.client.
try:
    import http.client as http_client
except ImportError:
    # Python 2 fallback
    import httplib as http_client

http_client.HTTPConnection.debuglevel = 1

# Logging must be initialized or the debug output is silently dropped.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
urllib3_logger = logging.getLogger("requests.packages.urllib3")
urllib3_logger.setLevel(logging.DEBUG)
urllib3_logger.propagate = True
def allowSelfSignedHttps(allowed):
    """Bypass client-side TLS certificate verification for this process.

    Takes effect only when *allowed* is truthy, PYTHONHTTPSVERIFY is unset
    in the environment, and this Python build exposes
    ``ssl._create_unverified_context``.
    """
    verify_forced = os.environ.get('PYTHONHTTPSVERIFY', '')
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if allowed and not verify_forced and unverified_factory:
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
# Request data goes here
# The payload below uses the OpenAI-compatible chat-completions schema.
# (An AzureML-native "input_data" payload that used to be built here was dead
# code — it was immediately overwritten by this dict — and has been removed.)
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "llama-2-7b-hf",
    "messages": [
        {"role": "user", "content": "Can you tell me about your jackets?" }
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

body = json.dumps(data)

# Replace this with the endpoint target
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
# Fixed: this was `Node`, an undefined name that raised NameError at runtime.
# None means "no pinned deployment"; set a deployment name to pin one.
model_deployment = None  # replace this if need be
api_type = "chat"  # chat or other

# Fail fast: the endpoint rejects unauthenticated calls anyway.
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    """Ensure *endpoint_url* ends with the OpenAI-style route for *api_type*.

    "chat" (case-insensitive) maps to /v1/chat/completions; any other value
    maps to /v1/completions. The suffix is appended only when missing.
    """
    suffix = ("/v1/chat/completions"
              if api_type.lower() == "chat"
              else "/v1/completions")
    return endpoint_url if endpoint_url.endswith(suffix) else endpoint_url + suffix
# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type':'application/json',
    'Authorization':('Bearer '+ api_key),
}
if model_deployment is not None:
    headers['azureml-model-deployment'] = model_deployment

endpoint_url = sanitize_endpoint_url(url, api_type)
print("Calling " + endpoint_url)
try:
    result = requests.post(endpoint_url, data=body, headers=headers)
    print(result.text)
except requests.exceptions.RequestException as error:
    # Fixed: RequestException has no .code/.info()/.read() — those are
    # urllib.error.HTTPError APIs, so the old handler itself raised
    # AttributeError. RequestException carries an optional .response instead.
    print("The request failed: " + str(error))
    if error.response is not None:
        print("Status code: " + str(error.response.status_code))
        # The headers include the request ID and the timestamp, which are
        # useful for debugging the failure.
        print(error.response.headers)
        print(error.response.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import ssl
import urllib.error
import urllib.request
def allowSelfSignedHttps(allowed):
    """Opt this process out of HTTPS certificate verification.

    No-op unless *allowed* is truthy, the PYTHONHTTPSVERIFY environment
    variable is unset/empty, and ``ssl._create_unverified_context`` exists
    on this Python build.
    """
    env_override = os.environ.get('PYTHONHTTPSVERIFY', '')
    if allowed and not env_override and getattr(ssl, '_create_unverified_context', None):
        # Route default HTTPS context creation through the unverified factory.
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.
# Build the scoring request. The payload follows the OpenAI-compatible
# chat-completions schema; adjust fields to whatever your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "Llama-2-7b-chat-gmqyf",
    "messages": [
        {"role": "system", "content": "You're a useful assistant" },
        {"role": "user", "content": "Can you tell me about your jackets?" }
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

# Endpoint target URL — fill in before running.
url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
# "chat" selects /v1/chat/completions; any other value selects /v1/completions.
api_type = "chat"

# Fail fast: the endpoint rejects unauthenticated calls anyway.
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    """Return *endpoint_url* with the route for *api_type* appended if missing.

    api_type "chat" (any case) targets /v1/chat/completions; everything else
    targets /v1/completions.
    """
    if api_type.lower() == "chat":
        route = "/v1/chat/completions"
    else:
        route = "/v1/completions"
    if endpoint_url.endswith(route):
        return endpoint_url
    return endpoint_url + route
# Serialize the payload once as UTF-8-encoded JSON bytes.
body = str.encode(json.dumps(data))

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-type':'application/json',
    'Authorization':('Bearer '+ api_key),
}

endpoint_url = sanitize_endpoint_url(url, api_type)
req = urllib.request.Request(endpoint_url, body, headers)
try:
    # Fixed: the response was never closed; the context manager releases the
    # underlying socket deterministically.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment