Last active
March 12, 2025 16:45
-
-
Save pamelafox/92f570bb71308d27c4419b1d29c34dbb to your computer and use it in GitHub Desktop.
Using Azure AI SDK and OpenAI SDK with AIServices deepseek deployment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os

from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
from dotenv import load_dotenv
from openai import AzureOpenAI

load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

# Authenticate as the azd developer identity for the given tenant.
azd_credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
# ml.azure.com for serverless, cognitiveservices for AIServices
bearer_token_provider = get_bearer_token_provider(azd_credential, "https://cognitiveservices.azure.com/.default")

client = AzureOpenAI(
    api_version=os.environ["AZURE_INFERENCE_API_VERSION"],
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    azure_ad_token_provider=bearer_token_provider,
)

system_message = {
    "role": "system",
    "content": "You are a helpful assistant.",
}
user_message = {
    "role": "user",
    "content": "What is the capital of the United States?",
}

stream = client.chat.completions.create(
    # Passing None here (despite the str annotation) is the only way to stop the
    # AzureOpenAI class from appending 'deployments/model' to the request URL.
    model=None,
    # Since model=None above, the actual model name travels in this header instead.
    extra_headers={"x-ms-model-mesh-model-name": "DeepSeek-R1"},
    messages=[system_message, user_message],
    max_tokens=2048,
    stream=True,
)

# Print the streamed completion token-by-token as chunks arrive.
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content, end="")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import http.client as http_client
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv

# Dump raw HTTP request/response traffic for debugging the inference endpoint.
http_client.HTTPConnection.debuglevel = 1

load_dotenv(override=True)
# logging.basicConfig(level=logging.DEBUG)
# logging.getLogger("urllib3").setLevel(logging.DEBUG)
# logging.getLogger("urllib3").propagate = True

# Key-based auth alternative:
# client = ChatCompletionsClient(
#     endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
#     credential=AzureKeyCredential(os.environ["AZURE_INFERENCE_KEY"]),
# )

# Entra ID (keyless) auth via the azd developer identity.
client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
    credential=AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"]),
    credential_scopes=["https://cognitiveservices.azure.com/.default"],
    model="DeepSeek-R1",
)

chat_messages = [
    SystemMessage(content="You are a helpful assistant."),
    UserMessage(content="How many languages are in the world?"),
]
stream = client.complete(
    messages=chat_messages,
    max_tokens=2048,
    stream=True,
)

# Print the streamed completion token-by-token as chunks arrive.
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content, end="")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os
import time

from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
# ml.azure.com for serverless, cognitiveservices for AIServices
openai_token = credential.get_token("https://cognitiveservices.azure.com/.default")
client = OpenAI(
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    # The plain OpenAI client has no token-provider hook, so we seed it with a
    # raw Entra ID access token and refresh it manually before each request.
    api_key=openai_token.token,
    # This seems to be required, as the chunks won't stream without it
    default_query={"api-version": os.environ["AZURE_INFERENCE_API_VERSION"]},
)


def maybe_refresh_token(token):
    """Return a valid access token, refreshing if it expires within 60 seconds.

    Also updates ``client.api_key`` when a refresh happens. The refreshed token
    is returned (and must be re-assigned by the caller); the previous version
    rebound only the local name, so the caller kept checking a stale token's
    ``expires_on`` forever.
    """
    if token.expires_on < (time.time() + 60):
        token = credential.get_token("https://cognitiveservices.azure.com/.default")
        client.api_key = token.token
    return token


# Before a request, we need to check if the token is expired
openai_token = maybe_refresh_token(openai_token)

result = client.chat.completions.create(
    # Against AIServices the model routes by name; no deployments/ URL rewriting
    # happens with the plain OpenAI client, so the real model name goes here.
    model="DeepSeek-R1",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "What is the capital of the United States?",
        },
    ],
    max_tokens=2048,
    stream=True,
)

# Print the streamed completion token-by-token as chunks arrive.
for update in result:
    if update.choices:
        print(update.choices[0].delta.content, end="")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment