Skip to content

Instantly share code, notes, and snippets.

@pamelafox
Last active March 12, 2025 16:45
Show Gist options
  • Save pamelafox/92f570bb71308d27c4419b1d29c34dbb to your computer and use it in GitHub Desktop.
Using Azure AI SDK and OpenAI SDK with AIServices deepseek deployment
import logging
import os
from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
from dotenv import load_dotenv
from openai import AzureOpenAI
# Load settings from .env, overriding any already-set environment variables.
load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

# Token scope: ml.azure.com for serverless endpoints, cognitiveservices for AIServices.
azd_credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
client = AzureOpenAI(
    api_version=os.environ["AZURE_INFERENCE_API_VERSION"],
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    azure_ad_token_provider=get_bearer_token_provider(
        azd_credential, "https://cognitiveservices.azure.com/.default"
    ),
)
result = client.chat.completions.create(
    # model *must* be None here, even though the parameter is typed as str:
    # that's the only way to make the AzureOpenAI class skip appending
    # 'deployments/<model>' to the request URL.
    model=None,
    # Because model=None, the model name has to be passed in this header instead.
    extra_headers={"x-ms-model-mesh-model-name": "DeepSeek-R1"},
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "What is the capital of the United States?",
        },
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
import http.client as http_client
import os
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv
# Turn on wire-level debug output from http.client so requests/responses are visible.
http_client.HTTPConnection.debuglevel = 1

load_dotenv(override=True)

# Alternative: verbose logging through the logging module instead of http.client.
# logging.basicConfig(level=logging.DEBUG)
# logging.getLogger("urllib3").setLevel(logging.DEBUG)
# logging.getLogger("urllib3").propagate = True

# Alternative: key-based auth instead of Entra ID.
# client = ChatCompletionsClient(
#     endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
#     credential=AzureKeyCredential(os.environ["AZURE_INFERENCE_KEY"]),
# )

# Authenticate with the azd CLI (Entra ID) credential against the AIServices scope.
inference_endpoint = os.environ["AZURE_INFERENCE_ENDPOINT"]
azd_credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
client = ChatCompletionsClient(
    endpoint=inference_endpoint,
    credential=azd_credential,
    credential_scopes=["https://cognitiveservices.azure.com/.default"],
    model="DeepSeek-R1",
)
result = client.complete(
    messages=[
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="How many languages are in the world?"),
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
import logging
import os
import time
from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

# Token scope: ml.azure.com for serverless endpoints, cognitiveservices for AIServices.
credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
openai_token = credential.get_token("https://cognitiveservices.azure.com/.default")

client = OpenAI(
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    # The plain OpenAI client cannot refresh an Entra ID token by itself, so
    # the api_key must be checked and re-set before every request to be safe.
    api_key=openai_token.token,
    # Required: without the api-version query parameter the chunks do not stream.
    default_query={"api-version": os.environ["AZURE_INFERENCE_API_VERSION"]},
)
# Before a request, we need to check if the token is expired.
def maybe_refresh_token(openai_token):
    """Return a valid access token, refreshing it if it expires within 60 seconds.

    When a refresh happens, ``client.api_key`` is updated in place. The
    (possibly new) token is returned so callers can rebind their reference —
    the original version discarded the refreshed token by only rebinding the
    local parameter, leaving the caller's token stale.
    """
    if openai_token.expires_on < (time.time() + 60):
        openai_token = credential.get_token("https://cognitiveservices.azure.com/.default")
        client.api_key = openai_token.token
    return openai_token
maybe_refresh_token(openai_token)
result = client.chat.completions.create(
    # Unlike the AzureOpenAI client, the plain OpenAI client takes the model
    # name directly (the endpoint routes on it) — no header workaround needed.
    model="DeepSeek-R1",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "What is the capital of the United States?",
        },
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment