Skip to content

Instantly share code, notes, and snippets.

@pamelafox
Last active March 12, 2025 16:45
Show Gist options
  • Save pamelafox/92f570bb71308d27c4419b1d29c34dbb to your computer and use it in GitHub Desktop.
Using Azure AI SDK and OpenAI SDK with AIServices deepseek deployment
import logging
import os
from azure.identity import AzureDeveloperCliCredential, get_bearer_token_provider
from dotenv import load_dotenv
from openai import AzureOpenAI
# Load settings from .env, overriding any already-set environment variables.
load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

# Token scope: ml.azure.com for serverless endpoints, cognitiveservices for AIServices.
azd_credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
client = AzureOpenAI(
    api_version=os.environ["AZURE_INFERENCE_API_VERSION"],
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    azure_ad_token_provider=get_bearer_token_provider(
        azd_credential, "https://cognitiveservices.azure.com/.default"
    ),
)
result = client.chat.completions.create(
    # model *must* be None here, even though the parameter is typed as str:
    # that's the only way to make the AzureOpenAI class skip appending
    # 'deployments/<model>' to the request URL.
    model=None,
    # Because model=None, the model name has to be passed in this header instead.
    extra_headers={"x-ms-model-mesh-model-name": "DeepSeek-R1"},
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "What is the capital of the United States?",
        },
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
import http.client as http_client
import os
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv
# Turn on wire-level debug output from http.client so requests/responses are visible.
http_client.HTTPConnection.debuglevel = 1

load_dotenv(override=True)

# Alternative: verbose logging through the logging module instead of http.client.
# logging.basicConfig(level=logging.DEBUG)
# logging.getLogger("urllib3").setLevel(logging.DEBUG)
# logging.getLogger("urllib3").propagate = True

# Alternative: key-based auth instead of Entra ID.
# client = ChatCompletionsClient(
#     endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
#     credential=AzureKeyCredential(os.environ["AZURE_INFERENCE_KEY"]),
# )

# Authenticate with the azd CLI (Entra ID) credential against the AIServices scope.
inference_endpoint = os.environ["AZURE_INFERENCE_ENDPOINT"]
azd_credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
client = ChatCompletionsClient(
    endpoint=inference_endpoint,
    credential=azd_credential,
    credential_scopes=["https://cognitiveservices.azure.com/.default"],
    model="DeepSeek-R1",
)
result = client.complete(
    messages=[
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="How many languages are in the world?"),
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
import logging
import os
import time
from azure.identity import AzureDeveloperCliCredential
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv(override=True)
logging.basicConfig(level=logging.DEBUG)

# Token scope: ml.azure.com for serverless endpoints, cognitiveservices for AIServices.
credential = AzureDeveloperCliCredential(tenant_id=os.environ["AZURE_TENANT_ID"])
openai_token = credential.get_token("https://cognitiveservices.azure.com/.default")

client = OpenAI(
    base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
    # The plain OpenAI client cannot refresh an Entra ID token by itself, so
    # the api_key must be checked and re-set before every request to be safe.
    api_key=openai_token.token,
    # Required: without the api-version query parameter the chunks do not stream.
    default_query={"api-version": os.environ["AZURE_INFERENCE_API_VERSION"]},
)
# Before a request, we need to check if the token is expired.
def maybe_refresh_token(openai_token):
    """Return a valid access token, refreshing it if it expires within 60 seconds.

    When a refresh happens, ``client.api_key`` is updated in place. The
    (possibly new) token is returned so callers can rebind their reference —
    the original version discarded the refreshed token by only rebinding the
    local parameter, leaving the caller's token stale.
    """
    if openai_token.expires_on < (time.time() + 60):
        openai_token = credential.get_token("https://cognitiveservices.azure.com/.default")
        client.api_key = openai_token.token
    return openai_token
maybe_refresh_token(openai_token)
result = client.chat.completions.create(
    # Unlike the AzureOpenAI client, the plain OpenAI client takes the model
    # name directly (the endpoint routes on it) — no header workaround needed.
    model="DeepSeek-R1",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "What is the capital of the United States?",
        },
    ],
    max_tokens=2048,
    stream=True,
)
for update in result:
    # Guard against choice-less chunks and role-only deltas whose content is
    # None — printing those would emit the literal string "None".
    if update.choices and update.choices[0].delta.content is not None:
        print(update.choices[0].delta.content, end="")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment