Image analysis lab
@vicradon · Last active December 9, 2024 21:21

from dotenv import load_dotenv
import os
import sys
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# Import namespaces
import azure.ai.vision as sdk


def main():
    global cv_client

    try:
        # Get configuration settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Get image
        image_file = 'images/street.jpg'
        if len(sys.argv) > 1:
            image_file = sys.argv[1]

        # Authenticate Azure AI Vision client
        cv_client = sdk.VisionServiceOptions(ai_endpoint, ai_key)

        # Analyze image
        AnalyzeImage(image_file, cv_client)

        # Remove the image background or generate a foreground matte
        BackgroundForeground(image_file, cv_client)

    except Exception as ex:
        print(ex)


def AnalyzeImage(image_file, cv_client):
    print('\nAnalyzing', image_file)

    # Specify features to be retrieved
    analysis_options = sdk.ImageAnalysisOptions()
    analysis_options.features = (
        sdk.ImageAnalysisFeature.CAPTION |
        sdk.ImageAnalysisFeature.DENSE_CAPTIONS |
        sdk.ImageAnalysisFeature.TAGS |
        sdk.ImageAnalysisFeature.OBJECTS |
        sdk.ImageAnalysisFeature.PEOPLE
    )

    # Get image analysis
    image = sdk.VisionSource(image_file)
    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        # Get image caption
        if result.caption is not None:
            print("\nCaption:")
            print(" Caption: '{}' (confidence: {:.2f}%)".format(result.caption.content, result.caption.confidence * 100))

        # Get image dense captions
        if result.dense_captions is not None:
            print("\nDense Captions:")
            for caption in result.dense_captions:
                print(" Caption: '{}' (confidence: {:.2f}%)".format(caption.content, caption.confidence * 100))

        # Get image tags
        if result.tags is not None:
            print("\nTags:")
            for tag in result.tags:
                print(" Tag: '{}' (confidence: {:.2f}%)".format(tag.name, tag.confidence * 100))

        # Get objects in the image
        if result.objects is not None:
            print("\nObjects in image:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for detected_object in result.objects:
                # Print object name
                print(" {} (confidence: {:.2f}%)".format(detected_object.name, detected_object.confidence * 100))

                # Draw object bounding box
                r = detected_object.bounding_box
                bounding_box = ((r.x, r.y), (r.x + r.w, r.y + r.h))
                draw.rectangle(bounding_box, outline=color, width=3)
                plt.annotate(detected_object.name, (r.x, r.y), backgroundcolor=color)

            # Save annotated image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'objects.jpg'
            fig.savefig(outputfile)
            print('  Results saved in', outputfile)

        # Get people in the image
        if result.people is not None:
            print("\nPeople in image:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for detected_person in result.people:
                # Draw person bounding box
                r = detected_person.bounding_box
                bounding_box = ((r.x, r.y), (r.x + r.w, r.y + r.h))
                draw.rectangle(bounding_box, outline=color, width=3)
                # Uncomment to print the confidence of each detected person
                # print(" {} (confidence: {:.2f}%)".format(detected_person.bounding_box, detected_person.confidence * 100))

            # Save annotated image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'people.jpg'
            fig.savefig(outputfile)
            print('  Results saved in', outputfile)

    else:
        error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
        print(" Analysis failed.")
        print("  Error reason: {}".format(error_details.reason))
        print("  Error code: {}".format(error_details.error_code))
        print("  Error message: {}".format(error_details.message))


def BackgroundForeground(image_file, cv_client):
    # Remove the background from the image or generate a foreground matte
    print('\nRemove the background from the image or generate a foreground matte')

    image = sdk.VisionSource(image_file)
    analysis_options = sdk.ImageAnalysisOptions()

    # Set the image analysis segmentation mode to background or foreground
    analysis_options.segmentation_mode = sdk.ImageSegmentationMode.BACKGROUND_REMOVAL

    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        image_buffer = result.segmentation_result.image_buffer
        print("  Segmentation result:")
        print("   Output image buffer size (bytes) = {}".format(len(image_buffer)))
        print("   Output image height = {}".format(result.segmentation_result.image_height))
        print("   Output image width = {}".format(result.segmentation_result.image_width))

        output_image_file = "newimage.jpg"
        with open(output_image_file, 'wb') as binary_file:
            binary_file.write(image_buffer)
        print("  File {} written to disk".format(output_image_file))
    else:
        error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
        print(" Analysis failed.")
        print("  Error reason: {}".format(error_details.reason))
        print("  Error code: {}".format(error_details.error_code))
        print("  Error message: {}".format(error_details.message))
        print("  Did you set the computer vision endpoint and key?")


if __name__ == "__main__":
    main()
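
Both Azure AI Vision scripts in this gist read the service endpoint and key from a .env file via load_dotenv(). A minimal sketch of that file, assuming only the variable names used above; the values are placeholders, not real settings:

AI_SERVICE_ENDPOINT=https://<your_resource_name>.cognitiveservices.azure.com/
AI_SERVICE_KEY=<your_resource_key>
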
from dotenv import load_dotenv
import os

# Import namespaces
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient


def main():
    try:
        # Get configuration settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Create client using endpoint and key
        credential = AzureKeyCredential(ai_key)
        ai_client = TextAnalyticsClient(endpoint=ai_endpoint, credential=credential)

        # Analyze each text file in the reviews folder
        reviews_folder = 'reviews'
        for file_name in os.listdir(reviews_folder):
            # Read the file contents
            print('\n-------------\n' + file_name)
            text = open(os.path.join(reviews_folder, file_name), encoding='utf8').read()
            print('\n' + text)

            # Get language
            detectedLanguage = ai_client.detect_language(documents=[text])[0]
            print('\nLanguage: {}'.format(detectedLanguage.primary_language.name))

            # Get sentiment
            sentimentAnalysis = ai_client.analyze_sentiment(documents=[text])[0]
            print("\nSentiment: {}".format(sentimentAnalysis.sentiment))

            # Get key phrases
            phrases = ai_client.extract_key_phrases(documents=[text])[0].key_phrases
            if len(phrases) > 0:
                print("\nKey Phrases:")
                for phrase in phrases:
                    print('\t{}'.format(phrase))

            # Get entities
            entities = ai_client.recognize_entities(documents=[text])[0].entities
            if len(entities) > 0:
                print("\nEntities")
                for entity in entities:
                    print('\t{} ({})'.format(entity.text, entity.category))

            # Get linked entities
            entities = ai_client.recognize_linked_entities(documents=[text])[0].entities
            if len(entities) > 0:
                print("\nLinks")
                for linked_entity in entities:
                    print('\t{} ({})'.format(linked_entity.name, linked_entity.url))

    except Exception as ex:
        print(ex)


if __name__ == "__main__":
    main()
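
The text analysis script above depends on the azure.ai.textanalytics and dotenv packages. Assuming the standard PyPI distribution names (only the azure-ai-vision pin below is confirmed by this gist), an install along these lines should work:

pip install azure-ai-textanalytics python-dotenv
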
import os
from dotenv import load_dotenv

# Add Azure OpenAI package
from openai import AzureOpenAI


def main():
    try:
        # Get configuration settings
        load_dotenv()
        azure_oai_endpoint = os.getenv("AZURE_OAI_ENDPOINT")
        azure_oai_key = os.getenv("AZURE_OAI_KEY")
        azure_oai_model = os.getenv("AZURE_OAI_MODEL")

        # Read text from file
        text = open(file="../text-files/sample-text.txt", encoding="utf8").read()
        print("\nSending request for summary to Azure OpenAI endpoint...\n\n")

        # Configure the Azure OpenAI client
        client = AzureOpenAI(
            azure_endpoint=azure_oai_endpoint,
            api_key=azure_oai_key,
            api_version="2023-05-15"
        )

        # Send request to Azure OpenAI model
        response = client.chat.completions.create(
            model=azure_oai_model,
            temperature=0.7,
            max_tokens=120,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Summarize the following text in 20 words or less:\n" + text}
            ]
        )

        print("Summary: " + response.choices[0].message.content + "\n")

    except Exception as ex:
        print(ex)


if __name__ == '__main__':
    main()
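
The two Azure OpenAI scripts read three settings from the same .env mechanism. A minimal sketch with placeholder values; note that with the AzureOpenAI client, the model argument refers to the name of your deployment rather than the underlying model name:

AZURE_OAI_ENDPOINT=https://<your_resource_name>.openai.azure.com/
AZURE_OAI_KEY=<your_resource_key>
AZURE_OAI_MODEL=<your_deployment_name>
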
# Pinned preview build of the azure.ai.vision SDK used by the image analysis and OCR scripts; later releases changed this API, so the pin matters
pip install azure-ai-vision==0.15.1b1
import os
from dotenv import load_dotenv

# Add Azure OpenAI package
from openai import AzureOpenAI

# Set to True to print the full response from Azure OpenAI for each call
printFullResponse = False


def main():
    try:
        # Get configuration settings
        load_dotenv()
        azure_oai_endpoint = os.getenv("AZURE_OAI_ENDPOINT")
        azure_oai_key = os.getenv("AZURE_OAI_KEY")
        azure_oai_model = os.getenv("AZURE_OAI_MODEL")

        # Configure the Azure OpenAI client
        client = AzureOpenAI(
            azure_endpoint=azure_oai_endpoint,
            api_key=azure_oai_key,
            api_version="2023-05-15"
        )

        while True:
            print('1: Basic prompt (no prompt engineering)\n' +
                  '2: Prompt with email formatting and basic system message\n' +
                  '3: Prompt with formatting and specifying content\n' +
                  '4: Prompt adjusting system message to be light and use jokes\n' +
                  '\'quit\' to exit the program\n')
            command = input('Enter a number: ')
            if command == '1':
                call_openai_model(messages="../prompts/basic.txt", model=azure_oai_model, client=client)
            elif command == '2':
                call_openai_model(messages="../prompts/email-format.txt", model=azure_oai_model, client=client)
            elif command == '3':
                call_openai_model(messages="../prompts/specify-content.txt", model=azure_oai_model, client=client)
            elif command == '4':
                call_openai_model(messages="../prompts/specify-tone.txt", model=azure_oai_model, client=client)
            elif command.lower() == 'quit':
                print('Exiting program...')
                break
            else:
                print("Invalid input. Please try again.")

    except Exception as ex:
        print(ex)


def call_openai_model(messages, model, client):
    # In this sample, each prompt file contains both the system and user messages;
    # read them into variables, strip whitespace, then build the messages array
    file = open(file=messages, encoding="utf8")
    system_message = file.readline().split(':', 1)[1].strip()
    user_message = file.readline().split(':', 1)[1].strip()
    file.close()

    # Print the messages to the console
    print("System message: " + system_message)
    print("User message: " + user_message)

    # Format and send the request to the model
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]

    # Call the Azure OpenAI model
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        max_tokens=800
    )

    if printFullResponse:
        print(response)
    print("Completion: \n\n" + response.choices[0].message.content + "\n")


if __name__ == '__main__':
    main()
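
call_openai_model expects each prompt file to hold the system message on the first line and the user message on the second, each after a label and colon (the code splits on the first ':'). The lab's actual prompt files aren't reproduced in this gist; a hypothetical basic.txt in the shape the parser accepts would be:

System message: You are an AI assistant.
User message: Write an introduction for a community wildlife website.
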
from dotenv import load_dotenv
import os
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt

# Import namespaces
import azure.ai.vision as sdk


def main():
    global cv_client

    try:
        # Get configuration settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Authenticate Azure AI Vision client
        cv_client = sdk.VisionServiceOptions(ai_endpoint, ai_key)

        # Menu for text reading functions
        print('\n1: Use Read API for image (Lincoln.jpg)\n2: Read handwriting (Note.jpg)\nAny other key to quit\n')
        command = input('Enter a number: ')
        if command == '1':
            image_file = os.path.join('images', 'Lincoln.jpg')
            GetTextRead(image_file)
        elif command == '2':
            image_file = os.path.join('images', 'Note.jpg')
            GetTextRead(image_file)

    except Exception as ex:
        print(ex)


def GetTextRead(image_file):
    # Use the Analyze Image function to read text in the image
    print('\nReading text in {}\n'.format(image_file))

    # Specify features to be retrieved
    analysis_options = sdk.ImageAnalysisOptions()
    analysis_options.features = (
        sdk.ImageAnalysisFeature.TEXT
    )

    # Get image analysis
    image = sdk.VisionSource(image_file)
    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        # Get detected text
        if result.text is not None:
            print("\nText:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for line in result.text.lines:
                # Print each line of text detected in the image
                print(line.content)

                drawLinePolygon = True
                r = line.bounding_polygon
                bounding_polygon = ((r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7]))

                # Print each word detected in the line, with its bounding polygon and confidence
                for word in line.words:
                    r = word.bounding_polygon
                    bounding_polygon = ((r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7]))
                    print(" Word: '{}', Bounding Polygon: {}, Confidence: {}".format(word.content, bounding_polygon, word.confidence))

                    # Draw word bounding polygon
                    drawLinePolygon = False
                    draw.polygon(bounding_polygon, outline=color, width=3)

                # Draw line bounding polygon if no word polygons were drawn
                if drawLinePolygon:
                    draw.polygon(bounding_polygon, outline=color, width=3)

            # Save image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'text.jpg'
            fig.savefig(outputfile)
            print('\n Results saved in', outputfile)


if __name__ == "__main__":
    main()
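
The Read API returns each bounding polygon as a flat sequence of coordinates, [x0, y0, x1, y1, ...], which the script above unpacks by index for the four-corner case. A small hypothetical helper (not part of the lab code) that performs the same pairing for any even-length sequence:

def to_points(flat):
    # Pair consecutive values of a flat [x0, y0, x1, y1, ...] list into (x, y) tuples
    return tuple((flat[i], flat[i + 1]) for i in range(0, len(flat), 2))
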
from dotenv import load_dotenv
from datetime import datetime
import os

# Import namespaces
import azure.cognitiveservices.speech as speech_sdk
from playsound import playsound


def main():
    try:
        global speech_config

        # Get configuration settings
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')

        # Configure speech service
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)
        print('Ready to use speech service in:', speech_config.region)

        # Get spoken input
        command = TranscribeCommand()
        if command.lower() == 'what time is it?':
            TellTime()

    except Exception as ex:
        print(ex)


def TranscribeCommand():
    command = ''

    # Configure speech recognition from an audio file
    current_dir = os.getcwd()
    audioFile = os.path.join(current_dir, 'time.wav')
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)

    # Process speech input
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the command
    return command


def TellTime():
    now = datetime.now()
    response_text = 'The time is {}:{:02d}'.format(now.hour, now.minute)

    # Configure speech synthesis
    speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)

    # Synthesize spoken output
    speak = speech_synthesizer.speak_text_async(response_text).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

    # Print the response
    print(response_text)


if __name__ == "__main__":
    main()
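
The speech script imports azure.cognitiveservices.speech and playsound. Assuming the standard PyPI names for those packages, an install along these lines should work:

pip install azure-cognitiveservices-speech playsound python-dotenv
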
@vicradon commented Feb 5, 2024:

A dense enchanted forest where treehouses are connected by suspended bridges, and diverse elves and creatures celebrate under giant luminescent mushrooms