Created
April 10, 2019 19:24
-
-
Save aribornstein/f7db19d95c61a82c8156e79078e874e7 to your computer and use it in GitHub Desktop.
Azure Cognitive Services OCR From File Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
# If you are using a Jupyter notebook, uncomment the following line. | |
#%matplotlib inline | |
import matplotlib.pyplot as plt | |
from matplotlib.patches import Rectangle | |
from PIL import Image | |
from io import BytesIO | |
# Replace the empty string with your valid Cognitive Services subscription key.
subscription_key = ""
# Fail fast with an explicit exception: ``assert`` statements are stripped when
# Python runs with -O, so they must not be used for input validation.
if not subscription_key:
    raise ValueError(
        "subscription_key must be set to a valid Azure Cognitive Services key"
    )

# You must use the same region in your REST call as the region your
# subscription keys were issued for.  For example, if your keys come from
# "westus", replace "westeurope" in the URI below with "westus".
# Free-trial subscription keys are generated in the "westus" region.
vision_base_url = "https://westeurope.api.cognitive.microsoft.com/vision/v2.0/"
# Full URL of the OCR operation of the Computer Vision API.
analyze_url = vision_base_url + "ocr"
# Set image_path to the local path of an image that you want to analyze.
image_path = "foo.png"

# Read the image into a byte array.  A ``with`` block guarantees the file
# handle is closed even if the read fails (the original left it open).
with open(image_path, "rb") as image_file:
    image_data = image_file.read()

# The subscription key authenticates the request; the octet-stream content
# type tells the service we are sending raw image bytes, not JSON.
headers = {
    'Ocp-Apim-Subscription-Key': subscription_key,
    'Content-Type': 'application/octet-stream',
}
# 'unk' lets the service auto-detect the language; detectOrientation asks it
# to compensate for rotated text before recognition.
params = {'language': 'unk', 'detectOrientation': 'true'}

response = requests.post(
    analyze_url, headers=headers, params=params, data=image_data)
# Raise requests.HTTPError on any 4xx/5xx status instead of silently
# continuing with an error payload.
response.raise_for_status()

# The OCR response body groups the recognized text into
# regions -> lines -> words, each carrying a "boundingBox" string.
analysis = response.json()
print(analysis)
# Extract the word bounding boxes and text.
# Flatten the regions -> lines -> words hierarchy of the OCR response into a
# single list of word dicts (each with "boundingBox" and "text" keys); a
# flattening comprehension replaces the original triple-nested append loop.
word_infos = [
    word_info
    for region in analysis["regions"]
    for line in region["lines"]
    for word_info in line["words"]
]
# Render the source image half-transparent and overlay every recognized word
# with a yellow bounding box and its text.
plt.figure(figsize=(5, 5))
source_image = Image.open(BytesIO(image_data))
axes_image = plt.imshow(source_image, alpha=0.5)
for word_info in word_infos:
    # boundingBox is a comma-separated "left,top,width,height" string.
    box = [int(value) for value in word_info["boundingBox"].split(",")]
    left, top = box[0], box[1]
    # Outline the word on the image.
    outline = Rectangle((left, top), box[2], box[3], fill=False, linewidth=2, color='y')
    axes_image.axes.add_patch(outline)
    # Anchor the recognized text at the box's top-left corner.
    plt.text(left, top, word_info["text"], fontsize=20, weight="bold", va="top")
plt.axis("off")
print("OCR Text", " ".join(word_info["text"] for word_info in word_infos))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.