Skip to content

Instantly share code, notes, and snippets.

@aribornstein
Created April 10, 2019 19:24
Show Gist options
  • Save aribornstein/f7db19d95c61a82c8156e79078e874e7 to your computer and use it in GitHub Desktop.
Azure Cognitive Services OCR From File Example
"""Azure Cognitive Services OCR from a local image file.

Posts a local image to the Computer Vision v2.0 ``ocr`` endpoint, prints the
raw JSON analysis, then overlays each recognized word's bounding box and text
on the image with matplotlib.
"""
import requests
# If you are using a Jupyter notebook, uncomment the following line.
#%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
from io import BytesIO

# Replace <Subscription Key> with your valid subscription key.
subscription_key = ""
# Explicit check instead of `assert`: asserts are stripped under `python -O`.
if not subscription_key:
    raise ValueError("Set subscription_key to your Azure subscription key.")

# You must use the same region in your REST call as you used to get your
# subscription keys. For example, if you got your subscription keys from
# westus, replace "westcentralus" in the URI below with "westus".
#
# Free trial subscription keys are generated in the "westus" region.
# If you use a free trial subscription key, you shouldn't need to change
# this region.
vision_base_url = "https://westeurope.api.cognitive.microsoft.com/vision/v2.0/"
analyze_url = vision_base_url + "ocr"

# Set image_path to the local path of an image that you want to analyze.
image_path = "foo.png"

# Read the image into a byte array; `with` guarantees the handle is closed.
with open(image_path, "rb") as image_file:
    image_data = image_file.read()

headers = {
    'Ocp-Apim-Subscription-Key': subscription_key,
    'Content-Type': 'application/octet-stream',
}
# 'unk' lets the service auto-detect the language of the text.
params = {'language': 'unk', 'detectOrientation': 'true'}

response = requests.post(
    analyze_url, headers=headers, params=params, data=image_data)
response.raise_for_status()  # fail fast on any HTTP error status

# The OCR response nests regions -> lines -> words; each word carries a
# comma-separated "boundingBox" string ("left,top,width,height") and "text".
analysis = response.json()
print(analysis)

# Flatten the nested regions/lines structure into a single list of word dicts.
word_infos = [
    word_info
    for region in analysis["regions"]
    for line in region["lines"]
    for word_info in line["words"]
]

# Display the image and overlay it with the extracted text.
plt.figure(figsize=(5, 5))
image = Image.open(BytesIO(image_data))
ax = plt.imshow(image, alpha=0.5)
for word in word_infos:
    bbox = [int(num) for num in word["boundingBox"].split(",")]
    text = word["text"]
    origin = (bbox[0], bbox[1])
    patch = Rectangle(origin, bbox[2], bbox[3], fill=False, linewidth=2, color='y')
    ax.axes.add_patch(patch)
    plt.text(origin[0], origin[1], text, fontsize=20, weight="bold", va="top")
plt.axis("off")
print("OCR Text", " ".join([word["text"] for word in word_infos]))
# Required to actually render the figure when run as a plain script
# (in a notebook, %matplotlib inline displays it automatically).
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment