Last active
October 26, 2021 13:18
-
-
Save csiebler/9731bdea11edd12a2e7c99efb03378f9 to your computer and use it in GitHub Desktop.
Comparison of sequential vs parallel Azure Read API processing time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import io | |
import time | |
# How many OCR requests to issue in each benchmark run.
n = 50

# Enter your resource details here
url = (
    "https://xxxxxxx.cognitiveservices.azure.com"
    "/vision/v3.2/read/analyze?language=en&pages=1&readingOrder=natural"
)
key = "xxxxxxx"

# Common headers for every request against the Read API.
headers = {"Content-Type": "application/json", "Ocp-Apim-Subscription-Key": key}
def start_ocr(url, headers,
              image_url="https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/images/description.png"):
    """Submit one image to the Azure Read API and return its polling URL.

    Parameters
    ----------
    url : str
        The resource's ``.../read/analyze`` endpoint.
    headers : dict
        Request headers, including ``Ocp-Apim-Subscription-Key``.
    image_url : str, optional
        Publicly reachable image to analyze. Defaults to the Microsoft
        sample image used by the original script, so existing callers
        are unaffected.

    Returns
    -------
    str
        The ``Operation-Location`` header value to poll for results.

    Raises
    ------
    RuntimeError
        If the service does not accept the request with HTTP 202.
    """
    # Named `payload`, not `json`, to avoid shadowing the stdlib module name.
    payload = {"url": image_url}
    start_time = time.time()
    result = requests.post(url, json=payload, headers=headers)
    # Explicit check instead of `assert`: asserts are stripped under `python -O`.
    if result.status_code != 202:
        raise RuntimeError(
            f"Read API submission failed with HTTP {result.status_code}: {result.text}"
        )
    duration = time.time() - start_time
    if duration < 0.1:
        time.sleep(0.1 - duration)  # Ensure we stay under 10 TPS
    return result.headers['Operation-Location']
def wait_for_results(url, headers):
    """Poll an ``Operation-Location`` URL until the Read operation finishes.

    Parameters
    ----------
    url : str
        The ``Operation-Location`` URL returned by ``start_ocr``.
    headers : dict
        Request headers, including ``Ocp-Apim-Subscription-Key``.

    Returns
    -------
    dict
        The final operation JSON (status ``succeeded``). The original
        returned ``None``; callers that ignore the return value are
        unaffected.

    Raises
    ------
    RuntimeError
        If the operation reports ``failed``. The original looped forever
        in that case, since it only exited on ``succeeded``.
    """
    while True:
        start_time = time.time()
        response = requests.get(url, headers=headers).json()
        duration = time.time() - start_time
        if duration < 0.1:
            # Throttle before inspecting the status (as the original did),
            # so even the final poll counts against the 10 TPS budget.
            time.sleep(0.1 - duration)
        status = response['status']
        if status == "succeeded":
            return response
        if status == "failed":
            raise RuntimeError(f"Read operation failed: {response}")
#### Run sequential test ####
# Submit and wait for each request one at a time.
seq_started = time.time()
for _ in range(n):
    operation_url = start_ocr(url, headers)
    wait_for_results(operation_url, headers)
    # Do something with the results :)
duration_seq = time.time() - seq_started
print(f"Sequential batch of {n} took {duration_seq} seconds")
#### Run parallel test ####
start_time_parallel = time.time()
# Fire off all n requests up front so the service can process them
# concurrently, then poll each operation in turn.
# NOTE: the loop variables are deliberately NOT named `url`/`urls` — the
# original reused `url` as its loop variable, clobbering the module-level
# endpoint URL after this loop ran.
operation_urls = [start_ocr(url, headers) for _ in range(n)]
if len(operation_urls) != n:
    raise RuntimeError(f"Expected {n} submissions, got {len(operation_urls)}")
for operation_url in operation_urls:
    wait_for_results(operation_url, headers)
    # Do something with the results :)
duration_parallel = time.time() - start_time_parallel
print(f"Parallel batch of {n} took {duration_parallel} seconds ({int(100*duration_parallel/duration_seq)}%)")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output for westeurope: