Skip to content

Instantly share code, notes, and snippets.

@vkurpad
Last active September 23, 2022 06:31
Show Gist options
  • Save vkurpad/030213538bab2ee4332776649ced1062 to your computer and use it in GitHub Desktop.
Save vkurpad/030213538bab2ee4332776649ced1062 to your computer and use it in GitHub Desktop.
# Notebook shell commands: install the Azure SDK packages imported below.
!pip install azure-ai-formrecognizer
!pip install azure-storage-blob
def train_model(form_training_client, container_sas_url, container_name):
    """Train a labeled custom model and print a summary of the result.

    Starts a training operation with ``use_training_labels=True`` against the
    container addressed by ``container_sas_url``, blocks until the poller
    returns, prints the model, submodel, field and training-document details,
    and returns the trained model.

    Args:
        form_training_client: Object exposing ``begin_training``; the script
            in this file passes a ``FormTrainingClient``.
        container_sas_url: SAS URL of the container that holds the labeled
            training documents.
        container_name: Forwarded to the service as the new model's
            ``model_name``.

    Returns:
        The model object produced by ``poller.result()``.
    """
    poller = form_training_client.begin_training(
        container_sas_url, use_training_labels=True, model_name=container_name
    )
    model = poller.result()

    # Custom model information
    # ``properties`` is read defensively so a missing value does not abort
    # the summary after the (expensive) training call has already succeeded.
    properties = model.properties
    is_composed = properties.is_composed_model if properties is not None else None
    print(f"Model ID: {model.model_id}")
    print(f"Status: {model.status}")
    print(f"Model name: {model.model_name}")
    print(f"Is this a composed model?: {is_composed}")
    print(f"Training started on: {model.training_started_on}")
    print(f"Training completed on: {model.training_completed_on}")

    print("Recognized fields:")
    # Loop through the submodels, which contain the fields they were trained
    # on. The labels are the ones given to the training documents.
    for submodel in model.submodels or ():
        print(f"...The submodel has model ID: {submodel.model_id}")
        print(
            f"...The submodel with form type {submodel.form_type} "
            f"has an average accuracy '{submodel.accuracy}'"
        )
        for name, field in (submodel.fields or {}).items():
            print(
                f"...The model found the field '{name}' "
                f"with an accuracy of {field.accuracy}"
            )

    # Training result information
    for doc in model.training_documents or ():
        print(f"Document name: {doc.name}")
        print(f"Document status: {doc.status}")
        print(f"Document page count: {doc.page_count}")
        print(f"Document errors: {doc.errors}")

    return model
from datetime import datetime, timedelta, timezone

from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential
from azure.storage.blob import (
    BlobClient,
    BlobServiceClient,
    ContainerSasPermissions,
    generate_container_sas,
)
# Connection settings -- replace the placeholders before running.
# NOTE(review): prefer loading the two keys from environment variables or a
# secret store instead of saving them in the notebook.
endpoint = "Your Form Recognizer Endpoint"
key = "Your Form Recognizer Key"
storage_account = "Your storage account name"
storage_key = "Your storage key"

account_url = f"https://{storage_account}.blob.core.windows.net"
service = BlobServiceClient(account_url=f"{account_url}/", credential=storage_key)
containers = service.list_containers()
form_training_client = FormTrainingClient(endpoint, AzureKeyCredential(key))

# Train one labeled model per container in the storage account and collect
# the resulting models.
models = []
for container in containers:
    container_name = container.name
    print(container_name)

    # One-hour SAS token scoped to this container, handed to the training
    # service so it can read the training documents.
    sas_container = generate_container_sas(
        account_name=storage_account,
        container_name=container_name,
        account_key=storage_key,
        permission=ContainerSasPermissions(read=True, write=True, list=True),
        expiry=datetime.now(timezone.utc) + timedelta(hours=1),
    )
    sas_uri = f"{account_url}/{container_name}?{sas_container}"

    # Echo only the container URL: the SAS token grants write access and
    # should not end up in saved notebook output.
    print(f"{account_url}/{container_name}")

    # Name each model after its container so the trained models can be told
    # apart (previously every model was named "test").
    models.append(train_model(form_training_client, sas_uri, container_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment