Skip to content

Instantly share code, notes, and snippets.

@masitings
Created January 31, 2024 18:41
Show Gist options
  • Save masitings/10a164a7799b8547ca018728b00c59fc to your computer and use it in GitHub Desktop.
Save masitings/10a164a7799b8547ca018728b00c59fc to your computer and use it in GitHub Desktop.
document_ai_ocr_ktp.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNzM7ZYYooQ/JA3K9asYYNZ",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/masitings/10a164a7799b8547ca018728b00c59fc/document_ai_ocr_ktp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nIM7nIARbLDf",
"outputId": "5531638a-dbfa-4c38-9aa7-77d62c8dc52f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"import os\n",
"\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"source": [
"!pip install google-cloud-documentai\n",
"!pip install orjson\n",
"!pip install google-cloud"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a4Sk3h1yesv9",
"outputId": "b4ffa9d4-e47a-4c08-98c2-27827d5e0ff4"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: google-cloud-documentai in /usr/local/lib/python3.10/dist-packages (2.21.1)\n",
"Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (2.11.1)\n",
"Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (1.23.0)\n",
"Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-documentai) (3.20.3)\n",
"Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.62.0)\n",
"Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.17.3)\n",
"Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.31.0)\n",
"Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.60.0)\n",
"Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.48.2)\n",
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (5.3.2)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.3.0)\n",
"Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (1.16.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (4.9)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (2023.11.17)\n",
"Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0->google-cloud-documentai) (0.5.1)\n",
"Requirement already satisfied: orjson in /usr/local/lib/python3.10/dist-packages (3.9.12)\n",
"Collecting google-cloud\n",
" Downloading google_cloud-0.34.0-py2.py3-none-any.whl (1.8 kB)\n",
"Installing collected packages: google-cloud\n",
"Successfully installed google-cloud-0.34.0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = f'/content/drive/MyDrive/docai/credential.json'"
],
"metadata": {
"id": "ogEv8AoDfmuJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import base64\n",
"from google.cloud import documentai\n",
"from google.api_core.client_options import ClientOptions\n",
"\n",
"\n",
"opts = ClientOptions(api_endpoint=\"us-documentai.googleapis.com\")\n",
"\n",
"client = documentai.DocumentProcessorServiceClient(client_options=opts)\n",
"\n",
"name = client.processor_version_path('760497433370', 'us', '615d49163b214b2', 'pretrained-foundation-model-v1.0-2023-08-22')\n",
"\n",
"with open('/content/drive/MyDrive/ocr/dataset/ktp5.png', 'rb') as image_file:\n",
" fileStream = image_file.read()\n",
"\n",
"raw_document = documentai.RawDocument(content=fileStream, mime_type='image/png')\n",
"\n",
"request = documentai.ProcessRequest(\n",
" name=name,\n",
" raw_document=raw_document,\n",
" field_mask='entities',\n",
" process_options=None\n",
")\n",
"\n",
"result = client.process_document(request=request)\n",
"\n",
"document = result.document\n",
"\n",
"ktp = dict()\n",
"\n",
"for entity in document.entities:\n",
" ktp[entity.type_] = entity.mention_text\n",
"\n",
"print(ktp)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FNmlKNSbfzEt",
"outputId": "9c36690d-876e-46b2-af3b-612d4f898fa8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'birth_place': 'NIAS,', 'nik': '1204050503670001', 'name': 'EDO FURNAMA', 'city': 'KABUPATEN NIAS', 'birth_date': '05-03-1967', 'province': 'PROVINSI SUMATERA UTARA'}\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment