@nyck33
Created July 15, 2023 02:31
trocer-multiline-attempt.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"mount_file_id": "1GYx0_XFeebWyvLqJF303HZyTGLv0l6u6",
"authorship_tag": "ABX9TyOYBmm36s8AbRk9dn7Nix+d",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"bff71520ffd443c6b0828678c6fb162e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_5f4ccb51c9d145c6b846343a0830fc59",
"IPY_MODEL_240554008a9440f8b756be51ef05ea17",
"IPY_MODEL_c36a17da5f9e4376950040110f02d849"
],
"layout": "IPY_MODEL_f8ef55483ebc4b2badab984d489f2778"
}
},
"5f4ccb51c9d145c6b846343a0830fc59": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_414fe69f70844be3968ee5aafa7ef9c9",
"placeholder": "​",
"style": "IPY_MODEL_6612ff5b8af54425ad62e412cdbbd799",
"value": "Downloading (…)lve/main/config.json: 100%"
}
},
"240554008a9440f8b756be51ef05ea17": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ee9248bb92e64283841b4c5541632290",
"max": 4242,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_6f32e82119e3455aa3f9088b806af894",
"value": 4242
}
},
"c36a17da5f9e4376950040110f02d849": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_84f60735af0a4c7ca165814f93484fee",
"placeholder": "​",
"style": "IPY_MODEL_e81558f67dba4059986a199e81c0cd01",
"value": " 4.24k/4.24k [00:00<00:00, 126kB/s]"
}
},
"f8ef55483ebc4b2badab984d489f2778": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"414fe69f70844be3968ee5aafa7ef9c9": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6612ff5b8af54425ad62e412cdbbd799": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ee9248bb92e64283841b4c5541632290": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6f32e82119e3455aa3f9088b806af894": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"84f60735af0a4c7ca165814f93484fee": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e81558f67dba4059986a199e81c0cd01": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ed0f2d648365425ea5a1005f2dc13d79": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_bf9d999b077a4993a5e0b89dbb8e91ea",
"IPY_MODEL_09bfb53089f94be09795bbf887c270e0",
"IPY_MODEL_3ac2b54f224c4dd885191bd5d769e080"
],
"layout": "IPY_MODEL_d0f3ee9e40b146a0bab6897ba506bb18"
}
},
"bf9d999b077a4993a5e0b89dbb8e91ea": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_20ac01d5cef44ce8a337c58ea06ad784",
"placeholder": "​",
"style": "IPY_MODEL_384e491be51241ae9651e7e795b49f99",
"value": "Downloading pytorch_model.bin: 100%"
}
},
"09bfb53089f94be09795bbf887c270e0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b18d5bce791e48eaa81f39cb78557b18",
"max": 2432726799,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_abb30caff22047528756dd929c1fb22e",
"value": 2432726799
}
},
"3ac2b54f224c4dd885191bd5d769e080": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_468a2f39dac8431994c0575058b4b1ad",
"placeholder": "​",
"style": "IPY_MODEL_66ef9a74265c451ea4d9dd7800ea32b5",
"value": " 2.43G/2.43G [00:30<00:00, 86.5MB/s]"
}
},
"d0f3ee9e40b146a0bab6897ba506bb18": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"20ac01d5cef44ce8a337c58ea06ad784": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"384e491be51241ae9651e7e795b49f99": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b18d5bce791e48eaa81f39cb78557b18": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"abb30caff22047528756dd929c1fb22e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"468a2f39dac8431994c0575058b4b1ad": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"66ef9a74265c451ea4d9dd7800ea32b5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/nyck33/5c9d191cdf547631f23c9f2d61881379/trocer-multiline-attempt.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"Bard says:\n",
"Yes, TrOCR can do OCR on more than one line at a time. It was specifically designed to do document-level OCR, which means that it can recognize text from a full-page image that contains multiple lines of text. However, it is important to note that the accuracy of TrOCR on multiline images will depend on the quality of the image and the layout of the text. If the image is blurry or the text is poorly aligned, TrOCR may not be able to recognize all of the text correctly.\n",
"\n",
"There are a few different ways to feed an entire document to TrOCR. One way is to use a text segmentation algorithm to split the document into individual lines, and then feed each line separately to TrOCR. Another way is to use a pre-trained model that has been fine-tuned on a dataset of multiline images. This will improve the accuracy of TrOCR on multiline images, but it will also require more training data.\n",
"\n",
"Here are some resources that you may find helpful:\n",
"\n",
"* TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models: https://arxiv.org/abs/2212.05525\n",
"* Extending TrOCR for Text Localization-Free OCR of Full-Page Scanned Receipt Images: https://arxiv.org/abs/2203.15562\n",
"* How to run inference on multiline text image: https://github.com/microsoft/unilm/issues/628\n",
"\n",
"Then Bard says for Hugging Face:\n",
"\n",
"Yes, there is a Hugging Face TrOCR model for multi-line images. It is called **microsoft/trocr-large-stage1** and it was trained on a dataset of multiline images. You can use this model to recognize text from a full-page image that contains multiple lines of text.\n",
"\n",
"Here is the link to the model on the Hugging Face Hub: https://huggingface.co/microsoft/trocr-large-stage1\n",
"\n",
"To use this model, you will need to install the Transformers library. Once you have installed the Transformers library, you can load the model using the following code:\n",
"\n",
"```python\n",
"from transformers import TrOCRModel\n",
"\n",
"model = TrOCRModel.from_pretrained(\"microsoft/trocr-large-stage1\")\n",
"```\n",
"\n",
"Once you have loaded the model, you can use it to recognize text from an image by passing the image to the model's `predict()` method. The `predict()` method will return a list of strings, each of which represents a line of text that was recognized in the image.\n",
"\n",
"Here is an example of how to use the model to recognize text from an image:\n",
"\n",
"```python\n",
"import PIL.Image\n",
"\n",
"image = PIL.Image.open(\"my_image.jpg\")\n",
"\n",
"predictions = model.predict(image)\n",
"\n",
"for prediction in predictions:\n",
" print(prediction)\n",
"```\n",
"\n",
"This code will print a list of strings, each of which represents a line of text that was recognized in the image.\n",
"\n",
"I hope this helps!\n",
"\n"
],
"metadata": {
"id": "0aEEdxOu58Nj"
}
},
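{
"cell_type": "markdown",
"source": [
"Note: the `TrOCRModel` class and `predict()` method Bard describes do not exist in the Transformers library, which is likely why the attempts further down fail. A minimal loading sketch with the documented classes (`TrOCRProcessor` plus `VisionEncoderDecoderModel`):\n",
"\n",
"```python\n",
"from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
"\n",
"# the processor handles image resizing/normalization and token decoding\n",
"processor = TrOCRProcessor.from_pretrained(\"microsoft/trocr-large-stage1\")\n",
"# the checkpoint is a vision-encoder-decoder model, not a standalone TrOCR class\n",
"model = VisionEncoderDecoderModel.from_pretrained(\"microsoft/trocr-large-stage1\")\n",
"```\n",
"\n",
"Text then comes from `model.generate(...)` decoded with `processor.batch_decode(...)`; a per-image sketch appears near the end of this notebook.\n"
],
"metadata": {}
},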
{
"cell_type": "code",
"source": [
"! pip install transformers\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OTdY4wlH6avb",
"outputId": "5fd80c39-06c8-479f-c10f-ad697bb5903d"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting transformers\n",
" Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m43.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
"Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n",
" Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n",
"Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n",
" Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m69.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n",
" Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m41.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.5.7)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
"Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers\n",
"Successfully installed huggingface-hub-0.16.4 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.30.2\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! pip install pillow"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lxwdiaDj8Pb2",
"outputId": "900871ab-87b3-4e1b-fbfb-3a14d96f6354"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (8.4.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"import PIL.Image\n",
"\n",
"def get_image_list(directory):\n",
" \"\"\"Returns a list of images in the specified directory.\"\"\"\n",
" image_list = []\n",
" for file in os.listdir(directory):\n",
" if file.endswith(\".jpg\") or file.endswith(\".jpeg\") or file.endswith(\".png\"):\n",
" image = PIL.Image.open(os.path.join(directory, file))\n",
" image_list.append(image)\n",
" return image_list\n",
"\n",
"img_dir = \"/content/drive/MyDrive/1_BardESL/images/\"\n",
"\n"
],
"metadata": {
"id": "o542iQga8NXv"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Install CRAFT to detect lines and hopefully get lines of text from multiline documents\n"
],
"metadata": {
"id": "5A4FUnxxKN0x"
}
},
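{
"cell_type": "markdown",
"source": [
"CRAFT only detects text regions, so TrOCR still has to run on each detected region separately. A rough sketch of the intended glue code, assuming the detection results have already been parsed into simple `(left, top, right, bottom)` boxes (the box format and variable names here are assumptions, not CRAFT's actual output format):\n",
"\n",
"```python\n",
"def recognize_regions(page_image, boxes, processor, model):\n",
"    \"\"\"Crop each detected box out of a PIL page image and run TrOCR on the crop.\"\"\"\n",
"    lines = []\n",
"    for left, top, right, bottom in boxes:  # hypothetical axis-aligned boxes\n",
"        crop = page_image.crop((left, top, right, bottom)).convert(\"RGB\")\n",
"        pixel_values = processor(images=crop, return_tensors=\"pt\").pixel_values\n",
"        generated_ids = model.generate(pixel_values)\n",
"        lines.append(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])\n",
"    return lines\n",
"```\n"
],
"metadata": {}
},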
{
"cell_type": "code",
"source": [
"! git clone https://github.com/clovaai/CRAFT-pytorch.git"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rhPHS_BZKWuh",
"outputId": "36862757-3669-49ff-8343-001c9831ff19"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'CRAFT-pytorch'...\n",
"remote: Enumerating objects: 59, done.\u001b[K\n",
"remote: Total 59 (delta 0), reused 0 (delta 0), pack-reused 59\u001b[K\n",
"Unpacking objects: 100% (59/59), 1.69 MiB | 9.21 MiB/s, done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%cd /content/CRAFT-pytorch/\n",
"! pip install -r requirements.txt"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IXDkTTM_KacA",
"outputId": "b3eb2720-df18-4bf0-aff8-1d1535ac5e15"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/CRAFT-pytorch\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (2.0.1+cu118)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 2)) (0.15.2+cu118)\n",
"Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 3)) (4.7.0.72)\n",
"Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 4)) (0.19.3)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (1.10.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (3.12.2)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (4.7.1)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (1.11.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (3.1)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (3.1.2)\n",
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 1)) (2.0.0)\n",
"Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->-r requirements.txt (line 1)) (3.25.2)\n",
"Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->-r requirements.txt (line 1)) (16.0.6)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision->-r requirements.txt (line 2)) (1.22.4)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision->-r requirements.txt (line 2)) (2.27.1)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->-r requirements.txt (line 2)) (8.4.0)\n",
"Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->-r requirements.txt (line 4)) (2.25.1)\n",
"Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->-r requirements.txt (line 4)) (2023.7.10)\n",
"Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->-r requirements.txt (line 4)) (1.4.1)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->-r requirements.txt (line 4)) (23.1)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->-r requirements.txt (line 1)) (2.1.3)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements.txt (line 2)) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements.txt (line 2)) (2023.5.7)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements.txt (line 2)) (2.0.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision->-r requirements.txt (line 2)) (3.4)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->-r requirements.txt (line 1)) (1.3.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! wget https://drive.google.com/open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yHDEGep8QriA",
"outputId": "fe25fe27-9fb9-4e73-9765-ea66abf7cbe7"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-07-15 01:56:18-- https://drive.google.com/open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ\n",
"Resolving drive.google.com (drive.google.com)... 173.194.210.100, 173.194.210.102, 173.194.210.113, ...\n",
"Connecting to drive.google.com (drive.google.com)|173.194.210.100|:443... connected.\n",
"HTTP request sent, awaiting response... 307 Temporary Redirect\n",
"Location: https://drive.google.com/file/d/1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ/view?usp=drive_open [following]\n",
"--2023-07-15 01:56:18-- https://drive.google.com/file/d/1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ/view?usp=drive_open\n",
"Reusing existing connection to drive.google.com:443.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: unspecified [text/html]\n",
"Saving to: ‘open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ’\n",
"\n",
"open?id=1Jk4eGD7crs [ <=> ] 77.89K --.-KB/s in 0.004s \n",
"\n",
"2023-07-15 01:56:19 (17.2 MB/s) - ‘open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ’ saved [79760]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"images = get_image_list(img_dir)\n",
"\n",
"len(images)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "br6UO-P2RYuj",
"outputId": "c9042973-1646-4802-fb7f-3f3fbf22c71e"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"! pip install --upgrade torchvision==0.13"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 609
},
"id": "o-tO1-6tUn_D",
"outputId": "ebdd955d-ee39-4131-9e0f-748600229473"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting torchvision==0.13\n",
" Downloading torchvision-0.13.0-cp310-cp310-manylinux1_x86_64.whl (19.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.1/19.1 MB\u001b[0m \u001b[31m66.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13) (4.7.1)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13) (1.22.4)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13) (2.27.1)\n",
"Collecting torch==1.12.0 (from torchvision==0.13)\n",
" Downloading torch-1.12.0-cp310-cp310-manylinux1_x86_64.whl (776.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m776.3/776.3 MB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.13) (8.4.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13) (2023.5.7)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13) (2.0.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.13) (3.4)\n",
"Installing collected packages: torch, torchvision\n",
" Attempting uninstall: torch\n",
" Found existing installation: torch 2.0.1+cu118\n",
" Uninstalling torch-2.0.1+cu118:\n",
" Successfully uninstalled torch-2.0.1+cu118\n",
" Attempting uninstall: torchvision\n",
" Found existing installation: torchvision 0.15.2+cu118\n",
" Uninstalling torchvision-0.15.2+cu118:\n",
" Successfully uninstalled torchvision-0.15.2+cu118\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"torchaudio 2.0.2+cu118 requires torch==2.0.1, but you have torch 1.12.0 which is incompatible.\n",
"torchdata 0.6.1 requires torch==2.0.1, but you have torch 1.12.0 which is incompatible.\n",
"torchtext 0.15.2 requires torch==2.0.1, but you have torch 1.12.0 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed torch-1.12.0 torchvision-0.13.0\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.colab-display-data+json": {
"pip_warning": {
"packages": [
"torch"
]
}
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"%cd /content/CRAFT-pytorch\n",
"! pwd"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JkB2zdshVPYm",
"outputId": "b1090cac-32e6-4daf-edd0-68263f603a02"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/CRAFT-pytorch\n",
"/content/CRAFT-pytorch\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! pip install cudatoolkit==11.8"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "snDIQC9fXx3Y",
"outputId": "42fafc74-8ac7-4de5-ce82-540ae9be4bb2"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[31mERROR: Could not find a version that satisfies the requirement cudatoolkit==11.8 (from versions: none)\u001b[0m\u001b[31m\n",
"\u001b[0m\u001b[31mERROR: No matching distribution found for cudatoolkit==11.8\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
]
},
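{
"cell_type": "markdown",
"source": [
"`cudatoolkit` is a conda package, not a PyPI one, so pip cannot find it; with pip-installed PyTorch the CUDA runtime either ships inside the wheel or is simply absent, and there is nothing separate to install. What actually matters for the next cell is whether CUDA is visible at all, which is worth checking directly (a sketch, not part of the original run):\n",
"\n",
"```python\n",
"import torch\n",
"\n",
"print(torch.__version__)          # build installed by the downgrade above\n",
"print(torch.cuda.is_available())  # False would explain the error in the next cell\n",
"```\n"
],
"metadata": {}
},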
{
"cell_type": "code",
"source": [
"! python test.py --trained_model='/content/craft_mlt_25k.pth' --test_folder=img_dir"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gYP6r-DoTo1T",
"outputId": "a17b1a6f-4bf0-42f0-e369-212f2c28e5b8"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:252: UserWarning: Accessing the model URLs via the internal dictionary of the module is deprecated since 0.13 and will be removed in 0.15. Please access them via the appropriate Weights Enum instead.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and will be removed in 0.15, please use 'weights' instead.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=None`.\n",
" warnings.warn(msg)\n",
"Loading weights from checkpoint (/content/craft_mlt_25k.pth)\n",
"Traceback (most recent call last):\n",
" File \"/content/CRAFT-pytorch/test.py\", line 128, in <module>\n",
" net.load_state_dict(copyStateDict(torch.load(args.trained_model)))\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 713, in load\n",
" return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 930, in _legacy_load\n",
" result = unpickler.load()\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 876, in persistent_load\n",
" wrap_storage=restore_location(obj, location),\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 175, in default_restore_location\n",
" result = fn(storage, location)\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 152, in _cuda_deserialize\n",
" device = validate_cuda_device(location)\n",
" File \"/usr/local/lib/python3.10/dist-packages/torch/serialization.py\", line 136, in validate_cuda_device\n",
" raise RuntimeError('Attempting to deserialize object on a CUDA '\n",
"RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.\n"
]
}
]
},
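{
"cell_type": "markdown",
"source": [
"The RuntimeError is torch refusing to restore CUDA tensors when `torch.cuda.is_available()` is False. As the message suggests, loading the checkpoint with `map_location` avoids it; a minimal sketch of the fix, using the same path as above (in the cloned repo this corresponds to the `torch.load(args.trained_model)` call shown in the traceback, and `test.py` also appears to accept a `--cuda` flag for CPU inference, though that is not verified here):\n",
"\n",
"```python\n",
"import torch\n",
"\n",
"# load the CRAFT checkpoint onto the CPU regardless of the device it was saved from\n",
"state_dict = torch.load(\"/content/craft_mlt_25k.pth\", map_location=torch.device(\"cpu\"))\n",
"```\n"
],
"metadata": {}
},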
{
"cell_type": "code",
"source": [
"from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
"from transformers import TrOCRPreTrainedModel\n",
"from PIL import Image\n",
"\n",
"model = TrOCRPreTrainedModel.from_pretrained(\"microsoft/trocr-large-stage1\")\n",
"\n",
"#model = VisionEncoderDecoderModel.from_pretrained(\"microsoft/trocr-large-stage1\")\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 516,
"referenced_widgets": [
"bff71520ffd443c6b0828678c6fb162e",
"5f4ccb51c9d145c6b846343a0830fc59",
"240554008a9440f8b756be51ef05ea17",
"c36a17da5f9e4376950040110f02d849",
"f8ef55483ebc4b2badab984d489f2778",
"414fe69f70844be3968ee5aafa7ef9c9",
"6612ff5b8af54425ad62e412cdbbd799",
"ee9248bb92e64283841b4c5541632290",
"6f32e82119e3455aa3f9088b806af894",
"84f60735af0a4c7ca165814f93484fee",
"e81558f67dba4059986a199e81c0cd01",
"ed0f2d648365425ea5a1005f2dc13d79",
"bf9d999b077a4993a5e0b89dbb8e91ea",
"09bfb53089f94be09795bbf887c270e0",
"3ac2b54f224c4dd885191bd5d769e080",
"d0f3ee9e40b146a0bab6897ba506bb18",
"20ac01d5cef44ce8a337c58ea06ad784",
"384e491be51241ae9651e7e795b49f99",
"b18d5bce791e48eaa81f39cb78557b18",
"abb30caff22047528756dd929c1fb22e",
"468a2f39dac8431994c0575058b4b1ad",
"66ef9a74265c451ea4d9dd7800ea32b5"
]
},
"id": "nm7EO5Nw6bby",
"outputId": "eb48f3e4-b97f-4068-f439-cfc49d80203d"
},
"execution_count": 11,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/4.24k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "bff71520ffd443c6b0828678c6fb162e"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"You are using a model of type vision-encoder-decoder to instantiate a model of type trocr. This is not supported for all configurations of models and can yield errors.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/2.43G [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "ed0f2d648365425ea5a1005f2dc13d79"
}
},
"metadata": {}
},
{
"output_type": "error",
"ename": "KeyboardInterrupt",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-452b5348afff>\u001b[0m in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrOCRPreTrainedModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"microsoft/trocr-large-stage1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m#model = VisionEncoderDecoderModel.from_pretrained(\"microsoft/trocr-large-stage1\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 2602\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_sharded\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mstate_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2603\u001b[0m \u001b[0;31m# Time to load the checkpoint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2604\u001b[0;31m \u001b[0mstate_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved_archive_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2605\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2606\u001b[0m \u001b[0;31m# set dtype to instantiate the model under:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py\u001b[0m in \u001b[0;36mload_state_dict\u001b[0;34m(checkpoint_file)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msafe_load_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 463\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 464\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, weights_only, **pickle_load_args)\u001b[0m\n\u001b[1;32m 807\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUnpicklingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mUNSAFE_MESSAGE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 809\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_zipfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 810\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mweights_only\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 811\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_load\u001b[0;34m(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)\u001b[0m\n\u001b[1;32m 1170\u001b[0m \u001b[0munpickler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mUnpicklerWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1171\u001b[0m \u001b[0munpickler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpersistent_load\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpersistent_load\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1172\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munpickler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1173\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1174\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_loaded_sparse_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36mpersistent_load\u001b[0;34m(saved_id)\u001b[0m\n\u001b[1;32m 1140\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1141\u001b[0m \u001b[0mnbytes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumel\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_element_size\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1142\u001b[0;31m \u001b[0mtyped_storage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_maybe_decode_ascii\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1144\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtyped_storage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload_tensor\u001b[0;34m(dtype, numel, key, location)\u001b[0m\n\u001b[1;32m 1110\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'data/{key}'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1112\u001b[0;31m \u001b[0mstorage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_storage_from_record\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUntypedStorage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_typed_storage\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_untyped_storage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1113\u001b[0m \u001b[0;31m# TODO: Once we decide to break serialization FC, we can\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1114\u001b[0m \u001b[0;31m# stop wrapping with TypedStorage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
]
},
{
"cell_type": "code",
"source": [
"processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-stage1')\n",
"\n",
"#training value\n",
"#pixel_values = processor(image, return_tensors=\"pt\").pixel"
],
"metadata": {
"id": "OVcXXWVr_2no"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"all_img_predictions = []\n",
"\n",
"for img in images:\n",
" #predictions is a list of strings for each line\n",
" predictions = model.predict(img)\n",
" all_img_predictions.append(predictions)\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 356
},
"id": "1UGVEf0x6bmC",
"outputId": "2ec3883c-e02d-4ec4-8490-08d0996f7891"
},
"execution_count": 12,
"outputs": [
{
"output_type": "error",
"ename": "AttributeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-12-b3177112233c>\u001b[0m in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mimages\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#predictions is a list of strings for each line\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mall_img_predictions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1612\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodules\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1613\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmodules\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1614\u001b[0;31m raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n\u001b[0m\u001b[1;32m 1615\u001b[0m type(self).__name__, name))\n\u001b[1;32m 1616\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'VisionEncoderDecoderModel' object has no attribute 'predict'"
]
}
]
},
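{
"cell_type": "markdown",
"source": [
"`VisionEncoderDecoderModel` has no `predict()` method; generation goes through `generate()` and the processor's decoding. A sketch of the loop with the documented API, assuming `model` was loaded as a `VisionEncoderDecoderModel` and `processor` comes from the cell above. Each image yields a single decoded string (TrOCR expects one text line per image), so the printing cell below would then iterate over strings rather than lists of lines:\n",
"\n",
"```python\n",
"all_img_predictions = []\n",
"\n",
"for img in images:\n",
"    # full-page images should ideally be split into line crops (e.g. via CRAFT) first\n",
"    pixel_values = processor(images=img.convert(\"RGB\"), return_tensors=\"pt\").pixel_values\n",
"    generated_ids = model.generate(pixel_values)\n",
"    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
"    all_img_predictions.append(text)\n",
"```\n"
],
"metadata": {}
},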
{
"cell_type": "code",
"source": [
"for ps in all_img_predictions:\n",
" for p in ps:\n",
" print(p)\n",
" print('\\n\\n')"
],
"metadata": {
"id": "dJpsTFJy6bv8"
},
"execution_count": null,
"outputs": []
}
]
}