This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Create a data provider for the dataset | |
| data_provider = DataProvider( | |
| dataset=dataset, | |
| skip_validation=True, | |
| batch_size=configs.batch_size, | |
| data_preprocessors=[ImageReader()], | |
| transformers=[ | |
| # ImageShowCV2(), # uncomment to show images during training | |
| ImageResizer(configs.width, configs.height, keep_aspect_ratio=False), | |
| LabelIndexer(configs.vocab), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| dataset, vocab, max_len = [], set(), 0 | |
| # Preprocess the dataset by the specific IAM_Words dataset file structure | |
| words = open(os.path.join(dataset_path, "words.txt"), "r").readlines() | |
| for line in tqdm(words): | |
| if line.startswith("#"): | |
| continue | |
| line_split = line.split(" ") | |
| if line_split[1] == "err": |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
    """Download a ZIP archive from ``url`` and extract it into ``extract_to``.

    Args:
        url: Source URL of the ZIP archive.
        extract_to: Directory the archive is extracted into (created by
            ``extractall`` if it does not exist).
        chunk_size: Bytes fetched per read; also one tqdm progress tick
            (default 1 MiB).
    """
    # Collect chunks and join once at the end: repeated `data += chunk`
    # is quadratic in the number of chunks.
    chunks = []
    # urlopen responses are context managers — guarantees the connection
    # is closed even if a read fails.
    with urlopen(url) as http_response:
        # `length` is None when the server sends no Content-Length header
        # (e.g. chunked transfer); the original `length // chunk_size`
        # would raise TypeError in that case. With an unknown total, tqdm
        # simply shows an unbounded counter.
        total = http_response.length
        expected_ticks = None if total is None else total // chunk_size + 1
        with tqdm(total=expected_ticks) as progress:
            # Read until EOF rather than trusting the advertised length,
            # so a short or absent Content-Length cannot truncate the data.
            while True:
                chunk = http_response.read(chunk_size)
                if not chunk:
                    break
                chunks.append(chunk)
                progress.update(1)
    data = b''.join(chunks)
    # Note: the whole archive is held in memory; fine for modest datasets.
    ZipFile(BytesIO(data)).extractall(path=extract_to)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import tarfile | |
| from tqdm import tqdm | |
| from io import BytesIO | |
| from zipfile import ZipFile | |
| from urllib.request import urlopen | |
| import torch | |
| import torch.optim as optim | |
| from torchsummaryX import summary |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
...
Image: Datasets/IAM_Words/words/b05/b05-017/b05-017-03-03.png, Label: won't, Prediction: won't, CER: 0.0
Image: Datasets/IAM_Words/words/a01/a01-049u/a01-049u-07-06.png, Label: session, Prediction: sessicn, CER: 0.14285714285714285
Image: Datasets/IAM_Words/words/a02/a02-000/a02-000-07-00.png, Label: but, Prediction: but, CER: 0.0
Image: Datasets/IAM_Words/words/m02/m02-087/m02-087-06-02.png, Label: as, Prediction: as, CER: 0.0
Image: Datasets/IAM_Words/words/g06/g06-037j/g06-037j-07-01.png, Label: his, Prediction: his, CER: 0.0
Image: Datasets/IAM_Words/words/g06/g06-047i/g06-047i-02-09.png, Label: human, Prediction: human, CER: 0.0
Image: Datasets/IAM_Words/words/c03/c03-094c/c03-094c-08-03.png, Label: gaudy, Prediction: gaudy, CER: 0.0
Image: Datasets/IAM_Words/words/e04/e04-132/e04-132-01-04.png, Label: on, Prediction: on, CER: 0.0
Image: Datasets/IAM_Words/words/k02/k02-018/k02-018-04-01.png, Label: surprised, Prediction: supised, CER: 0.2222222222222222
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
===================================================================================
                                Kernel Shape      Output Shape   Params \
Layer
0_rb1.convb1.Conv2d_conv       [3, 16, 3, 3]  [1, 16, 32, 128]    448.0
1_rb1.convb1.BatchNorm2d_bn             [16]  [1, 16, 32, 128]     32.0
2_rb1.LeakyReLU_act1                       -  [1, 16, 32, 128]        -
3_rb1.convb2.Conv2d_conv      [16, 16, 3, 3]  [1, 16, 32, 128]    2.32k
4_rb1.convb2.BatchNorm2d_bn             [16]  [1, 16, 32, 128]     32.0
5_rb1.Conv2d_shortcut          [3, 16, 1, 1]  [1, 16, 32, 128]     64.0
6_rb1.LeakyReLU_act2                       -  [1, 16, 32, 128]        -
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import cv2 | |
| import typing | |
| import numpy as np | |
| from mltu.inferenceModel import OnnxInferenceModel | |
| from mltu.utils.text_utils import ctc_decoder, get_cer | |
| class ImageToWordModel(OnnxInferenceModel): | |
| def __init__(self, *args, **kwargs): | |
| super().__init__(*args, **kwargs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.10.1 at http://localhost:6006/ (Press CTRL+C to quit)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tensorboard --logdir Models\08_handwriting_recognition_torch\202303142139\logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # create callbacks | |
| earlyStopping = EarlyStopping(monitor='val_CER', patience=20, mode="min", verbose=1) | |
| modelCheckpoint = ModelCheckpoint(configs.model_path + '/model.pt', monitor='val_CER', mode="min", save_best_only=True, verbose=1) | |
| tb_callback = TensorBoard(configs.model_path + '/logs') | |
| reduce_lr = ReduceLROnPlateau(monitor='val_CER', factor=0.9, patience=10, verbose=1, mode='min', min_lr=1e-6) | |
| model2onnx = Model2onnx( | |
| saved_model_path=configs.model_path + '/model.pt', | |
| input_shape=(1, configs.height, configs.width, 3), | |
| verbose=1, | |
| metadata={"vocab": configs.vocab} |