This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LazyTextDataset(Dataset):
    """Map-style dataset that reads one CSV row per index straight from disk.

    Only the file name and the number of lines are kept in memory; each row
    is fetched on demand through ``linecache`` and parsed with ``csv``.
    """

    def __init__(self, filename):
        """Remember *filename* and count the lines it contains.

        The count is computed in-process rather than by shelling out to
        ``wc -l``: building a shell command from the file name broke on
        paths containing spaces and let shell metacharacters in the name be
        executed. Unlike ``wc -l``, a final line without a trailing newline
        is counted too, so the total matches the rows ``__getitem__`` can
        actually return.

        Raises:
            OSError: if *filename* cannot be opened for reading.
        """
        super().__init__()
        self._filename = filename
        # Binary mode: only line boundaries matter here, so skip decoding.
        with open(filename, "rb") as handle:
            self._total_data = sum(1 for _ in handle)

    def __getitem__(self, idx):
        """Return line ``idx`` (0-based) of the file as a list of CSV fields."""
        # linecache numbers lines starting at 1.
        line = linecache.getline(self._filename, idx + 1)
        return next(csv.reader([line]))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| "################################################################################\n", | |
| "# INSTALL CONDA ON GOOGLE COLAB\n", | |
| "################################################################################\n", | |
| "! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh\n", | |
| "! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh\n", | |
| "! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local\n", | |
| "import sys\n", | |
| "sys.path.append('/usr/local/lib/python3.7/site-packages/')" |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from tkinter import * | |
| from PIL import ImageTk,Image | |
| import time | |
| import os | |
| targetImageWidth = 850 | |
| targetImageHeight = 400 | |
| inputImageWidth = 0 | |
| inputImageHeight = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class CustomIterableDatasetv2(IterableDataset): | |
| def __init__(self, filename_en, filename_gm): | |
| #Store the filenames in object's memory | |
| self.filename_en = filename_en | |
| self.filename_gm = filename_gm | |
| #And that's it, we no longer need to store the contents in the memory |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| dataset = CustomIterableDatasetv1('path_to/somefile') | |
| dataloader = DataLoader(dataset, batch_size = 64) | |
| for X, y in dataloader: | |
| print(len(X)) # 64 | |
| print(y.shape) # (64,) | |
| ### Do something with X and y | |
| ### |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class CustomIterableDatasetv1(IterableDataset): | |
| def __init__(self, filename): | |
| #Store the filename in object's memory | |
| self.filename = filename | |
| #And that's it, we no longer need to store the contents in the memory | |
| def preprocess(self, text): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #Creating the iterable dataset object | |
| dataset = CustomIterableDataset('path_to/somefile') | |
| #Creating the dataloader | |
| dataloader = DataLoader(dataset, batch_size = 64) | |
| for data in dataloader: | |
| #Data is a list containing 64 (=batch_size) consecutive lines of the file | |
| print(len(data)) #[64,] | |
| #We still need to separate the text and labels from each other and preprocess the text |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from torch.utils.data import IterableDataset | |
| class CustomIterableDataset(IterableDataset): | |
| def __init__(self, filename): | |
| #Store the filename in object's memory | |
| self.filename = filename | |
| #And that's it, we no longer need to store the contents in the memory |