Manuel Romero mrm8488

🏠

Working from home

NLP/NLG Senior Engineer. Team lead and Back-End senior dev. Scrum Master and PM/PO by Scrum Alliance.

mrm8488 / text_dataset_pytorch.py

Created April 17, 2020 03:17

Create an efficient text dataset

	class LazyTextDataset(Dataset):
	    """Map-style dataset that reads CSV rows from a text file on demand.

	    Only the filename and the line count are kept on the instance; each row
	    is fetched through ``linecache`` when it is requested. Every physical
	    line is parsed on its own, so a quoted CSV field that spans several
	    lines is not reassembled.
	    """

	    def __init__(self, filename):
	        """Remember ``filename`` and count its lines with ``wc -l``.

	        Args:
	            filename: Path of the CSV/text file to serve rows from.

	        Raises:
	            subprocess.CalledProcessError: If ``wc`` exits with an error
	                (for example when the file does not exist).
	        """
	        self._filename = filename
	        # Pass an argument list (no shell) so that spaces or shell
	        # metacharacters in the filename are neither split nor executed;
	        # "--" keeps a filename starting with "-" from being read as an option.
	        wc_output = subprocess.check_output(["wc", "-l", "--", filename])
	        # wc prints "<count> <filename>"; the count is the first field.
	        self._total_data = int(wc_output.split()[0])

	    def __len__(self):
	        """Return the number of lines reported by ``wc -l``.

	        ``wc -l`` counts newline characters, so a final line without a
	        trailing newline is not included in this number.
	        """
	        return self._total_data

	    def __getitem__(self, idx):
	        """Return line ``idx`` (0-based) parsed as one CSV row.

	        Args:
	            idx: Zero-based line index.

	        Returns:
	            list[str]: The fields of the requested line.

	        Raises:
	            IndexError: If the file has no line at that position.
	        """
	        # linecache numbers lines from 1 and returns '' when the line does
	        # not exist (a blank line in the file still comes back as '\n').
	        line = linecache.getline(self._filename, idx + 1)
	        if not line:
	            raise IndexError(f"line index {idx} is out of range")
	        return next(csv.reader([line]))

mrm8488 / install_conda_google_colab.txt

Last active April 8, 2020 03:55

	"################################################################################\n",
	"# INSTALL CONDA ON GOOGLE COLAB\n",
	"################################################################################\n",
	"! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh\n",
	"! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh\n",
	"! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local\n",
	"import sys\n",
	"sys.path.append('/usr/local/lib/python3.7/site-packages/')"

mrm8488 / smallberta_pretraining.ipynb

Created February 25, 2020 20:44 — forked from aditya-malte/smallberta_pretraining.ipynb

smallBERTa_Pretraining.ipynb

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

mrm8488 / smallberta_pretraining.ipynb

Created February 25, 2020 20:44 — forked from aditya-malte/smallberta_pretraining.ipynb

smallBERTa_Pretraining.ipynb

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

mrm8488 / hypercropz.py

Created February 24, 2020 01:08 — forked from connorbell/hypercropz.py

	from tkinter import *
	from PIL import ImageTk,Image
	import time
	import os

	# Target image size used by the script (presumably in pixels -- TODO confirm).
	targetImageWidth, targetImageHeight = 850, 400

	# Input image size starts at zero; presumably filled in once an image is loaded.
	inputImageWidth = inputImageHeight = 0

mrm8488 / iterable_dataset_v2.py

Created February 23, 2020 03:28 — forked from kabirahuja2431/iterable_dataset_v2.py

	class CustomIterableDatasetv2(IterableDataset):
	    """Iterable dataset that keeps two file paths rather than file contents."""

	    def __init__(self, filename_en, filename_gm):
	        """Store the two filenames on the instance.

	        Args:
	            filename_en: Path of the first file (presumably the English
	                side of a parallel corpus -- confirm against the caller).
	            filename_gm: Path of the second file (presumably the German
	                side -- confirm against the caller).
	        """
	        # Only the paths are remembered; the file contents are not held
	        # in memory by this constructor.
	        self.filename_en = filename_en
	        self.filename_gm = filename_gm

mrm8488 / iterable_dataloader_v2.py

Created February 23, 2020 03:27 — forked from kabirahuja2431/iterable_dataloader_v2.py

	# Wrap the iterable dataset in a DataLoader that yields batches of 64.
	dataset = CustomIterableDatasetv1('path_to/somefile')
	dataloader = DataLoader(dataset, batch_size=64)

	# Each batch is unpacked into X and y.
	for X, y in dataloader:
	    print(len(X))  # 64
	    print(y.shape)  # (64,)

	    ### Do something with X and y

	    ###

mrm8488 / iterable_dataset_v1.py

Created February 23, 2020 03:26 — forked from kabirahuja2431/iterable_dataset_v1.py

	class CustomIterableDatasetv1(IterableDataset):
	    """Iterable dataset that keeps a file path rather than the file contents."""

	    def __init__(self, filename):
	        """Store the filename on the instance.

	        Args:
	            filename: Path of the file this dataset refers to.
	        """
	        # Only the path is remembered; the file contents are not held
	        # in memory by this constructor.
	        self.filename = filename

	def preprocess(self, text):

mrm8488 / iterable_dataloader_v0.py

Created February 23, 2020 03:22 — forked from kabirahuja2431/iterable_dataloader_v0.py

	# Create the iterable dataset object.
	dataset = CustomIterableDataset('path_to/somefile')
	# Create the dataloader that batches it in groups of 64.
	dataloader = DataLoader(dataset, batch_size=64)

	for data in dataloader:
	    # data is a list containing 64 (= batch_size) consecutive lines of the file.
	    print(len(data))  # [64,]

	    # The text and labels still need to be separated from each other,
	    # and the text still needs preprocessing.

mrm8488 / iterable_dataset_v0.py

Created February 23, 2020 03:20 — forked from kabirahuja2431/iterable_dataset_v0.py

	from torch.utils.data import IterableDataset

	class CustomIterableDataset(IterableDataset):
	    """Iterable dataset that keeps a file path rather than the file contents."""

	    def __init__(self, filename):
	        """Store the filename on the instance.

	        Args:
	            filename: Path of the file this dataset refers to.
	        """
	        # Only the path is remembered; the file contents are not held
	        # in memory by this constructor.
	        self.filename = filename