Rahul Agarwal MLWhiz

🤓

Focusing

Autodidact Data Scientist, Amateur Guitarist, Pessimistic Go-getter.

MLWhiz / preprocessing.py

Created May 16, 2021 20:08

	# Dealing with long docs:
	max_length = 384 # The maximum length of a feature (question and context)
	doc_stride = 128 # The authorized overlap between two parts of the context when splitting it

	def prepare_train_features(examples):
	# Tokenize our examples with truncation and padding, but keep the overflows using a stride. This can result
	# in one example producing several features when its context is long, each of those features having a
	# context that slightly overlaps the context of the previous feature.
	tokenized_examples = tokenizer(
	examples["question" ],

MLWhiz / visualize_squad.py

Created May 16, 2021 20:04

	# Load the dataset registered under the name "squad"; visualize() below indexes it by split name.
	datasets = load_dataset("squad")
	def visualize(datasets, datatype='train', n_questions=10):
	    """Print randomly chosen question-answering examples from one split.

	    Args:
	        datasets: Mapping from split name to an indexable collection of
	            examples. Each example is read through its 'context',
	            'question' and 'answers' keys, with 'answers' providing
	            'text' and 'answer_start'.
	        datatype: Key of the split to draw examples from.
	        n_questions: How many examples to print. Indices are drawn with
	            replacement, so the same example may be printed more than once.
	    """
	    split = datasets[datatype]
	    # random.choices accepts any sequence, so the range needs no list copy.
	    picked = random.choices(range(len(split)), k=n_questions)
	    for idx in picked:
	        # Fetch the row once instead of re-indexing the split for every field.
	        example = split[idx]
	        answers = example['answers']
	        print(f"Context:{example['context']}")
	        print(f"Question:{example['question']}")
	        print(f"Answer:{answers['text']}")
	        print(f"Answer Start in Text:{answers['answer_start']}")
	        print("-"*100)

MLWhiz / hugginface.py

Created May 16, 2021 19:39

	import random

	import torch
	import transformers
	from datasets import load_dataset, load_metric
	from transformers import (
	    AutoModelForQuestionAnswering,
	    AutoTokenizer,
	    Trainer,
	    TrainingArguments,
	    default_data_collator,
	)

	# Load the tokenizer that belongs to the named pretrained checkpoint.
	tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

MLWhiz / Complex_small.py

Created December 10, 2020 23:20

	import math
	class Complex:
	def __init__(self, re=0, im=0):
	self.re = re
	self.im = im
	def __add__(self, other):
	# If Int or Float Added, return a Complex number where float/int is added to the real part
	if isinstance(other, int) or isinstance(other, float):
	return Complex(self.re + other,self.im)
	# If a Complex number is added, return a new complex number having a real and an imaginary part

MLWhiz / dunder_ops.py

Created November 28, 2020 19:08

	import math
	class Complex:
	def __init__(self, re=0, im=0):
	self.re = re
	self.im = im

	def __add__(self, other):
	if isinstance(other, int) or isinstance(other, float):
	return Complex(self.re + other,self.im)
	elif isinstance(other, Complex):

MLWhiz / complexNumBasicOperators.py

Created November 28, 2020 18:52

	import math
	class Complex:
	def __init__(self, re=0, im=0):
	self.re = re
	self.im = im

	def __add__(self, other):
	if isinstance(other, int) or isinstance(other, float):
	return Complex(self.re + other,self.im)
	elif isinstance(other, Complex):

MLWhiz / answer.py

Created November 24, 2020 19:20

MLWhiz / Shapes.py

Last active November 24, 2020 16:05

MLWhiz / training_with_GPU.py

Created September 7, 2020 15:19

	num_epochs = 5
	for epoch in range(num_epochs):
	model.train()
	for x_batch,y_batch in train_dataloader:
	if train_on_gpu:
	x_batch,y_batch = x_batch.cuda(), y_batch.cuda()
	optimizer.zero_grad()
	pred = model(x_batch)
	loss = loss_criterion(pred, y_batch)
	loss.backward()

MLWhiz / Parallelize_GPU.py

Created September 7, 2020 15:18

	# Whether to train on a gpu
	train_on_gpu = torch.cuda.is_available()
	print(f'Train on gpu: {train_on_gpu}')

	# Default to False so `multi_gpu` is always bound, even when no GPU is
	# available; otherwise later reads of it would raise NameError on CPU-only hosts.
	multi_gpu = False
	if train_on_gpu:
	    # Number of gpus
	    gpu_count = torch.cuda.device_count()
	    print(f'{gpu_count} gpus detected.')
	    multi_gpu = gpu_count > 1