Prince Grover groverpr

Senior Applied Scientist at Amazon

groverpr / load_base_model

Created April 13, 2020 04:19

	def load_base_model(model_path, epoch, ctx, layer_name=None, n_inputs=2):
	""" Loads the model from given model path
	and returns a subnetwork that gives output from layer_name
	"""
	net = gluon.nn.SymbolBlock.imports(
	model_path + "-symbol.json",
	['data%i' % i for i in range(n_inputs)],
	model_path + "-%.4d.params" % epoch,
	ctx=ctx,
	)

groverpr / generate_tsne

Created March 29, 2020 05:47

	few_words = ['great', 'excellent', 'best', 'perfect', 'wonderful', 'well',
	'fun', 'love', 'amazing', 'also', 'enjoyed', 'favorite', 'it',
	'and', 'loved', 'highly', 'bit', 'job', 'today', 'beautiful',
	'you', 'definitely', 'superb', 'brilliant', 'world', 'liked',
	'still', 'enjoy', 'life', 'very', 'especially', 'see', 'fantastic',
	'both', 'shows', 'good', 'may', 'terrific', 'heart', 'classic',
	'will', 'enjoyable', 'beautifully', 'always', 'true', 'perfectly',
	'surprised', 'think', 'outstanding', 'most',

	'bad', 'worst', 'awful', 'waste', 'boring', 'poor', 'terrible',

groverpr / train_fn

Created March 29, 2020 05:42

groverpr / use_custom_net

Created March 22, 2020 02:09

	# Preparation of network arguments
	ctx = [mx.gpu(0)] # use a GPU
	tt = transformer_pipe.named_steps['token2index'] # to get token to integer map
	max_idx = max(tt.tok2idx.values())+1 # size of vocabulary of all tokens in training data
	tok_embed_dim = 64 # embedding size of each token
	review_embed_dim = 50 # embedding size of hidden state in GRU

	input_output_embed_map = {"token_embed": (max_idx, tok_embed_dim),
	"hidden_embed": (None, review_embed_dim)}

groverpr / custom_net

Created March 22, 2020 01:52

	class CustomSeqNet(gluon.nn.HybridBlock):
	"""
	Custom defined network for sequence data that is used to predict a binary label.

	"""
	def __init__(self, input_output_embed_map, dense_sizes=[100], dropouts=[0.2], activation="relu"):
	"""
	input_output_embed_map: {"token_embed": (max_tok_idx, tok_embed_dim), "hidden_embed": (,hidden_embed_dim))}
	"""
	self.dense_sizes = dense_sizes # list of output dimension of dense layers

groverpr / mxnet_dataset_dataloader

Created March 12, 2020 08:01

	def get_dataloader(dataset,
	dataset_type="train", # valid/test
	batch_size=256,
	bucket_num=5,
	shuffle=True, # true for training
	num_workers=1):

	# Batchify function appends the length of each sequence to feed as addtional input
	combined_batchify_fn = nlp.data.batchify.Tuple(
	nlp.data.batchify.Pad(axis=0, ret_length=True),

groverpr / transfer_learn_mxnet_notebook.ipynb

Created February 25, 2020 02:02

Full notebook for transfer learning tutorial using MXNet

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

groverpr / transformers_sklearn_eg

Last active February 25, 2020 01:46

	class Tokenize(BaseEstimator, TransformerMixin):
	"""
	Takes in pandas series and applies tokenization on each row based on given split pattern.
	"""
	def __init__(self, split_pat=f"([{string.punctuation}])"):
	self.split_pat = split_pat # re pattern used to split string to tokens. default splits over any string punctuation

	def tokenize(self, s):
	""" Tokenize string """
	re_tok = re.compile(self.split_pat)

groverpr / transformers_sklearn

Created February 25, 2020 01:42

	# Transformers present in Pipeline work in series. 1->2->3->4
	transformer_pipe = Pipeline(steps=[
	("null_impute", NullImputer(strategy="constant", fill_value="null")),
	("lower_case", LowerCaser()),
	("tokenize", Tokenize(f"([{string.punctuation}])")),
	("token2index", Tok2Idx())
	])

	X_train_transformed = transformer_pipe.fit_transform(X_train) # fit and transform on train data
	X_valid_transformed = transformer_pipe.transform(X_valid)

groverpr / comparisons.ipynb

Created July 1, 2019 09:27

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.