Skip to content

Instantly share code, notes, and snippets.

View analyticsindiamagazine's full-sized avatar
:octocat:

Analytics India Magazine analyticsindiamagazine

:octocat:
View GitHub Profile
def _row_to_input_example(row):
    # Convert one DataFrame row into the InputExample structure that
    # bert.run_classifier expects.
    return bert.run_classifier.InputExample(
        guid=None,                  # unique example id; not needed here
        text_a=row[DATA_COLUMN],    # the text being classified
        text_b=None,                # single-sequence task: no sentence pair
        label=row[LABEL_COLUMN],    # target label for this example
    )

# Build InputExamples for the training and validation splits.
train_InputExamples = train.apply(_row_to_input_example, axis=1)
val_InputExamples = val.apply(_row_to_input_example, axis=1)
# This is a path to an uncased (all lowercase) version of BERT on TF Hub.
# Per the module name: L-12 / H-768 / A-12 — presumably 12 layers, 768 hidden
# units, 12 attention heads (standard BERT-Base naming).
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    # Use a throwaway graph/session solely to read the module's
    # tokenization metadata; nothing here is part of the training graph.
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            # NOTE(review): this function is truncated by the page scrape —
            # the sess.run argument list is cut off mid-statement and the
            # rest of the function (building the FullTokenizer) is missing.
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Build the BERT classification model from the TF-Hub module.

    Args:
        is_predicting: presumably toggles train vs. inference paths — the
            branch using it is cut off below; confirm against the full gist.
        input_ids, input_mask, segment_ids: BERT's standard three input
            tensors (token ids, padding mask, sentence-segment ids).
        labels: target labels for the batch.
        num_labels: number of output classes.
    """
    # Load BERT with trainable=True so its weights are fine-tuned.
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    # NOTE(review): truncated by the page scrape — the call to bert_module,
    # the output layer, and the loss computation are missing here.
FROM tensorflow/tensorflow:1.12.0-py3

# UTF-8 locale so Python handles non-ASCII text correctly inside the container.
ENV LANG=C.UTF-8

# WORKDIR creates /gpt-2 if it does not exist, so the former `RUN mkdir` is
# redundant and has been dropped.
WORKDIR /gpt-2

# COPY is preferred over ADD for plain local files: ADD's extra behaviors
# (URL fetch, tar auto-extraction) are not needed and can surprise.
COPY . /gpt-2

RUN pip3 install -r requirements.txt
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Loading Data from an Excel file (path is on a mounted Google Drive).
train = pd.read_excel("/GD/.../DataSets/Data_Train.xlsx")
# Display the dataframe; head(10) returns the first 10 rows
train.head(10)
# Shape of a table (number of rows, number of columns)
train.shape
# Display the names of all columns in a dataframe
# NOTE(review): the statement this comment describes (e.g. train.columns)
# appears to have been lost in the page scrape.
# A function to find the maximum number of features in a single cell
def max_features_in_single_row(train, test, delimiter):
    """Return the largest count of delimiter-separated items in any one cell.

    Args:
        train, test: iterables of strings (e.g. pandas Series columns).
        delimiter: string separating individual features inside a cell.

    Returns:
        int: the maximum item count over all cells (also printed for logging).
    """
    max_info = 0
    # list(a) + list(b) replaces the original Series.append, which was
    # deprecated and removed in pandas 2.0; it also accepts plain lists.
    for cell in list(train) + list(test):
        # Split each cell once (the original split twice per hit) and the
        # pointless "{}".format(delimiter) wrapper is dropped.
        n_items = len(cell.split(delimiter))
        if n_items > max_info:
            max_info = n_items
    print("\n", "-"*35)
    print("Max_Features in One Observation = ", max_info)
    return max_info
# A function to find all the non numeric values
def non_numerals(series):
    """Return the unique values of *series* that cannot be parsed as floats.

    Args:
        series: a pandas Series (anything exposing a .unique() method).

    Returns:
        list: unique non-numeric values, in order of first appearance.
    """
    # Renamed the result list: the original shadowed the function's own name.
    result = []
    for value in series.unique():
        try:
            float(value)  # probing parseability only; result discarded
        except (ValueError, TypeError):
            # Catch only conversion failures — the original bare `except:`
            # would also swallow KeyboardInterrupt/SystemExit.
            result.append(value)
    return result
# Filling Nulls/Nans with the literal string 'NAN' as a sentinel category
train.fillna('NAN', inplace = True)
# Label Encoding
from sklearn.preprocessing import LabelEncoder
le_c = LabelEncoder().fit(unique_cuisines)
# NOTE(review): the encoder fitted above is bound to le_c, but the transform
# below calls le_l — almost certainly a typo; one of the two names is wrong.
train['Cuisines'] = le_l.transform(train['Cuisines'])
# Scaling
# NOTE(review): the next line is placeholder pseudo-code, not valid Python —
# it must be replaced with an actual list of column names before running.
cols = [list of columns to apply standard scaler]
# Splitting training set into training and validation sets (90/10 split),
# with a fixed random_state for reproducibility
from sklearn.model_selection import train_test_split
train, val = train_test_split(train_sample, test_size = 0.1, random_state = 123)
# Separating the independent and dependent variables
# NOTE(review): the next line is placeholder pseudo-code, not valid Python.
cols = [list of column names in order] #last column corresponds to y or target variable
# Training set: every column except the last is a feature; the last is the target
X_train = train[cols[:-1]]
Y_train = train[cols[-1]]
# Validation set
# NOTE(review): the X_val/Y_val statements appear to be cut off by the scrape.