Analytics India Magazine analyticsindiamagazine

Analytics India Magazine chronicles technological progress in the space of analytics, artificial intelligence, data science & big data.

analyticsindiamagazine / WIT_training_1.py

Last active November 27, 2019 06:17

	# LINEAR CLASSIFIER

	# Set the number of training steps
	num_steps = 1500

	# Set TensorFlow's logging verbosity to DEBUG so detailed training logs are shown
	tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

	# Defining the classifier
	classifier = tf.estimator.LinearClassifier(

analyticsindiamagazine / WIT_after_prep.py

Created November 27, 2019 05:54

	# Convert dataset to tf.Example protos
	# (df_to_examples is defined outside this snippet)
	examples = df_to_examples(df)

	# Create the feature spec for the data, restricted to the columns listed
	# in features_and_labels
	feature_spec = create_feature_spec(df, features_and_labels)

	# Create the training input function: functools.partial pre-binds the
	# examples, feature spec and label column to tfexamples_input_fn so they
	# do not have to be passed again when train_inpf is called
	train_inpf = functools.partial(tfexamples_input_fn, examples, feature_spec, label_column)

analyticsindiamagazine / WIT_b4_prep.py

Created November 27, 2019 05:40

	# Specify the dataframe (df is another name for train, not a copy)
	df = train

	# Setting the categorical feature to predict or classify (Target)
	label_column = 'Delivery_Time'

	# List the unique classes in the target column
	classes = list(df[label_column].unique())

	#Encode the categories in the target column using the unique classes

analyticsindiamagazine / WIT_preprocessing.py

Created November 27, 2019 05:25

	# Creates a tf feature spec from the dataframe and columns specified.
	def create_feature_spec(df, columns=None):
	feature_spec = {}
	if columns == None:
	columns = df.columns.values.tolist()
	for f in columns:
	if df[f].dtype is np.dtype(np.int64):
	feature_spec[f] = tf.compat.v1.FixedLenFeature(shape=(), dtype=tf.int64)
	elif df[f].dtype is np.dtype(np.float64):
	feature_spec[f] = tf.compat.v1.FixedLenFeature(shape=(), dtype=tf.float32)

analyticsindiamagazine / importsforwhatif.py

Created November 27, 2019 03:51

	#Importing tensorflow 2
	try:
	%tensorflow_version 2.x #gpu
	except Exception:
	pass
	import tensorflow as tf

	#Installing the What-If Tool widget
	try:
	!pip install --upgrade witwidget

analyticsindiamagazine / modeling.py

Created November 26, 2019 09:07

	#Splitting training set into training and validation sets
	from sklearn.model_selection import train_test_split
	train, val = train_test_split(train_sample, test_size = 0.1, random_state = 123)

	#Separating the independent and dependent variables
	cols = [list of column names in order] #last column corresponds to y or target variable
	#Training set
	X_train = train[cols[:-1]]
	Y_train = train[cols[-1]]
	#Validation set

analyticsindiamagazine / Preprocessing.py

Created November 26, 2019 08:48

	#Filling Nulls/Nans with the placeholder string 'NAN' (modifies train in place)
	train.fillna('NAN', inplace = True)

	#Label Encoding
	from sklearn.preprocessing import LabelEncoder
	# Fit the encoder on unique_cuisines (defined outside this snippet) and
	# transform the column with that same fitted encoder.
	le_c = LabelEncoder().fit(unique_cuisines)
	train['Cuisines'] = le_c.transform(train['Cuisines'])

	#Scaling
	cols = [list of columns to apply standard scaler]

analyticsindiamagazine / Cleaning_ratings_votes_reviews.py

Created November 26, 2019 06:37

	#A function to find all the non numeric values
	def non_numerals(series):
		"""Return the unique values of *series* that ``float()`` cannot convert.

		Args:
			series: Object exposing ``unique()`` (for example a pandas Series);
				the values it returns are checked one by one.

		Returns:
			list: The unique values, in the order ``unique()`` yields them, for
			which ``float(value)`` raises ``TypeError``, ``ValueError`` or
			``OverflowError``. Empty when every value converts.
		"""
		rejected = []
		for value in series.unique():
			try:
				float(value)
			except (TypeError, ValueError, OverflowError):
				# Only conversion failures mark a value as non-numeric; any
				# other exception (e.g. KeyboardInterrupt) propagates.
				rejected.append(value)
		return rejected

analyticsindiamagazine / clean_loc_cui.py

Created November 26, 2019 06:31

	#A function to find the maximum number of features in a single cell
	def max_features_in_single_row(train, test, delimiter):
		"""Report the largest number of delimited items held by a single cell.

		Every cell of *train* followed by every cell of *test* is split on
		*delimiter*, and the largest resulting piece count is printed and
		returned.

		Args:
			train: Iterable of strings (for example a pandas Series).
			test: Iterable of strings, examined after *train*.
			delimiter: Separator between items inside a cell; it is formatted
				to a string before splitting.

		Returns:
			int: The maximum piece count over all cells, or 0 when both
			inputs are empty.

		Raises:
			AttributeError: If a cell has no ``split`` method (a non-string
				value such as NaN).
		"""
		# Function-scope import keeps this block self-contained.
		from itertools import chain

		separator = "{}".format(delimiter)
		# chain() walks train then test without relying on Series.append,
		# which pandas 2.0 removed; each cell is split only once.
		max_info = max(
			(len(cell.split(separator)) for cell in chain(train, test)),
			default=0,
		)
		print("\n","-"*35)
		print("Max_Features in One Observation = ", max_info)
		return max_info

analyticsindiamagazine / Exploring_data_features.py

Created November 26, 2019 06:20

	#Loading Data from the Excel training file
	train = pd.read_excel("/GD/.../DataSets/Data_Train.xlsx")

	#Display the dataframe: head(10) returns the first 10 rows
	train.head(10)

	#Shape of a table (number of rows, number of columns)
	train.shape

	#Display the names of all columns in a dataframe