crawles’s gists

crawles / explanations_metadata.py

Last active February 23, 2020 20:48

	explanation_metadata = {
	"inputs": {
	"dayofweek": {
	"input_tensor_name": "dayofweek:0",
	"input_baselines": [baselines_mode[0][0]] # Thursday
	},
	"hourofday": {
	"input_tensor_name": "hourofday:0",
	"input_baselines": [baselines_mode[0][1]] # 8pm
	},

crawles / python_head_query.py

Created February 14, 2020 15:34

Preview a BigQuery table using the `bq head` command and save the result to a pandas DataFrame.

	from io import StringIO

	def head(table, n=10):
	head_list = !bq head --n $n --table $table
	head_str = '\n'.join([head_list[1]] + head_list[3:-1])
	return pd.read_csv(StringIO(head_str), delimiter="\|").iloc[:, 1:-1]

	# Preview the public natality sample table using head's default row count.
	df = head('publicdata:samples.natality')

crawles / permutation_importances.py

Last active March 5, 2019 02:23

	def permutation_importances(est, X_eval, y_eval, metric, features):
	"""Column by column, shuffle values and observe effect on eval set.
	source: http://explained.ai/rf-importance/index.html
	A similar approach can be done during training. See "Drop-column importance"
	in the above article."""
	def accuracy_metric(est, X, y):
	"""TensorFlow estimator accuracy."""
	eval_input_fn = make_input_fn(X,
	y=y,
	shuffle=False,

crawles / importances.py

Created March 5, 2019 02:21

	# Compute normalized feature importances and wrap them in a Series.
	importances = est.experimental_feature_importances(normalize=True)
	df_imp = pd.Series(importances)

	# Horizontal bar chart of the first N entries, drawn in reversed order.
	N = 8
	first_n = df_imp.iloc[:N]
	ax = first_n[::-1].plot(kind='barh')

crawles / predict_explain.py

Created March 5, 2019 02:20

	ID = 182  # Position of the evaluation example to inspect.
	TOP_N = 8  # Number of features to display.

	# Collect predictions together with their explanations.
	pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))
	dfc_rows = [entry['dfc'] for entry in pred_dicts]
	df_dfc = pd.DataFrame(dfc_rows)

	# Plot the TOP_N contributions of largest magnitude for the chosen example.
	example = df_dfc.iloc[ID]
	by_magnitude = example.abs().sort_values()
	sorted_ix = by_magnitude[-TOP_N:].index
	ax = example[sorted_ix].plot(kind='barh')

crawles / train_trees_explain.py

Created March 5, 2019 02:18

	# Hyperparameters for the boosted-trees model.
	params = dict(
	    n_trees=50,
	    max_depth=3,
	    n_batches_per_layer=1,
	    # center_bias must be True to obtain DFCs. It forces the model to make
	    # an initial prediction before any feature is used (e.g. the mean of
	    # the training labels for regression, or the log odds for
	    # classification with cross entropy loss).
	    center_bias=True,
	)

crawles / train_trees.py

Created March 5, 2019 02:17

	# The whole dataset fits in memory, so a single batch (defined above as the
	# entire dataset) feeds each layer, which is faster.
	n_batches = 1
	est = tf.estimator.BoostedTreesClassifier(
	    feature_columns=feature_columns,
	    n_batches_per_layer=n_batches,
	)

	# Training ends once the configured number of trees has been built, rather
	# than being governed by the number of steps.
	est.train(input_fn=train_input_fn, max_steps=100)

crawles / linear_model.py

Created March 5, 2019 02:16

	# Build a linear classifier over the prepared feature columns.
	linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

	# Fit the model, capped at 100 steps.
	linear_est.train(input_fn=train_input_fn, max_steps=100)

	# Evaluate with the evaluation input function and keep the returned result.
	result = linear_est.evaluate(input_fn=eval_input_fn)

crawles / input_fn.py

Created March 5, 2019 02:10

	# Use entire batch since this is such a small dataset.
	NUM_EXAMPLES = len(y_train)

	def make_input_fn(X, y, n_epochs=None, shuffle=True):
	def input_fn():
	dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
	if shuffle:
	dataset = dataset.shuffle(NUM_EXAMPLES)
	# For training, cycle through the dataset as many times as needed (n_epochs=None).
	dataset = dataset.repeat(n_epochs)

crawles / preview_features.py

Last active March 5, 2019 02:09

	# Take the first training row as a dict mapping column name to its values.
	example = dict(dftrain.head(1))

	# Indicator (one-hot) column over the three listed 'class' values.
	class_vocab = tf.feature_column.categorical_column_with_vocabulary_list(
	    'class', ('First', 'Second', 'Third'))
	class_fc = tf.feature_column.indicator_column(class_vocab)

	feature_value = example['class'].iloc[0]
	print('Feature value: "{}"'.format(feature_value))

	dense_layer = tf.keras.layers.DenseFeatures([class_fc])
	print('One-hot encoded: ', dense_layer(example).numpy())

	# Output:
	# Feature value: "Third"
	# One-hot encoded: [[0. 0. 1.]]

Chris Rawles crawles