This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
explanation_metadata = { | |
"inputs": { | |
"dayofweek": { | |
"input_tensor_name": "dayofweek:0", | |
"input_baselines": [baselines_mode[0][0]] # Thursday | |
}, | |
"hourofday": { | |
"input_tensor_name": "hourofday:0", | |
"input_baselines": [baselines_mode[0][1]] # 8pm | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess
from io import StringIO


def head(table, n=10):
    """Return the first ``n`` rows of a BigQuery table as a DataFrame.

    Runs the ``bq head`` command-line tool and parses the ASCII table it
    prints.

    Args:
        table: Table reference passed to ``bq head --table``.
        n: Number of rows to request (passed to ``--n``).

    Returns:
        A pandas DataFrame built from the printed table.

    Raises:
        subprocess.CalledProcessError: If ``bq`` exits with a non-zero status.
        IndexError: If the output has no header line.

    NOTE(review): relies on ``pd`` (pandas) being imported elsewhere in the
    notebook/module -- confirm.
    """
    # Pass an argument list (no shell) so the table name is never
    # interpolated into a shell command line.
    completed = subprocess.run(
        ['bq', 'head', '--n', str(n), '--table', table],
        capture_output=True,
        text=True,
        check=True,
    )
    head_list = completed.stdout.strip().splitlines()
    # Assumes the usual bordered layout: line 0 and line 2 are "+---+"
    # borders, line 1 is the header, and the last line is the closing border.
    head_str = '\n'.join([head_list[1]] + head_list[3:-1])
    # The leading and trailing "|" on each row yield empty first/last columns;
    # slice them off.
    return pd.read_csv(StringIO(head_str), delimiter="|").iloc[:, 1:-1]


df = head('publicdata:samples.natality')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def permutation_importances(est, X_eval, y_eval, metric, features): | |
"""Column by column, shuffle values and observe effect on eval set. | |
source: http://explained.ai/rf-importance/index.html | |
A similar approach can be done during training. See "Drop-column importance" | |
in the above article.""" | |
def accuracy_metric(est, X, y): | |
"""TensorFlow estimator accuracy.""" | |
eval_input_fn = make_input_fn(X, | |
y=y, | |
shuffle=False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the feature importances from the trained estimator; normalize=True is
# passed so the values are reported on a normalized scale.
importances = est.experimental_feature_importances(normalize=True)
df_imp = pd.Series(importances)

# Visualize importances.
N = 8  # Number of features to display.
# Take the first N entries (presumably already ordered by importance --
# confirm against the estimator docs) and reverse them so the first entry
# ends up at the top of the horizontal bar chart.
ax = (df_imp.iloc[0:N][::-1]
      .plot(kind='barh'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make predictions together with their explanations; each yielded dict is
# expected to carry a 'dfc' entry, which is collected into one row per example.
pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))
df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])

# Plot results.
ID = 182
example = df_dfc.iloc[ID]  # Choose ith example from evaluation set.
TOP_N = 8  # View top 8 features.
# Rank features by absolute contribution (ascending) and keep the TOP_N
# largest, then plot their signed values.
sorted_ix = example.abs().sort_values()[-TOP_N:].index
ax = example[sorted_ix].plot(kind='barh')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hyperparameters for the boosted-trees estimator.
params = {
    'n_trees': 50,
    'max_depth': 3,
    'n_batches_per_layer': 1,
    # You must enable center_bias = True to get DFCs. This will force the model
    # to make an initial prediction before using any features (e.g. use the
    # mean of the training labels for regression or log odds for
    # classification when using cross entropy loss).
    'center_bias': True,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Since data fits into memory, use entire dataset per layer. It will be faster.
# Above one batch is defined as the entire dataset.
n_batches = 1
est = tf.estimator.BoostedTreesClassifier(feature_columns,
                                          n_batches_per_layer=n_batches)

# The model will stop training once the specified number of trees is built, not
# based on the number of steps.
est.train(train_input_fn, max_steps=100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Linear classifier built on the same feature columns.
linear_est = tf.estimator.LinearClassifier(feature_columns)

# Train model.
linear_est.train(train_input_fn, max_steps=100)

# Evaluation.
result = linear_est.evaluate(eval_input_fn)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use entire batch since this is such a small dataset. | |
NUM_EXAMPLES = len(y_train) | |
def make_input_fn(X, y, n_epochs=None, shuffle=True): | |
def input_fn(): | |
dataset = tf.data.Dataset.from_tensor_slices((dict(X), y)) | |
if shuffle: | |
dataset = dataset.shuffle(NUM_EXAMPLES) | |
# For training, cycle thru dataset as many times as need (n_epochs=None). | |
dataset = dataset.repeat(n_epochs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take the first training row as a dict of column name -> single-row Series.
example = dict(dftrain.head(1))

# Wrap the categorical 'class' column in an indicator column so that it is
# emitted as a one-hot vector over the three vocabulary entries.
class_fc = tf.feature_column.indicator_column(
    tf.feature_column.categorical_column_with_vocabulary_list(
        'class', ('First', 'Second', 'Third')))

print('Feature value: "{}"'.format(example['class'].iloc[0]))
print('One-hot encoded: ',
      tf.keras.layers.DenseFeatures([class_fc])(example).numpy())
# Feature value: "Third"
# One-hot encoded: [[0. 0. 1.]]
NewerOlder