netskink · January 27, 2020 15:01
diff --git a/gistfile1.txt b/gistfile1.txt
 # from
 # https://www.tensorflow.org/beta/tutorials/estimators/boosted_trees
 # TODO: TensorFlow 2.0 final has been released; update the release-candidate pin below.
 #!pip install tensorflow==2.0.0-rc1
 #try:
 #    %tensorflow_version 2.x
 #except Exception:
 #    pass


 from __future__ import absolute_import, division, print_function, unicode_literals

 import numpy as np
 import tensorflow as tf
 import platform
 import pandas as pd
 # Fix TensorFlow's global random seed.
 tf.random.set_seed(123)


 # Report the Python interpreter version and the TensorFlow build/version strings.
 print(platform.python_version())
 print(tf.version.GIT_VERSION)
 print(tf.version.VERSION)
 print(tf.__version__)

 # Load dataset.
 # The train and eval CSVs are fetched over HTTP; the 'survived' column is
 # removed from each frame with pop() and kept separately as the label.
 dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
 dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
 y_train = dftrain.pop('survived')
 y_eval = dfeval.pop('survived')

 # Number of rows in the training and evaluation sets.
 print(dftrain.shape[0])
 print(dfeval.shape[0])

 # Alias for the feature-column module (not referenced by the code visible here).
 fc = tf.feature_column
 # Columns that are one-hot encoded vs. columns that are fed in as numeric values.
 CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
 NUMERIC_COLUMNS = ['age', 'fare']

 def one_hot_cat_column(feature_name, vocab):
     """Return a one-hot (indicator) feature column for a categorical feature.

     Args:
         feature_name: Key of the feature in the input data.
         vocab: Values that make up the feature's vocabulary list.

     Returns:
         An indicator column wrapping a vocabulary-list categorical column.
     """
     categorical = tf.feature_column.categorical_column_with_vocabulary_list(
         feature_name, vocab)
     return tf.feature_column.indicator_column(categorical)

 # Collect the feature columns later handed to the estimator: one-hot columns
 # for every categorical feature, then float32 numeric columns.
 feature_columns = []
 for feature_name in CATEGORICAL_COLUMNS:
    # Need to one-hot encode categorical features. The vocabulary is the set
    # of distinct values this column takes in the training data.
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(one_hot_cat_column(feature_name, vocabulary))

 for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name,
                                           dtype=tf.float32))
 # Demonstrate the encoding on the first training row: build a standalone
 # indicator column for 'class' with a fixed three-value vocabulary.
 example = dict(dftrain.head(1))
 print(example)
 class_fc = tf.feature_column.indicator_column(
    tf.feature_column.categorical_column_with_vocabulary_list(
        'class',
        ('First', 'Second', 'Third')))
 print(class_fc)

 # Print the raw 'class' value next to the array a DenseFeatures layer
 # produces for it.
 print('Feature value: \"{}\"'.format(example['class'].iloc[0]))
 print('One-hot encoded: ', tf.keras.layers.DenseFeatures([class_fc])(example).numpy())


 # Use entire batch since this is such a small dataset.
 # NUM_EXAMPLES (the number of training labels) is used by the input pipeline
 # as both the shuffle buffer size and the batch size.
 NUM_EXAMPLES = len(y_train)

 def make_input_fn(X, y, n_epochs=None, shuffle=True):
     """Create an input function that yields (features, labels) batches.

     Args:
         X: Table of features; converted to a dict of columns with dict(X).
         y: Labels aligned with the rows of X.
         n_epochs: Number of passes over the data, forwarded to
             Dataset.repeat(). None (the default) is used for training so the
             data is cycled as many times as needed.
         shuffle: When truthy, shuffle with a buffer of NUM_EXAMPLES.

     Returns:
         A zero-argument callable that builds the tf.data.Dataset. Batches
         have size NUM_EXAMPLES (in-memory training does not use smaller
         batches).
     """
     def input_fn():
         features = dict(X)
         source = tf.data.Dataset.from_tensor_slices((features, y))
         pipeline = source.shuffle(NUM_EXAMPLES) if shuffle else source
         return pipeline.repeat(n_epochs).batch(NUM_EXAMPLES)

     return input_fn



 # Training and evaluation input functions.
 # Training uses the defaults (shuffled, n_epochs=None); evaluation makes a
 # single unshuffled pass over the eval set.
 train_input_fn = make_input_fn(dftrain, y_train)
 eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)

 # Since data fits into memory, use entire dataset per layer. It will be faster.
 # Above one batch is defined as the entire dataset.
 n_batches = 1
 est = tf.estimator.BoostedTreesClassifier(feature_columns,
                                           n_batches_per_layer=n_batches)

 # The model will stop training once the specified number of trees is built, not
 # based on the number of steps.
 # NOTE (original author): in Jupyter this call was observed to crash the kernel.
 est.train(train_input_fn, max_steps=100)

 # EVAL
 # The estimator method is `evaluate`; its result is displayed as a pandas
 # Series.
 result = est.evaluate(eval_input_fn)
 print(pd.Series(result))
	# from
	# https://www.tensorflow.org/beta/tutorials/estimators/boosted_trees
	# TODO: TensorFlow 2.0 final has been released; update the release-candidate pin below.
	#!pip install tensorflow==2.0.0-rc1
	#try:
	# %tensorflow_version 2.x
	#except Exception:
	# pass


	from __future__ import absolute_import, division, print_function, unicode_literals

	import numpy as np
	import tensorflow as tf
	import platform
	import pandas as pd
	# Fix TensorFlow's global random seed.
	tf.random.set_seed(123)


	# Report the Python interpreter version and the TensorFlow build/version strings.
	print(platform.python_version())
	print(tf.version.GIT_VERSION)
	print(tf.version.VERSION)
	print(tf.__version__)

	# Load dataset.
	# The train and eval CSVs are fetched over HTTP; the 'survived' column is
	# removed from each frame with pop() and kept separately as the label.
	dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
	dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
	y_train = dftrain.pop('survived')
	y_eval = dfeval.pop('survived')

	# Number of rows in the training and evaluation sets.
	print(dftrain.shape[0])
	print(dfeval.shape[0])

	# Alias for the feature-column module.
	fc = tf.feature_column
	# Columns that are one-hot encoded vs. columns that are fed in as numeric values.
	CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
	NUMERIC_COLUMNS = ['age', 'fare']

	def one_hot_cat_column(feature_name, vocab):
	    """Return a one-hot (indicator) feature column for a categorical feature.

	    Args:
	        feature_name: Key of the feature in the input data.
	        vocab: Values that make up the feature's vocabulary list.

	    Returns:
	        An indicator column wrapping a vocabulary-list categorical column.
	    """
	    return tf.feature_column.indicator_column(
	        tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocab))

	# Collect the feature columns later handed to the estimator: one-hot columns
	# for every categorical feature, then float32 numeric columns.
	feature_columns = []
	for feature_name in CATEGORICAL_COLUMNS:
	    # Need to one-hot encode categorical features. The vocabulary is the set
	    # of distinct values this column takes in the training data.
	    vocabulary = dftrain[feature_name].unique()
	    feature_columns.append(one_hot_cat_column(feature_name, vocabulary))

	for feature_name in NUMERIC_COLUMNS:
	    feature_columns.append(
	        tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

	# Demonstrate the encoding on the first training row: build a standalone
	# indicator column for 'class' with a fixed three-value vocabulary.
	example = dict(dftrain.head(1))
	print(example)
	class_fc = tf.feature_column.indicator_column(
	    tf.feature_column.categorical_column_with_vocabulary_list(
	        'class',
	        ('First', 'Second', 'Third')))
	print(class_fc)

	# Print the raw 'class' value next to the array a DenseFeatures layer
	# produces for it.
	print('Feature value: \"{}\"'.format(example['class'].iloc[0]))
	print('One-hot encoded: ', tf.keras.layers.DenseFeatures([class_fc])(example).numpy())


	# Use entire batch since this is such a small dataset.
	# NUM_EXAMPLES (the number of training labels) is used by the input pipeline
	# as both the shuffle buffer size and the batch size.
	NUM_EXAMPLES = len(y_train)

	def make_input_fn(X, y, n_epochs=None, shuffle=True):
	    """Create an input function that yields (features, labels) batches.

	    Args:
	        X: Table of features; converted to a dict of columns with dict(X).
	        y: Labels aligned with the rows of X.
	        n_epochs: Number of passes over the data, forwarded to
	            Dataset.repeat(). None (the default) is used for training so the
	            data is cycled as many times as needed.
	        shuffle: When truthy, shuffle with a buffer of NUM_EXAMPLES.

	    Returns:
	        A zero-argument callable that builds the tf.data.Dataset. Batches
	        have size NUM_EXAMPLES.
	    """
	    def input_fn():
	        dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
	        if shuffle:
	            dataset = dataset.shuffle(NUM_EXAMPLES)
	        # For training, cycle thru dataset as many times as need (n_epochs=None).
	        dataset = dataset.repeat(n_epochs)
	        # In memory training doesn't use batching.
	        dataset = dataset.batch(NUM_EXAMPLES)
	        return dataset
	    return input_fn



	# Training and evaluation input functions.
	# Training uses the defaults (shuffled, n_epochs=None); evaluation makes a
	# single unshuffled pass over the eval set.
	train_input_fn = make_input_fn(dftrain, y_train)
	eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)

	# Since data fits into memory, use entire dataset per layer. It will be faster.
	# Above one batch is defined as the entire dataset.
	n_batches = 1
	est = tf.estimator.BoostedTreesClassifier(feature_columns,
	                                          n_batches_per_layer=n_batches)

	# The model will stop training once the specified number of trees is built, not
	# based on the number of steps.
	# NOTE (original author): in Jupyter this call was observed to crash the kernel.
	est.train(train_input_fn, max_steps=100)

	# EVAL
	# The estimator method is `evaluate`; its result is displayed as a pandas
	# Series.
	result = est.evaluate(eval_input_fn)
	print(pd.Series(result))