ntakouris · July 8, 2020 13:29
diff --git a/preprocessing_adv.py b/preprocessing_adv.py
 def preprocessing_fn(inputs):
  """Turn the raw input columns into the columns handed to the trainer.

  Args:
    inputs: dict keyed by feature name. Entries listed in
      OPTIONAL_NUMERIC_FEATURE_KEYS are read through `.indices`, `.values`
      and `.dense_shape`, i.e. they are treated as SparseTensors.

  Returns:
    A copy of `inputs` in which numeric features (required and optional) are
    scaled to [0, 1] and the label is replaced by an int64 index. All other
    entries, including the categorical features, are passed through as-is.
  """
  # Work on a copy: only some entries are replaced, the rest pass through.
  transformed = inputs.copy()

  # Required numeric features only need rescaling into [0, 1].
  for name in NUMERIC_FEATURE_KEYS:
    transformed[name] = tft.scale_to_0_1(transformed[name])

  # Optional numeric features arrive as SparseTensors. Rebuild each one with
  # a dense shape of [batch, 1], fill the gaps with 0, drop the size-1 axis
  # so we end up with one scalar per example, and only then rescale.
  for name in OPTIONAL_NUMERIC_FEATURE_KEYS:
    raw = transformed[name]
    resized = tf.sparse.SparseTensor(
        indices=raw.indices,
        values=raw.values,
        dense_shape=[raw.dense_shape[0], 1])
    filled = tf.sparse.to_dense(sp_input=resized, default_value=0.)
    per_example = tf.squeeze(filled, axis=1)
    transformed[name] = tft.scale_to_0_1(per_example)

  # Categorical features (the label excluded) are left untouched here. We
  # only compute a vocabulary per feature, stored under the feature's own
  # name, which the trainer later uses through a feature column to map each
  # string to an integer id.
  for name in CATEGORICAL_FEATURE_KEYS:
    tft.vocabulary(inputs[name], vocab_filename=name)

  # The label is mapped from its string form to its position in this list;
  # any string not in the list maps to -1.
  label_strings = ['>50K', '<=50K']
  label_ids = tf.cast(tf.range(len(label_strings)), tf.int64)
  label_table = tf.lookup.StaticHashTable(
      tf.lookup.KeyValueTensorInitializer(
          keys=label_strings,
          values=label_ids,
          key_dtype=tf.string,
          value_dtype=tf.int64),
      default_value=-1)
  transformed[LABEL_KEY] = label_table.lookup(transformed[LABEL_KEY])

  return transformed
	def preprocessing_fn(inputs):
		"""Preprocess input columns into transformed columns.

		Args:
			inputs: dict keyed by feature name. Entries listed in
				OPTIONAL_NUMERIC_FEATURE_KEYS are accessed as SparseTensors
				(`.indices`, `.values`, `.dense_shape`).

		Returns:
			A copy of `inputs` where the numeric features are scaled to
			[0, 1], the optional numeric features are densified (missing
			values become 0) before scaling, and the label string is
			replaced by its int64 index (-1 for an unknown label).
			Categorical features are returned unmodified.
		"""
		# Since we are modifying some features and leaving others unchanged,
		# we start by setting `outputs` to a copy of `inputs`.
		outputs = inputs.copy()

		# Scale numeric columns to have range [0, 1].
		for key in NUMERIC_FEATURE_KEYS:
			outputs[key] = tft.scale_to_0_1(outputs[key])

		for key in OPTIONAL_NUMERIC_FEATURE_KEYS:
			# This is a SparseTensor because it is optional. Here we fill in
			# a default value when it is missing.
			sparse = tf.sparse.SparseTensor(
				outputs[key].indices, outputs[key].values,
				[outputs[key].dense_shape[0], 1])
			dense = tf.sparse.to_dense(sp_input=sparse, default_value=0.)
			# Reshaping from a batch of vectors of size 1 to a batch of
			# scalars.
			dense = tf.squeeze(dense, axis=1)
			outputs[key] = tft.scale_to_0_1(dense)

		# For all categorical columns except the label column, we generate a
		# vocabulary but do not modify the feature. This vocabulary is
		# instead used in the trainer, by means of a feature column, to
		# convert the feature from a string to an integer id.
		for key in CATEGORICAL_FEATURE_KEYS:
			tft.vocabulary(inputs[key], vocab_filename=key)

		# For the label column we provide the mapping from string to index.
		table_keys = ['>50K', '<=50K']
		initializer = tf.lookup.KeyValueTensorInitializer(
			keys=table_keys,
			values=tf.cast(tf.range(len(table_keys)), tf.int64),
			key_dtype=tf.string,
			value_dtype=tf.int64)
		# Labels outside `table_keys` map to -1.
		table = tf.lookup.StaticHashTable(initializer, default_value=-1)
		outputs[LABEL_KEY] = table.lookup(outputs[LABEL_KEY])

		return outputs
No results found