Synthetic data with Gumbel-Softmax activations
One-hot encoded representation (each category gets its own 0/1 column):

ID  Gender_Male  Gender_Female  AgeRange_10-19  AgeRange_20-29
1   1            0              0               1
2   0            1              1               0

Decoded back to the original categorical features:

ID  Gender  AgeRange
1   Male    20-29
2   Female  10-19
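For context, the round trip between the raw categorical columns and their one-hot encoding can be reproduced with pandas. This is a minimal sketch, not part of the original gist; the dataframe and column names simply mirror the tables above.

import pandas as pd

# Original categorical records, matching the decoded table above.
df = pd.DataFrame({'ID': [1, 2],
                   'Gender': ['Male', 'Female'],
                   'AgeRange': ['20-29', '10-19']})

# One-hot encode: each category becomes its own 0/1 column (first table above).
encoded = pd.get_dummies(df, columns=['Gender', 'AgeRange'])

# Decode: per feature, pick the column with the highest value (argmax) and strip the prefix.
decoded = pd.DataFrame({
    'ID': df['ID'],
    'Gender': encoded.filter(like='Gender_').idxmax(axis=1).str.replace('Gender_', '', regex=False),
    'AgeRange': encoded.filter(like='AgeRange_').idxmax(axis=1).str.replace('AgeRange_', '', regex=False),
})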
from typing import NamedTuple, Optional

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input


class Generator(tf.keras.Model):
    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

    def build_model(self, input_shape, dim, data_dim, activation_info: Optional[NamedTuple] = None):
        # Simple MLP mapping a noise vector to the synthetic record space.
        input = Input(shape=input_shape, batch_size=self.batch_size)
        x = Dense(dim, activation='relu')(input)
        x = Dense(dim * 2, activation='relu')(x)
        x = Dense(dim * 4, activation='relu')(x)
        x = Dense(data_dim)(x)
        # Apply Gumbel-Softmax to the logits of the categorical features, if any.
        # GumbelSoftmaxActivation is defined elsewhere; it applies the layer below per feature.
        if activation_info:
            x = GumbelSoftmaxActivation(activation_info)(x)
        return Model(inputs=input, outputs=x)
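A minimal usage sketch, assuming a 128-dimensional Gaussian noise input and four output columns as in the tables above; the sizes are arbitrary, and activation_info is omitted so no Gumbel-Softmax is applied here.

import tensorflow as tf

noise_dim, data_dim, batch_size = 128, 4, 32  # assumed sizes, not from the gist

generator = Generator(batch_size=batch_size)
model = generator.build_model(input_shape=(noise_dim,), dim=64, data_dim=data_dim)

# Sample synthetic records from random noise (raw logits, since activation_info is None).
noise = tf.random.normal((batch_size, noise_dim))
fake_records = model(noise)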
from typing import Optional

from tensorflow import Tensor, TensorShape, one_hot, squeeze, stop_gradient
from tensorflow.keras.layers import Layer
from tensorflow.keras.utils import register_keras_serializable
from tensorflow.math import log
from tensorflow.nn import softmax
from tensorflow.random import categorical, uniform

TOL = 1e-20


def gumbel_noise(shape: TensorShape) -> Tensor:
    """Create a single sample from the standard (loc = 0, scale = 1) Gumbel distribution."""
    uniform_sample = uniform(shape, seed=0)
    return -log(-log(uniform_sample + TOL) + TOL)


@register_keras_serializable(package='Synthetic Data', name='GumbelSoftmaxLayer')
class GumbelSoftmaxLayer(Layer):
    """A Gumbel-Softmax layer meant to be stacked on top of the logits of a categorical feature."""

    def __init__(self, tau: float = 0.2, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.tau = tau

    def call(self, _input):
        """Computes the Gumbel-Softmax for the logits output of a particular categorical feature."""
        # Perturb the logits with Gumbel noise and anneal with the temperature tau.
        noised_input = _input + gumbel_noise(_input.shape)
        soft_sample = softmax(noised_input / self.tau, -1)
        # Draw a hard one-hot sample; stop_gradient keeps the discrete draw out of backpropagation.
        hard_sample = stop_gradient(squeeze(one_hot(categorical(log(soft_sample), 1), _input.shape[-1]), 1))
        return hard_sample, soft_sample

    def get_config(self):
        config = super().get_config().copy()
        config.update({'tau': self.tau})
        return config
Raw generator logits for the one-hot columns:

ID  Gender_Male  Gender_Female  AgeRange_10-19  AgeRange_20-29
1   0.867        0.622          -0.155          0.855
2   0.032        1.045          0.901           -0.122

Per-feature softmax of the same logits (each categorical block sums to 1):

ID  Gender_Male  Gender_Female  AgeRange_10-19  AgeRange_20-29
1   0.561        0.439          0.267           0.733
2   0.266        0.734          0.736           0.264
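To tie the tables to the layer above: a minimal sketch (the logits are copied from the first table; the shapes and per-feature slicing are assumptions) that recovers the per-feature softmax shown in the second table and then draws hard/soft Gumbel-Softmax samples. The layer's own outputs vary from run to run because of the random Gumbel noise and the tau=0.2 temperature.

import tensorflow as tf

# Raw generator logits for two records (columns ordered as in the tables above).
logits = tf.constant([[0.867, 0.622, -0.155, 0.855],
                      [0.032, 1.045, 0.901, -0.122]])

gender_logits, age_logits = logits[:, :2], logits[:, 2:]

# Plain per-feature softmax reproduces the second table (each block sums to 1).
print(tf.nn.softmax(gender_logits, axis=-1).numpy())  # ~[[0.561, 0.439], [0.266, 0.734]]
print(tf.nn.softmax(age_logits, axis=-1).numpy())     # ~[[0.267, 0.733], [0.736, 0.264]]

# Gumbel-Softmax sampling: a hard one-hot sample plus a differentiable soft sample per feature.
layer = GumbelSoftmaxLayer(tau=0.2)
gender_hard, gender_soft = layer(gender_logits)
age_hard, age_soft = layer(age_logits)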