chiragjn · July 29, 2019 10:08
diff --git a/feedforwardadapterlayer.py b/feedforwardadapterlayer.py
 import numpy as np
 import tensorflow as tf
 # Shorthand alias for the Keras backend module (not referenced in the visible part of this file).
 K = tf.keras.backend


 def gelu(x):
    """Apply the Gaussian Error Linear Unit (GELU) activation.

    Uses the tanh-based approximation of GELU, a smooth alternative to ReLU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to activate.

    Returns:
        `x` multiplied element-wise by the tanh approximation of the
        Gaussian CDF evaluated at `x`.
    """
    scale = np.sqrt(2 / np.pi)
    cubic_term = x + 0.044715 * tf.pow(x, 3)
    gate = 0.5 * (1.0 + tf.tanh(scale * cubic_term))
    return x * gate

 class FeedFowardAdapterLayer(tf.keras.layers.Layer):
    """Residual bottleneck adapter computing `x + dense2(gelu(dense1(x)))`.

    The input is projected to `hidden_size` units with a GELU activation,
    projected back to the size of the input's last axis, and added to the
    original input. Both kernels are initialised from a truncated normal
    with standard deviation `init_scale`; both biases start at zero.

    Args:
        hidden_size: number of units in the bottleneck projection.
        init_scale: stddev of the truncated-normal kernel initializer.
        **kwargs: forwarded unchanged to `tf.keras.layers.Layer`.
    """

    def __init__(self, hidden_size=64, init_scale=1e-3, **kwargs):
        # Initialise the base layer first so Keras' attribute tracking is
        # set up before any attribute is assigned on the instance.
        super(FeedFowardAdapterLayer, self).__init__(**kwargs)
        self.hidden_size = hidden_size
        self.init_scale = init_scale

    def build(self, input_shape):
        """Create the down- and up-projection sub-layers.

        Args:
            input_shape: shape of the tensor that will be passed to `call`.
        """
        # Dense transforms the last axis, so the up-projection has to restore
        # the size of that axis (not axis 1) for the residual sum in `call`
        # to line up on inputs of rank > 2. For rank-2 inputs both indices
        # refer to the same axis, so existing behaviour is unchanged.
        in_size = input_shape[-1]
        self.dense1 = tf.keras.layers.Dense(
            units=self.hidden_size,
            activation=gelu,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
            bias_initializer=tf.keras.initializers.Zeros())
        self.dense2 = tf.keras.layers.Dense(
            units=in_size,
            activation=None,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
            bias_initializer=tf.keras.initializers.Zeros())
        super(FeedFowardAdapterLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """Return `x` plus the bottleneck transformation of `x`."""
        return x + self.dense2(self.dense1(x))

    def compute_output_shape(self, input_shape):
        """Return `input_shape` unchanged; the residual sum keeps the shape."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(FeedFowardAdapterLayer, self).get_config()
        config.update({
            "hidden_size": self.hidden_size,
            "init_scale": self.init_scale,
        })
        return config
	import numpy as np
	import tensorflow as tf
	# Shorthand alias for the Keras backend module (not referenced in the visible part of this file).
	K = tf.keras.backend


	def gelu(x):
	    """Apply the Gaussian Error Linear Unit (GELU) activation.

	    Uses the tanh-based approximation of GELU, a smooth alternative to ReLU.
	    Original paper: https://arxiv.org/abs/1606.08415

	    Args:
	        x: float Tensor to activate.

	    Returns:
	        `x` multiplied element-wise by the tanh approximation of the
	        Gaussian CDF evaluated at `x`.
	    """
	    scale = np.sqrt(2 / np.pi)
	    cubic_term = x + 0.044715 * tf.pow(x, 3)
	    gate = 0.5 * (1.0 + tf.tanh(scale * cubic_term))
	    return x * gate

	class FeedFowardAdapterLayer(tf.keras.layers.Layer):
	    """Residual bottleneck adapter computing `x + dense2(gelu(dense1(x)))`.

	    The input is projected to `hidden_size` units with a GELU activation,
	    projected back to the size of the input's last axis, and added to the
	    original input. Both kernels are initialised from a truncated normal
	    with standard deviation `init_scale`; both biases start at zero.

	    Args:
	        hidden_size: number of units in the bottleneck projection.
	        init_scale: stddev of the truncated-normal kernel initializer.
	        **kwargs: forwarded unchanged to `tf.keras.layers.Layer`.
	    """

	    def __init__(self, hidden_size=64, init_scale=1e-3, **kwargs):
	        # Initialise the base layer first so Keras' attribute tracking is
	        # set up before any attribute is assigned on the instance.
	        super(FeedFowardAdapterLayer, self).__init__(**kwargs)
	        self.hidden_size = hidden_size
	        self.init_scale = init_scale

	    def build(self, input_shape):
	        """Create the down- and up-projection sub-layers.

	        Args:
	            input_shape: shape of the tensor that will be passed to `call`.
	        """
	        # Dense transforms the last axis, so the up-projection has to restore
	        # the size of that axis (not axis 1) for the residual sum in `call`
	        # to line up on inputs of rank > 2. For rank-2 inputs both indices
	        # refer to the same axis, so existing behaviour is unchanged.
	        in_size = input_shape[-1]
	        self.dense1 = tf.keras.layers.Dense(
	            units=self.hidden_size,
	            activation=gelu,
	            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
	            bias_initializer=tf.keras.initializers.Zeros())
	        self.dense2 = tf.keras.layers.Dense(
	            units=in_size,
	            activation=None,
	            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
	            bias_initializer=tf.keras.initializers.Zeros())
	        super(FeedFowardAdapterLayer, self).build(input_shape)  # Be sure to call this at the end

	    def call(self, x):
	        """Return `x` plus the bottleneck transformation of `x`."""
	        return x + self.dense2(self.dense1(x))

	    def compute_output_shape(self, input_shape):
	        """Return `input_shape` unchanged; the residual sum keeps the shape."""
	        return input_shape

	    def get_config(self):
	        """Return the constructor arguments so the layer can be re-created."""
	        config = super(FeedFowardAdapterLayer, self).get_config()
	        config.update({
	            "hidden_size": self.hidden_size,
	            "init_scale": self.init_scale,
	        })
	        return config