@chiragjn
Last active July 29, 2019 10:08
Adapter feed-forward layer as described in Adapter-BERT
import numpy as np
import tensorflow as tf

K = tf.keras.backend


def gelu(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the ReLU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to perform activation.

    Returns:
        `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
    return x * cdf


class FeedFowardAdapterLayer(tf.keras.layers.Layer):
    """Bottleneck adapter: project down, apply GELU, project back up, and add a residual connection."""

    def __init__(self, hidden_size=64, init_scale=1e-3, **kwargs):
        self.hidden_size = hidden_size
        self.init_scale = init_scale
        super(FeedFowardAdapterLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Use the last input dimension so the residual addition in call() is shape-compatible
        in_size = input_shape[-1]
        # Down-projection to the bottleneck dimension, initialized near zero
        self.dense1 = tf.keras.layers.Dense(
            units=self.hidden_size,
            activation=gelu,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
            bias_initializer=tf.keras.initializers.Zeros())
        # Up-projection back to the input size
        self.dense2 = tf.keras.layers.Dense(
            units=in_size,
            activation=None,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.init_scale),
            bias_initializer=tf.keras.initializers.Zeros())
        super(FeedFowardAdapterLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        # Residual connection around the bottleneck keeps the layer close to identity at initialization
        return x + self.dense2(self.dense1(x))

    def compute_output_shape(self, input_shape):
        return input_shape
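
A minimal usage sketch, assuming TensorFlow 2.x with eager execution; the input shape and variable names below are illustrative and not part of the gist:

# Usage sketch (assumption: TensorFlow 2.x, eager execution; shapes are illustrative)
adapter = FeedFowardAdapterLayer(hidden_size=64, init_scale=1e-3)

# BERT-like activations: (batch, sequence_length, hidden_size)
hidden_states = tf.random.normal((2, 16, 768))
adapted = adapter(hidden_states)

print(adapted.shape)  # (2, 16, 768) -- the adapter preserves the input shape

Because the projections are initialized with a tiny TruncatedNormal stddev (init_scale=1e-3) and wrapped in a residual connection, the adapter starts out close to an identity mapping, so inserting it into a pretrained network initially leaves the network's outputs nearly unchanged.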