quick_code_to_tensorflow
import tensorflow as tf
import numpy as np
from tensorflow.contrib import layers, rnn

### tf.contrib.seq2seq.AttentionWrapper already provides a ready-made attention layer wrapper
def task_specific_attention(self, inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """Task-specific attention pooling (dot-product scoring against a learned context vector).

    Performs task-specific attention reduction, using a learned
    attention context vector (constant within the task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size];
            `input_size` must be static (known),
            the `units` axis will be attended over (reduced from the output),
            `batch_size` will be preserved.
        output_size: size of the projection's inner (feature) dimension.
    Returns:
        outputs: Tensor of shape [batch_size, input_size].
    """
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None
    with tf.variable_scope(scope or 'attention') as scope:
        # u_w: the learned attention context vector
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer, dtype=tf.float32)
        # fully connected layer projecting h_i to u_i:
        # [batch_size, units, input_size] -> [batch_size, units, output_size]
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn, scope=scope)
        # dot product of each u_i with u_w -> [batch_size, units, 1]
        vector_attn = tf.reduce_sum(tf.multiply(input_projection, attention_context_vector),
                                    axis=2, keep_dims=True)
        # softmax over the `units` axis -> attention weights
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        tf.summary.histogram('attention_weights', attention_weights)
        # weighted sum of the inputs over the `units` axis
        weighted_projection = tf.multiply(inputs, attention_weights)
        outputs = tf.reduce_sum(weighted_projection, axis=1)
        return outputs  # [batch_size, input_size], e.g. hidden_size*2 when the inputs come from a bi-GRU
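### Sketch of the tf.contrib.seq2seq.AttentionWrapper route mentioned at the top (TF 1.x API).
### A minimal, standalone illustration; `num_units`, `memory`, and `decoder_cell` are
### illustrative names, not taken from this gist.
num_units = 128
memory = tf.placeholder(tf.float32, [None, None, num_units])   # encoder outputs to attend over
decoder_cell = tf.contrib.rnn.GRUCell(num_units)
attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=num_units, memory=memory)
attn_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                attention_layer_size=num_units)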
def batchnorm(self, Ylogits, offset, convolutional=False):
    """Batch normalization.

    Args:
        Ylogits: the pre-activation tensor; either a dense output
            [batch_size, features] or a convolutional output
            [batch_size, height, width, channels] (set convolutional=True).
        offset: beta, the learned shift; with ReLU activations it is
            typically initialized around 0.1.
        (scale: gamma, the learned scale, is needed with sigmoid activations
         but matters little with ReLU, so it is omitted here;
         m / v are the batch mean / variance;
         bnepsilon is a small float that prevents division by zero.)
    Returns:
        Ybn: same shape as Ylogits, the batch-normalized result.
        update_moving_averages: op that updates the moving mean and variance,
            which are used at test time.
    """
    # tying the EMA to the global step prevents averaging over non-existing iterations
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, self._global_step)
    bnepsilon = 1e-5
    if convolutional:
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])
    else:
        mean, variance = tf.nn.moments(Ylogits, [0])
    update_moving_averages = exp_moving_avg.apply([mean, variance])
    # self.tst: True at test time -> use the moving averages instead of the batch statistics
    m = tf.cond(self.tst, lambda: exp_moving_avg.average(mean), lambda: mean)
    v = tf.cond(self.tst, lambda: exp_moving_avg.average(variance), lambda: variance)
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
    return Ybn, update_moving_averages
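### Usage sketch (an assumption, not from this gist): how the two returned ops are wired into training.
### `model` stands for an instance of the class defining batchnorm above, with model.tst a bool
### placeholder (False while training); `h`, `W`, `n_units`, `cross_entropy` are illustrative names.
# offset = tf.Variable(tf.constant(0.1, shape=[n_units]))        # beta replaces the usual bias
# Ybn, update_ema = model.batchnorm(tf.matmul(h, W), offset)
# Y = tf.nn.relu(Ybn)
# train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
# train_op = tf.group(train_step, update_ema)                    # refresh the moving averages every step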
def bi_gru(self, inputs):
    """Build the stacked bi-GRU network; returns the output at every time step
    (forward and backward hidden states concatenated on the last axis)."""
    cells_fw = [self.gru_cell() for _ in range(self.n_layer)]
    cells_bw = [self.gru_cell() for _ in range(self.n_layer)]
    initial_states_fw = [cell_fw.zero_state(self.batch_size, tf.float32) for cell_fw in cells_fw]
    initial_states_bw = [cell_bw.zero_state(self.batch_size, tf.float32) for cell_bw in cells_bw]
    outputs, _, _ = rnn.stack_bidirectional_dynamic_rnn(cells_fw, cells_bw, inputs,
                                                        initial_states_fw=initial_states_fw,
                                                        initial_states_bw=initial_states_bw,
                                                        dtype=tf.float32)
    return outputs  # [batch_size, time, hidden_size*2]
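### `gru_cell` is not included in this gist; a typical definition (an assumption, belonging to the
### same class as bi_gru) pairs a GRUCell with dropout. `self.hidden_size` and `self.keep_prob`
### are assumed class attributes.
def gru_cell(self):
    cell = tf.contrib.rnn.GRUCell(self.hidden_size)
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)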
from collections import Counter

def build_vocab(corpus):
    """
    Build a vocabulary with word frequencies for an entire corpus.
    Returns a dictionary `w -> (i, f)`, mapping word strings to pairs of
    word ID and word corpus frequency.
    """
    vocab = Counter()
    for line in corpus:
        tokens = line.strip().split()
        vocab.update(tokens)
    return {word: (i, freq) for i, (word, freq) in enumerate(vocab.items())}
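### Toy example (illustrative, not from the original gist)
corpus = ["the cat sat on the mat", "the dog sat"]
vocab = build_vocab(corpus)   # e.g. vocab["the"] == (word_id, 3); ids depend on Counter's iteration order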
from tqdm import tqdm

def gen_batch(X, y, batch_size=128):
    """Yield successive (X_batch, y_batch) mini-batches of size `batch_size`."""
    sample_num = len(X)
    for start in tqdm(range(0, sample_num, batch_size)):
        end = min(start + batch_size, sample_num)
        X_batch = X[start:end]
        y_batch = y[start:end]
        yield X_batch, y_batch
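### Quick demo (illustrative, numpy-only): yields 8 batches of 128 and one final batch of 24
X_demo = np.arange(1048 * 3).reshape(1048, 3)
y_demo = np.arange(1048)
for X_batch, y_batch in gen_batch(X_demo, y_demo, batch_size=128):
    pass  # e.g. sess.run(train_op, feed_dict={input_x: X_batch, input_y: y_batch})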
### L2 regularization: penalize every trainable weight except biases and variables tagged "noreg"
loss = ...  # your regular output loss (e.g. cross-entropy); lambda_l2_reg is the regularization strength
l2 = lambda_l2_reg * sum(
    tf.nn.l2_loss(tf_var)
    for tf_var in tf.trainable_variables()
    if not ("noreg" in tf_var.name or "Bias" in tf_var.name)
)
loss += l2
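### Illustrative wiring (an assumption; `input_y` and `logits` are hypothetical names):
### the combined loss is what the optimizer minimizes.
# lambda_l2_reg = 1e-4   # hypothetical regularization strength
# data_loss = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits_v2(labels=input_y, logits=logits))
# loss = data_loss + l2
# train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)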
def to_categorical(topics):
    """One-hot encode an array of integer class labels `topics` (labels start from 0)."""
    n_topics = len(set(topics))
    n_sample = len(topics)
    y = np.zeros(shape=(n_sample, n_topics))
    for i in range(n_sample):
        topic_index = topics[i]
        y[i, topic_index] = 1
    return y
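### Quick check (illustrative)
labels = np.array([0, 2, 1, 2])
print(to_categorical(labels))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]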