- current models have trouble learning dependencies between distant positions (i.e. between characters/words far apart); the number of ops needed to relate two positions grows with their distance, O(n) or O(log n).
- the Transformer is O(1) in the number of ops: a constant number of operations relates any two positions.
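To make that contrast concrete, here is a rough illustration (not from the notes; the kernel size, the log base, and the function names are my own assumptions) of how many layers it takes to connect two positions a given distance apart in a convolutional stack versus a single self-attention layer:

```python
import math

def conv_layers_needed(distance, kernel_size=3):
    # A contiguous conv layer widens the receptive field by (kernel_size - 1),
    # so relating two positions `distance` apart takes ~distance/(k-1) layers: O(n).
    return math.ceil(distance / (kernel_size - 1))

def dilated_conv_layers_needed(distance, kernel_size=3):
    # Dilated convs widen the receptive field multiplicatively: roughly O(log n).
    return max(1, math.ceil(math.log(distance, kernel_size)))

def self_attention_layers_needed(distance):
    # Every position attends to every other position directly: O(1).
    return 1

for d in (8, 64, 512):
    print(d, conv_layers_needed(d), dilated_conv_layers_needed(d),
          self_attention_layers_needed(d))
```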
- encoder-decoder architecture with residual connections; the encoder and decoder are each a stack of N identical layers (N = 6 in the paper).
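A minimal sketch of that stacking pattern, assuming hypothetical names (`SublayerWithResidual`, `clones`) and omitting the attention, feed-forward, and dropout details; it uses the pre-norm residual `x + sublayer(norm(x))`, whereas the paper itself normalizes after the addition:

```python
import copy
import torch
import torch.nn as nn

class SublayerWithResidual(nn.Module):
    """Wrap a sub-layer with a residual connection: x + sublayer(norm(x))."""
    def __init__(self, sublayer, d_model=512):
        super().__init__()
        self.sublayer = sublayer
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        return x + self.sublayer(self.norm(x))

def clones(module, N):
    """Stack N identical copies of a layer (the paper uses N = 6)."""
    return nn.ModuleList(copy.deepcopy(module) for _ in range(N))

# Toy usage: a feed-forward sub-layer repeated N times, each with a residual connection.
d_model = 512
ffn = nn.Sequential(nn.Linear(d_model, 2048), nn.ReLU(), nn.Linear(2048, d_model))
stack = clones(SublayerWithResidual(ffn, d_model), N=6)
x = torch.randn(2, 10, d_model)  # (batch, sequence, d_model)
for layer in stack:
    x = layer(x)
```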
- We also modify the self-attention sub-layer in the decoder stack to prevent positions from attending to subsequent positions. This masking, combined with the fact that the output embeddings are offset by one position, **ensures that the predictions for position i can depend only on the known outputs at positions less than i**.
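A quick sketch of the "offset by one position" part (the tensors and token values are made up; the mask itself is built by `subsequent_mask` below): the decoder input is the target sequence shifted right, so the label at each position is exactly the token the decoder has not yet been shown.

```python
import torch

# Target sentence with start/end tokens, batch size 1 (values are illustrative).
tgt = torch.tensor([[1, 5, 6, 7, 2]])  # <bos> w1 w2 w3 <eos>
decoder_input = tgt[:, :-1]            # <bos> w1 w2 w3   -> fed to the decoder
labels = tgt[:, 1:]                    # w1 w2 w3 <eos>   -> what position i must predict
# Combined with the mask below, the prediction at position i can only use
# decoder_input[:, :i+1], i.e. the known outputs at positions less than i.
```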
def subsequent_mask(size):