# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import hashlib
import numbers

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest

_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"


def _like_rnncell(cell):
  """Checks that a given object is an RNNCell by using duck typing."""
  conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"),
                hasattr(cell, "zero_state"), callable(cell)]
  return all(conditions)
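
# For example, _like_rnncell(CustomLSTMCell(8)) (see the class defined below)
# is True, while _like_rnncell(lambda x: x) is False: a bare callable lacks
# output_size, state_size, and zero_state.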


def _concat(prefix, suffix, static=False):
  """Concat that enables int, Tensor, or TensorShape values.

  This function takes a size specification, which can be an integer, a
  TensorShape, or a Tensor, and converts it into a concatenated Tensor
  (if static = False) or a list of integers (if static = True).

  Args:
    prefix: The prefix; usually the batch size (and/or time step size).
      (TensorShape, int, or Tensor.)
    suffix: TensorShape, int, or Tensor.
    static: If `True`, return a python list with possibly unknown dimensions.
      Otherwise return a `Tensor`.

  Returns:
    shape: the concatenation of prefix and suffix.

  Raises:
    ValueError: if `suffix` is not a scalar or vector (or TensorShape).
    ValueError: if prefix or suffix was `None` and asked for dynamic
      Tensors out.
  """
  if isinstance(prefix, ops.Tensor):
    p = prefix
    p_static = tensor_util.constant_value(prefix)
    if p.shape.ndims == 0:
      p = array_ops.expand_dims(p, 0)
    elif p.shape.ndims != 1:
      raise ValueError("prefix tensor must be either a scalar or vector, "
                       "but saw tensor: %s" % p)
  else:
    p = tensor_shape.as_shape(prefix)
    p_static = p.as_list() if p.ndims is not None else None
    p = (constant_op.constant(p.as_list(), dtype=dtypes.int32)
         if p.is_fully_defined() else None)
  if isinstance(suffix, ops.Tensor):
    s = suffix
    s_static = tensor_util.constant_value(suffix)
    if s.shape.ndims == 0:
      s = array_ops.expand_dims(s, 0)
    elif s.shape.ndims != 1:
      raise ValueError("suffix tensor must be either a scalar or vector, "
                       "but saw tensor: %s" % s)
  else:
    s = tensor_shape.as_shape(suffix)
    s_static = s.as_list() if s.ndims is not None else None
    s = (constant_op.constant(s.as_list(), dtype=dtypes.int32)
         if s.is_fully_defined() else None)
  if static:
    shape = tensor_shape.as_shape(p_static).concatenate(s_static)
    shape = shape.as_list() if shape.ndims is not None else None
  else:
    if p is None or s is None:
      raise ValueError("Provided a prefix or suffix of None: %s and %s"
                       % (prefix, suffix))
    shape = array_ops.concat((p, s), 0)
  return shape
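
# Illustrative sketch (not part of the original gist): with batch_size=32 and
# a state size of 128, _concat(32, 128, static=True) returns the Python list
# [32, 128], while _concat(32, 128) returns the equivalent int32 shape Tensor,
# ready to feed array_ops.zeros in _zero_state_tensors below.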


def _zero_state_tensors(state_size, batch_size, dtype):
  """Create tensors of zeros based on state_size, batch_size, and dtype."""
  def get_state_shape(s):
    """Combine s with batch_size to get a proper tensor shape."""
    c = _concat(batch_size, s)
    c_static = _concat(batch_size, s, static=True)
    size = array_ops.zeros(c, dtype=dtype)
    size.set_shape(c_static)
    return size
  return nest.map_structure(get_state_shape, state_size)


class RNNCell(base_layer.Layer):
  """Abstract base class for RNN cells: maps (inputs, state) to (output, new_state)."""

  def __call__(self, inputs, state, scope=None):
    """Run this RNN cell on inputs, starting from the given state."""
    if scope is not None:
      with vs.variable_scope(scope,
                             custom_getter=self._rnn_get_variable) as scope:
        return super(RNNCell, self).__call__(inputs, state, scope=scope)
    else:
      with vs.variable_scope(vs.get_variable_scope(),
                             custom_getter=self._rnn_get_variable):
        return super(RNNCell, self).__call__(inputs, state)

  def _rnn_get_variable(self, getter, *args, **kwargs):
    # Custom getter that records each variable in the Layer's trainable or
    # non-trainable weight list, handling partitioned variables as well.
    variable = getter(*args, **kwargs)
    trainable = (variable in tf_variables.trainable_variables() or
                 (isinstance(variable, tf_variables.PartitionedVariable) and
                  list(variable)[0] in tf_variables.trainable_variables()))
    if trainable and variable not in self._trainable_weights:
      self._trainable_weights.append(variable)
    elif not trainable and variable not in self._non_trainable_weights:
      self._non_trainable_weights.append(variable)
    return variable

  @property
  def state_size(self):
    """Integer(s) or TensorShape(s): size(s) of state(s) used by this cell."""
    raise NotImplementedError("Abstract method")

  @property
  def output_size(self):
    """Integer or TensorShape: size of outputs produced by this cell."""
    raise NotImplementedError("Abstract method")

  def build(self, _):
    # This tells the parent Layer object that it's OK to call
    # self.add_variable() inside the call() method.
    pass

  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s) shaped [batch_size, state_size]."""
    with ops.name_scope(type(self).__name__ + "ZeroState",
                        values=[batch_size]):
      state_size = self.state_size
      return _zero_state_tensors(state_size, batch_size, dtype)
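
# For example, for a tuple-state cell like the CustomLSTMCell below with
# num_units=16, zero_state(32, dtypes.float32) returns an LSTMStateTuple of
# two [32, 16] zero Tensors, one each for c and h.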


_LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ("c", "h"))


class LSTMStateTuple(_LSTMStateTuple):
  """Tuple used by LSTM Cells for `state_size`, `zero_state`, and output state.

  Stores two elements: `(c, h)`, in that order.

  Only used when `state_is_tuple=True`.
  """
  __slots__ = ()

  @property
  def dtype(self):
    (c, h) = self
    if c.dtype != h.dtype:
      raise TypeError("Inconsistent internal state: %s vs %s" %
                      (str(c.dtype), str(h.dtype)))
    return c.dtype


class CustomLSTMCell(RNNCell):
  """Basic LSTM recurrent network cell (no peepholes or cell clipping)."""

  def __init__(self, num_units, forget_bias=1.0,
               state_is_tuple=True, activation=None, reuse=None):
    super(CustomLSTMCell, self).__init__(_reuse=reuse)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated. Use state_is_tuple=True.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

  @property
  def state_size(self):
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def call(self, inputs, state):
    """Long short-term memory cell (LSTM)."""
    sigmoid = math_ops.sigmoid
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    concat = _linear([inputs, h], 4 * self._num_units, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

    new_c = (
        c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
    new_h = self._activation(new_c) * sigmoid(o)

    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state
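
# Written out, CustomLSTMCell.call computes the standard LSTM update (with W
# and b coming from _linear, sigma = sigmoid, and the default tanh activation):
#   [i; j; f; o] = W [x_t, h_{t-1}] + b
#   c_t = c_{t-1} * sigma(f + forget_bias) + sigma(i) * tanh(j)
#   h_t = tanh(c_t) * sigma(o)
# i.e. the forget-gate bias is folded into f rather than into the variable b.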


def _linear(args,
            output_size,
            bias,
            bias_initializer=None,
            kernel_initializer=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  Args:
    args: a 2D Tensor or a list of 2D, [batch, n], Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_initializer: starting value to initialize the bias
      (default is all zeros).
    kernel_initializer: starting value to initialize the weight.

  Returns:
    A 2D Tensor with shape [batch, output_size] equal to
    sum_i(args[i] * W[i]), where the W[i]s are newly created matrices.

  Raises:
    ValueError: if some of the arguments have unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  dtype = [a.dtype for a in args][0]

  # Now the computation.
  scope = vs.get_variable_scope()
  with vs.variable_scope(scope) as outer_scope:
    weights = vs.get_variable(
        _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
        dtype=dtype,
        initializer=kernel_initializer)
    if len(args) == 1:
      res = math_ops.matmul(args[0], weights)
    else:
      res = math_ops.matmul(array_ops.concat(args, 1), weights)
    if not bias:
      return res
    with vs.variable_scope(outer_scope) as inner_scope:
      inner_scope.set_partitioner(None)
      if bias_initializer is None:
        bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
      biases = vs.get_variable(
          _BIAS_VARIABLE_NAME, [output_size],
          dtype=dtype,
          initializer=bias_initializer)
      return nn_ops.bias_add(res, biases)
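

# ------------------------------------------------------------------------------
# Minimal usage sketch (not part of the original gist). It assumes a
# TensorFlow 1.x runtime where tf.nn.dynamic_rnn is available; names such as
# `demo_inputs` are illustrative only. The cell plugs into dynamic_rnn exactly
# like the stock BasicLSTMCell it mirrors.
if __name__ == "__main__":
  import numpy as np
  import tensorflow as tf

  batch_size, max_time, input_depth, num_units = 4, 10, 8, 16
  demo_inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_depth])

  cell = CustomLSTMCell(num_units)
  initial_state = cell.zero_state(batch_size, tf.float32)
  # outputs: [batch_size, max_time, num_units]; final_state: LSTMStateTuple.
  outputs, final_state = tf.nn.dynamic_rnn(
      cell, demo_inputs, initial_state=initial_state, dtype=tf.float32)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out, (c, h) = sess.run(
        [outputs, final_state],
        feed_dict={demo_inputs: np.random.randn(
            batch_size, max_time, input_depth).astype(np.float32)})
    print("outputs shape:", out.shape)  # (4, 10, 16)
    print("final c shape:", c.shape)    # (4, 16)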