@rizar
Last active August 29, 2015 14:11
Data Streams
from abc import ABCMeta, abstractmethod


class Data(object):
    """Super abstract class for all data sources.

    So far it has neither methods nor attributes, but perhaps it can
    provide some sort of signature, e.g. channel names.
    """
    __metaclass__ = ABCMeta
class SequentialData(Data):
    """Data that provides sequential access only."""

    @abstractmethod
    def iterator(self, state):
        """Returns an iterator over the data that starts from the `state`.

        Examples of possible semantics for the `state` include
        a random number generator or an index.
        """
        pass
class FiniteSequentialData(SequentialData):
    """Sequential data consisting of a finite number of elements."""

    @abstractmethod
    def num_elements(self):
        pass
class IndexedData(Data):
    """Data with random access to the elements."""

    @abstractmethod
    def __getitem__(self, key):
        pass
class FiniteIndexedData(FiniteSequentialData, IndexedData):
    """Finite data with random access to the elements.

    .. todo::
        Implement __iter__ using __getitem__.
    """
    pass
class MNIST(FiniteIndexedData):
    """MNIST dataset of handwritten digits."""
    pass
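As a sketch of how a concrete `FiniteIndexedData` source might look, here is a hypothetical list-backed implementation that also fills in the `__iter__`-via-`__getitem__` todo noted above. `ListData` is an illustrative name, not part of the proposal; it stands alone rather than inheriting so the snippet is self-contained.

```python
class ListData(object):
    """Hypothetical FiniteIndexedData-style source backed by a list.

    Implements the num_elements/__getitem__ interface sketched above,
    and derives sequential iteration from random access."""

    def __init__(self, elements):
        self.elements = list(elements)

    def num_elements(self):
        return len(self.elements)

    def __getitem__(self, key):
        return self.elements[key]

    def __iter__(self):
        # The todo from FiniteIndexedData: __iter__ via __getitem__.
        for i in range(self.num_elements()):
            yield self[i]


data = ListData([3, 1, 4, 1, 5])
assert data.num_elements() == 5
assert data[2] == 4
assert list(data) == [3, 1, 4, 1, 5]
```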
class FetchingScheme(object):
    """Super abstract class for fetching schemes.

    A fetching scheme encapsulates the logic of batch building
    and the current state of iteration.
    """

    def __next__(self):
        """Returns an instruction on how to compose the next batch."""
        pass
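To make the `__next__` contract concrete, here is a minimal sketch of one possible scheme, assuming an "instruction" takes the form of a list of example indices. `SequentialScheme`, `num_examples`, and `batch_size` are illustrative names, not part of the proposal.

```python
class SequentialScheme(object):
    """Hypothetical fetching scheme: walks over num_examples indices
    in order, emitting one list of at most batch_size indices per
    call as the instruction for composing the next batch."""

    def __init__(self, num_examples, batch_size):
        self.num_examples = num_examples
        self.batch_size = batch_size
        self.position = 0  # the current state of iteration

    def __next__(self):
        if self.position >= self.num_examples:
            raise StopIteration
        batch = list(range(self.position,
                           min(self.position + self.batch_size,
                               self.num_examples)))
        self.position += len(batch)
        return batch

    next = __next__  # Python 2 iterator protocol, matching the gist's era

    def __iter__(self):
        return self


scheme = SequentialScheme(5, 2)
assert list(scheme) == [[0, 1], [2, 3], [4]]
```

Keeping the state (`position`) inside the scheme rather than the stream is what makes it possible to synchronize several streams by sharing one scheme.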
class DataStream(object):
    """Provides access to the training data for the `TrainingLoop`.

    `DataStream` is an abstract stream of batches of training data.
    The batches are semantically grouped into epochs. The `__iter__`
    method provides access to the next epoch's data. If `__iter__` is
    called before the current epoch is over, the returned iterator
    yields the data not yet seen.

    Data streams can be chained into a pipeline or even organized in a
    tree. The leaves of the tree fetch data from a `Data` instance;
    the others request it from their children data streams.

    Every stream has a fetching scheme, which encapsulates the
    parameters of the data fetching mechanism and its current state.
    The scheme is a separate entity in order to make it possible to
    synchronize several data schemes that use different data.

    A data stream can perform advanced data manipulation (e.g. caching).
    For this reason it is sometimes impossible to pickle one without
    losing the iteration state. The `is_picklable` method is available
    to query whether the data stream can be pickled at a particular
    stage of iteration.

    Attributes
    ----------
    scheme : instance of :class:`FetchingScheme`
        The fetching scheme.

    Parameters
    ----------
    scheme : instance of :class:`FetchingScheme`
        The fetching scheme.
    """
    def __init__(self, scheme):
        self.scheme = scheme
    @abstractmethod
    def next_batch(self):
        """Returns the next batch in the stream.

        If the epoch is over, returns None and automatically
        switches to the next epoch.
        """
        pass
    def __iter__(self):
        """Returns an iterator over the next epoch's batches."""
        return EpochIterator(self)
    def is_picklable(self):
        """Reports whether the stream can be losslessly pickled.

        Returns
        -------
        status : bool
            True if, after pickling and unpickling, the data stream's
            state will be fully preserved; False otherwise.
        """
        pass
class EpochIterator(object):
    def __init__(self, data_stream):
        self.data_stream = data_stream

    def __iter__(self):
        return self

    def next(self):
        batch = self.data_stream.next_batch()
        if batch is None:
            raise StopIteration()
        return batch
class BatchDataStream(DataStream):
    """Groups data into batches.

    By supporting this interface, a class declares that its `__next__`
    method returns the batch size.
    """
    pass
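To show the `next_batch`/`EpochIterator` contract end to end, here is a hypothetical concrete stream that serves a list in fixed-size batches and returns None at each epoch boundary, as the `next_batch` docstring describes. `ToyDataStream` is an illustrative name; for brevity it implements epoch iteration inline instead of delegating to `EpochIterator`.

```python
class ToyDataStream(object):
    """Hypothetical concrete stream following the next_batch contract:
    yields batches until the epoch ends, then returns None once and
    automatically switches to the next epoch."""

    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.position = 0

    def next_batch(self):
        if self.position >= len(self.data):
            self.position = 0  # automatically switch to the next epoch
            return None        # signal the epoch boundary
        batch = self.data[self.position:self.position + self.batch_size]
        self.position += len(batch)
        return batch

    def __iter__(self):
        # One epoch's worth of batches, as in DataStream.__iter__.
        while True:
            batch = self.next_batch()
            if batch is None:
                return
            yield batch


stream = ToyDataStream([0, 1, 2, 3, 4], batch_size=2)
assert list(stream) == [[0, 1], [2, 3], [4]]
assert list(stream) == [[0, 1], [2, 3], [4]]  # the next epoch restarts
```

Note how the None sentinel lets the consumer detect epoch boundaries without the stream needing to know how many epochs the training loop wants.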