Last active
August 29, 2015 14:11
-
-
Save rizar/cc62fb9d6270f9856793 to your computer and use it in GitHub Desktop.
Data Streams
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Data(object):
    """Root of the hierarchy of data sources.

    The class is deliberately empty: it declares no methods and no
    attributes yet. Later it might carry some kind of signature of
    the source, e.g. the channel names.

    """
class SequentialData(Data):
    """A data source that can only be read in sequence."""

    @abstractmethod
    def iterator(self, state):
        """Build an iterator over the data that starts from `state`.

        The meaning of `state` is left to the implementation; it could
        be, for example, an index or a random number generator.

        """
class FiniteSequentialData(SequentialData):
    """Sequential data made of a finite number of elements."""

    @abstractmethod
    def num_elements(self):
        """Abstract hook; the base version does nothing and returns None.

        Presumably meant to report how many elements the data holds --
        confirm against concrete subclasses.

        """
class IndexedData(Data):
    """A data source whose elements can be accessed in arbitrary order."""

    @abstractmethod
    def __getitem__(self, key):
        """Abstract hook for element lookup by `key`.

        The base version does nothing and returns None.

        """
class FiniteIndexedData(FiniteSequentialData, IndexedData):
    """A finite data source that also offers random access.

    Combines :class:`FiniteSequentialData` and :class:`IndexedData`
    without adding anything of its own.

    .. todo::

        Implement __iter__ using __getitem__.

    """
class MNIST(FiniteIndexedData):
    """The MNIST dataset of handwritten digits.

    Placeholder: the class body adds nothing to its base class yet.

    """
class FetchingScheme(object):
    """Super abstract class for fetching schemes.

    A fetching scheme encapsulates the logic of batch building
    and the current state of iteration.

    """
    def __next__(self):
        """Returns an instruction on how to compose the next batch.

        The base implementation is a stub and returns None.

        """
        pass
class DataStream(object):
    """An abstract stream of training data batches for the `TrainingLoop`.

    Batches are semantically grouped into epochs. Calling `__iter__`
    gives access to the data of the next epoch; if it is called before
    the current epoch is over, the returned iterator yields only the
    data that has not been seen yet.

    Streams can be chained into a pipeline or even arranged in a tree.
    The leaves of the tree fetch data from a `Data` instance, while the
    other nodes request it from their children data streams.

    Every stream owns a fetching scheme that encapsulates the parameters
    of the data fetching mechanism together with its current state. The
    scheme is kept as a separate entity so that several data streams
    working on different data can be synchronized.

    A stream may perform advanced data manipulation (e.g. caching), so
    it is sometimes impossible to pickle it without losing the iteration
    state. Use `is_picklable` to ask whether the stream can be pickled
    at a particular stage of iteration.

    Parameters
    ----------
    scheme : instance of :class:`FetchingScheme`
        The fetching scheme.

    Attributes
    ----------
    scheme : instance of :class:`FetchingScheme`
        The fetching scheme.

    """
    def __init__(self, scheme):
        self.scheme = scheme

    def __iter__(self):
        """Return an iterator over the batches of the next epoch."""
        epoch_batches = EpochIterator(self)
        return epoch_batches

    @abstractmethod
    def next_batch(self):
        """Return the next batch in the stream.

        When the epoch is over, implementations return None and
        automatically switch to the next epoch. The base version
        does nothing and returns None.

        """

    def is_picklable(self):
        """Report whether the stream can be losslessly pickled.

        The base version is a stub and returns None.

        Returns
        -------
        status : bool
            True, if the state of the data stream is fully preserved
            after pickling and unpickling. False otherwise.

        """
class EpochIterator(object):
    """Iterator over the batches of a single epoch of a data stream.

    Parameters
    ----------
    data_stream : object
        The stream to draw batches from. Its `next_batch` method is
        called once per step and is expected to return None when the
        epoch is over.

    Attributes
    ----------
    data_stream : object
        The stream given at construction.

    """
    def __init__(self, data_stream):
        self.data_stream = data_stream

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next batch of the current epoch.

        Raises
        ------
        StopIteration
            When the stream signals the end of the epoch by returning
            None from `next_batch`.

        """
        batch = self.data_stream.next_batch()
        # Compare with None explicitly: a legitimate batch may be falsy
        # (e.g. empty) or may not support truth testing at all.
        if batch is None:
            raise StopIteration()
        return batch

    # Python 2 spells the iterator method `next`; keep that name working.
    next = __next__
class BatchDataStream(DataStream):
    """A data stream that groups data into batches.

    By supporting this interface a class declares that its __next__
    method returns the batch size.

    """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment