Last active
February 4, 2018 19:40
-
-
Save hamx0r/ebaeab00c0039ccf07104bdc64bc072f to your computer and use it in GitHub Desktop.
Zipline Data Source which pulls from Memcache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" MemcacheDataSource Class to pull data from memcache on demand for a simulation, minimizing data load time for a | |
simulation, and allowing Memcached stock data to be shared by multiple parallel simulations. | |
The MemcacheDataSource class implements methods to act in compliance with the `zipline.algorithm.TradingAlgorithm.sources`
object: https://github.com/quantopian/zipline/blob/0.9.0/zipline/algorithm.py | |
This means implementing the following methods (with line numbers per v0.9.0 linked above): | |
* `sids` (Line 562) | |
* iterator (or generator) methods (439) like: | |
- '__iter__' | |
- 'next' | |
This implies the need for each memcached object to behave like the "data source" provided by | |
`zipline.sources.data_source.DataSource` | |
A fine example by cowmoo of a custom Data Source was followed: | |
https://github.com/cowmoo/zipline/blob/a139d0fec287be330dc6a91a3d2f3b1f96bf2fe2/zipline/examples/custom_data_source.py | |
However, as cowmoo's example is a bit old, there are some changes needed. Namely, the `sids` must be mapped to `symbols` outside of | |
this datasource class and then passed in (see `__init__`). Also the `TradingEnvironment` object is now used instead of how cowmoo did it. | |
Using MemcacheDataSource requires the following memcached data: | |
* Each bar's timestamp key, formatted with `KEY_DATE_FORMAT` (YYYYMMDDHHmm), must have a value: a sequence of prices indexed in the same order as the `symbols` passed to `__init__`
To minimize loading a list of symbols from disk or database, I also memcached: | |
* `symbols_idx` key must have value of a python list of all stock symbols in your "universe" | |
See bottom `if __name__...` code for an example of how to use this in an algo. | |
""" | |
__author__ = 'hamx0r' | |
from zipline.sources.data_source import DataSource | |
from zipline.finance import trading | |
import pandas as pd | |
KEY_DATE_FORMAT = '%Y%m%d%H%M' | |
class MemcacheDataSource(DataSource):
    """Zipline data source that streams price events out of memcached.

    For every trading bar between ``start`` and ``end`` the source reads the
    memcached key ``bar.strftime(KEY_DATE_FORMAT)`` and emits one event per
    entry of ``symbols``, taking the price found at the same position in the
    cached value.
    """

    environment = trading.TradingEnvironment()

    def __init__(self, symbols, sids, bars, start, end, mc_client=None):
        """Constructor for the data source.

        Parameters
        ----------
        symbols : array
            Symbols to simulate the custom data source on.  Their order must
            match the order of the prices stored under each timestamp key.
        sids : array
            Security ids for ``symbols``, mapped by the caller outside of
            this class.  Stored on the instance as ``self.sids``.
        bars : string
            'minute' or 'daily'
        start : pd.Timestamp
            start date of data source
        end : pd.Timestamp
            end date of data source
        mc_client : object, optional
            Memcache client exposing ``get(key)``.  When omitted, a
            module-level ``mc_client`` is looked up at read time, which is
            what this class relied on before the argument existed.
        """
        self._raw_data = None
        self.symbols = symbols
        self.sids = sids
        self.start = start
        self.end = end
        self.bars = bars
        self._mc_client = mc_client

    @property
    def mapping(self):
        """Field name -> (converter, source key) table applied to raw events."""
        return {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }

    @property
    def instance_hash(self):
        """Constant identifier string for this data source."""
        return "MemcacheDataSource"

    def _get_client(self):
        """Return the injected memcache client, else the module-level one."""
        if self._mc_client is not None:
            return self._mc_client
        try:
            return mc_client
        except NameError:
            raise NameError(
                'No memcache client available: pass `mc_client=` to '
                'MemcacheDataSource or define a module-level `mc_client`.'
            )

    def _bar_timestamps(self):
        """Yield each bar timestamp in the date range, in order.

        Daily bars yield the trading days themselves; minute bars yield the
        market minutes of each trading day in turn.  Streaming per day avoids
        building one large index up front by repeated appends.
        """
        days = self.environment.days_in_range(self.start, self.end)
        for day in days:
            if self.bars == 'daily':
                yield day
            else:
                for minute in self.environment.market_minutes_for_day(day):
                    yield minute

    def raw_data_gen(self):
        """Generate one raw event dict per (bar, symbol) pair.

        Raises
        ------
        ValueError
            If ``bars`` is neither 'daily' nor 'minute'.
        LookupError
            If memcached holds no value for a bar's timestamp key.
        """
        if self.bars not in ('daily', 'minute'):
            raise ValueError('%s bars not understood.' % self.bars)

        # iterate through the available trading interval in this data
        # source's date range
        for bar in self._bar_timestamps():
            key = bar.strftime(KEY_DATE_FORMAT)
            # Assumes the client's `get` returns None on a cache miss
            # (python-memcached / pymemcache behaviour) -- TODO confirm for
            # the client actually in use.
            prices = self._get_client().get(key)
            if prices is None:
                raise LookupError(
                    'No memcached prices found for key %s' % key
                )
            for idx, symbol in enumerate(self.symbols):
                # NOTE(review): the event's 'sid' is the symbol, not the
                # matching entry of `self.sids` -- confirm this is what the
                # consuming algorithm expects.
                yield {
                    'dt': bar,  # timestamp (e.g., 2013-12-10 00:00:00+00:00)
                    'sid': symbol,  # symbol (e.g., AAPL)
                    'price': prices[idx],
                    # Choose a large volume if you don't have data available
                    'volume': 1e9,
                }

    @property
    def raw_data(self):
        """Lazily create, then reuse, the single raw event generator."""
        if self._raw_data is None:
            self._raw_data = self.raw_data_gen()
        return self._raw_data
if __name__ == '__main__':
    # Example of wiring MemcacheDataSource into a minute-bar backtest.
    # Imports are local to the example so that importing this module as a
    # library does not require them.
    from datetime import datetime

    import pytz
    from zipline.algorithm import TradingAlgorithm  # base class for MyAlgo
    from zipline.utils import factory

    # NOTE(review): `mc_client` (a connected memcache client) and `MyAlgo`
    # (your `TradingAlgorithm` subclass) are not defined in this file and
    # must be provided before this example can run.
    # NOTE(review): the module docstring names this key `symbols_idx`, but
    # 'stocks_idx' is what is read here -- confirm which key your cache uses.
    stocks = mc_client.get('stocks_idx')

    start = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = datetime(2016, 3, 1, 0, 0, 0, 0, pytz.utc)
    sim_params = factory.create_simulation_parameters(start=start, end=end)

    # set this backtest to have minute bars
    sim_params.emission_rate = 'minute'
    sim_params.data_frequency = 'minute'
    sim_params.first_open = sim_params.period_start
    sim_params.last_close = sim_params.period_end
    sim_params.capital_base = 1e4  # set your starting cash here

    # Here you'll refer to your custom algo class which must subclass
    # `TradingAlgorithm`
    algo = MyAlgo(sim_params)

    # Map the symbols to sids outside of the data source, then pass both in.
    sids = algo._write_and_map_id_index_to_sids(
        stocks, start,
    )
    source = MemcacheDataSource(stocks, sids, 'minute', start, end)
    results = algo.run(source)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment