Skip to content

Instantly share code, notes, and snippets.

@khuangaf
Created January 22, 2018 03:36
Show Gist options
  • Save khuangaf/1e73d0c79183e25247c432905a28b15b to your computer and use it in GitHub Desktop.
Save khuangaf/1e73d0c79183e25247c432905a28b15b to your computer and use it in GitHub Desktop.
class PortfolioEnv(gym.Env):
    def _step(self, action):
        """
        Step the env.

        Actions should be portfolio [w0...]
        - Where wn is a portfolio weight between 0 and 1. The first (w0) is cash_bias
        - cn is the portfolio conversion weights, see self.sim._step for description

        The action is clipped to [0, 1] and normalised so the weights sum
        to 1. Any weight left unallocated after normalisation (all of it
        when the clipped action is all zeros) is assigned to cash (w0).

        Returns a tuple (observation, reward, done, info) where observation
        is a dict with the keys 'history' and 'weights', done is true when
        either the data source or the simulator reports completion, and info
        is the simulator's info dict extended with 'market_value', 'date'
        and 'steps'.

        Raises AssertionError if the action is outside the action space or
        the normalised weights do not sum to 1.
        """
        logger.debug('action: %s', action)

        # np.clip returns a new array, so the caller's action is not mutated.
        weights = np.clip(action, 0.0, 1.0)
        weights /= weights.sum() + eps
        # Dividing by (sum + eps) leaves the total slightly below 1, and an
        # all-zero action stays all zeros. Put the remainder into cash so a
        # degenerate action means "hold cash" instead of failing the
        # sum-to-one check below.
        weights[0] += 1.0 - weights.sum()

        # Sanity checks
        assert self.action_space.contains(
            action), 'action should be within %r but is %r' % (self.action_space, action)
        np.testing.assert_almost_equal(
            np.sum(weights), 1.0, 3, err_msg='weights should sum to 1. action="%s"' % weights)

        # Advance the data source first, then feed its output to the simulator.
        history, y1, done1 = self.src._step()
        reward, info, done2 = self.sim._step(weights, y1)

        # calculate return for buy and hold a bit of each asset:
        # the running product of every step's market return, including this one
        info['market_value'] = np.cumprod(
            [inf["market_return"] for inf in self.infos + [info]])[-1]

        # add dates
        info['date'] = self.src.times[self.src.step].timestamp()
        info['steps'] = self.src.step

        self.infos.append(info)

        # reshape history according to output mode
        if self.output_mode == 'EIIE':
            # history is passed through unchanged
            pass
        elif self.output_mode == 'atari':
            # zero-pad the end of axis 0 until it is as long as axis 1
            padding = history.shape[1] - history.shape[0]
            history = np.pad(
                history, [[0, padding], [0, 0], [0, 0]], mode='constant')
        elif self.output_mode == 'mlp':
            # collapse to a 1-D vector
            history = history.flatten()

        return {'history': history, 'weights': weights}, reward, done1 or done2, info
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment