Skip to content

Instantly share code, notes, and snippets.

@khuangaf
Last active January 22, 2018 03:08
Show Gist options
  • Save khuangaf/b493f7d55dbd5523a81d9ed2d746adb5 to your computer and use it in GitHub Desktop.
class PortfolioSim(object):
    def _step(self, w1, y1):
        """
        Advance the simulator by one trading period.

        w1 - new action of portfolio weights - e.g. [0.1, 0.9, 0.0]
        y1 - price relative vector (also called return) - e.g. [1.0, 0.9, 1.1]

        Returns (reward, info, done). Numbered equations refer to
        https://arxiv.org/abs/1706.10059
        """
        w0, p0 = self.w0, self.p0

        # (eq7) weights drift as prices move before we rebalance
        dw1 = (y1 * w0) / (np.dot(y1, w0) + eps)

        # (eq16) transaction cost of rebalancing from dw1 to w1
        # (the cash column [0] is excluded to avoid double counting)
        c1 = self.cost * np.abs(dw1[1:] - w1[1:]).sum()

        # (eq11) portfolio value after the market move and trading cost
        p1 = p0 * (1 - c1) * np.dot(y1, w0)
        p1 = p1 * (1 - self.time_cost)  # optional cost of simply holding
        p1 = np.clip(p1, 0, np.inf)     # no shorts: value cannot go negative

        rho1 = p1 / p0 - 1                     # simple rate of return
        r1 = np.log((p1 + eps) / (p0 + eps))   # (eq10) log rate of return
        # (eq22) immediate reward is the log return scaled by episode length
        reward = r1 / self.steps

        # carry state forward to the next step
        self.w0, self.p0 = w1, p1

        # bankrupt -> episode over
        done = bool(p1 == 0)

        # scalar diagnostics only (no lists) so they log cleanly
        info = {
            "reward": reward,
            "log_return": r1,
            "portfolio_value": p1,
            "market_return": y1.mean(),
            "rate_of_return": rho1,
            "weights_mean": w1.mean(),
            "weights_std": w1.std(),
            "cost": c1,
        }
        # per-asset weights and prices (cash column is labelled 'BTCBTC')
        for i, name in enumerate(['BTCBTC'] + self.asset_names):
            info['weight_' + name] = w1[i]
            info['price_' + name] = y1[i]
        self.infos.append(info)

        return reward, info, done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment