from yahoo_finance import Share
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import random

%matplotlib inline

class DecisionPolicy:
    """Abstract interface: choose an action for a state and learn from a transition."""
    def select_action(self, current_state, step):
        pass

    def update_q(self, state, action, reward, next_state):
        pass

class QLearningDecisionPolicy(DecisionPolicy):
    def __init__(self, actions, input_dim):
        self.epsilon = 0.9   # cap on the exploitation probability used in select_action
        self.gamma = 0.01    # discount factor on future Q-values
        self.actions = actions
        output_dim = len(actions)
        h1_dim = 200

        # Two-layer network mapping a state vector to one Q-value per action.
        self.x = tf.placeholder(tf.float32, [None, input_dim])
        self.y = tf.placeholder(tf.float32, [output_dim])
        W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))
        b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))
        h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
        W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))
        b2 = tf.Variable(tf.constant(0.1, shape=[output_dim]))
        self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)

        # Regress the predicted Q-values toward the target vector self.y.
        loss = tf.square(self.y - self.q)
        self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def select_action(self, current_state, step):
        # Exploit (argmax Q) with probability min(epsilon, step / 1000), so early
        # steps explore more and later steps act mostly greedily.
        threshold = min(self.epsilon, step / 1000.)
        if random.random() < threshold:
            action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
            action_idx = np.argmax(action_q_vals)
            action = self.actions[action_idx]
        else:
            # Explore: pick an action uniformly at random.
            action = self.actions[random.randint(0, len(self.actions) - 1)]
        return action

    def update_q(self, state, action, reward, next_state):
        action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
        next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
        next_action_idx = np.argmax(next_action_q_vals)
        # Build the training target: replace the entry at the greedy next-action index
        # with reward + gamma * max_a' Q(next_state, a'), then fit the network to it.
        action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
        action_q_vals = np.squeeze(np.asarray(action_q_vals))
        self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})
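
# --- Not part of the original gist: a hedged smoke test of the policy above, left
# commented out so the script's behaviour is unchanged. The names `toy_policy` and
# `toy_state` are illustrative. The state vector must have input_dim entries; the
# script below uses hist + 2 = 202 (200 prices plus budget and stock count).
# toy_policy = QLearningDecisionPolicy(['Buy', 'Sell', 'Hold'], 202)
# toy_state = np.asmatrix(np.random.rand(1, 202))
# print(toy_policy.select_action(toy_state, step=0))     # step 0: threshold is 0, always explores
# print(toy_policy.select_action(toy_state, step=5000))  # large step: greedy 90% of the time
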
def get_prices(share_symbol, start_date, end_date, cache_filename='stock_prices.npy'):
    # Load cached prices if available; otherwise fetch them from Yahoo Finance and cache them.
    try:
        stock_prices = np.load(cache_filename)
    except IOError:
        print('no cache found, downloading {} prices'.format(share_symbol))
        share = Share(share_symbol)
        stock_hist = share.get_historical(start_date, end_date)
        stock_prices = [stock_price['Open'] for stock_price in stock_hist]
        np.save(cache_filename, stock_prices)
    return stock_prices

def plot_prices(prices):
    plt.title('Opening stock prices')
    plt.xlabel('day')
    plt.ylabel('price ($)')
    plt.plot(prices)
    plt.savefig('prices.png')

def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist, debug=False):
    budget = initial_budget
    num_stocks = initial_num_stocks
    share_value = 0
    transitions = list()
    for i in range(len(prices) - hist - 1):
        if i % 100 == 0:
            print('progress {:.2f}%'.format(float(100 * i) / (len(prices) - hist - 1)))
        # State = the last `hist` opening prices plus the current budget and share count.
        current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
        current_portfolio = budget + num_stocks * share_value
        action = policy.select_action(current_state, i)
        share_value = float(prices[i + hist + 1])
        if action == 'Buy' and budget >= share_value:
            budget -= share_value
            num_stocks += 1
        elif action == 'Sell' and num_stocks > 0:
            budget += share_value
            num_stocks -= 1
        else:
            action = 'Hold'
        # Reward = change in total portfolio value after taking the action.
        new_portfolio = budget + num_stocks * share_value
        reward = new_portfolio - current_portfolio
        next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
        transitions.append((current_state, action, reward, next_state))
        policy.update_q(current_state, action, reward, next_state)

    portfolio = budget + num_stocks * share_value
    if debug:
        print('${}\t{} shares'.format(budget, num_stocks))
    return portfolio
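
# --- Not part of the original gist: a hedged example of a single debug run, left
# commented out so the main script below is unaffected. `toy_prices` is a hypothetical
# synthetic series; with hist = 200 it yields 49 steps, and debug=True prints the
# remaining budget and share count at the end of the run.
# toy_prices = np.linspace(10, 20, 250)
# toy_policy = QLearningDecisionPolicy(['Buy', 'Sell', 'Hold'], 200 + 2)
# print(run_simulation(toy_policy, 1000.0, 0, toy_prices, 200, debug=True))
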
def run_simulations(policy, budget, num_stocks, prices, hist):
    # Average the final portfolio value over several independent simulation runs.
    num_tries = 10
    final_portfolios = list()
    for i in range(num_tries):
        final_portfolio = run_simulation(policy, budget, num_stocks, prices, hist)
        final_portfolios.append(final_portfolio)
    avg, std = np.mean(final_portfolios), np.std(final_portfolios)
    return avg, std

prices = get_prices('MSFT', '1992-07-22', '2016-07-22')
plot_prices(prices)

actions = ['Buy', 'Sell', 'Hold']
hist = 200
# Input dimension = hist price points + budget + number of stocks held.
policy = QLearningDecisionPolicy(actions, hist + 2)
budget = 1000.0
num_stocks = 0
avg, std = run_simulations(policy, budget, num_stocks, prices, hist)
print(avg, std)
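
# Not part of the original gist: a hedged follow-up pass that reuses the trained policy
# with debug=True, so the remaining budget and share count are printed for this run.
final_portfolio = run_simulation(policy, budget, num_stocks, prices, hist, debug=True)
print(final_portfolio)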