This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models import FastText

# Train a FastText embedding on the tokenised sentences (`sentences_ted` is
# built elsewhere as a list of token lists).
#   size=100     -> dimensionality of the word vectors
#   window=5     -> context window on each side of the target word
#   min_count=5  -> words seen fewer than 5 times are dropped
#   workers=4    -> training threads
#   sg=1         -> skip-gram training (sg=0 would select CBOW)
# NOTE(review): gensim >= 4.0 renamed `size` to `vector_size`; this call
# targets the gensim 3.x API -- confirm the pinned version before upgrading.
model_ted = FastText(sentences_ted, size=100, window=5, min_count=5, workers=4, sg=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Query the trained vectors for the nearest neighbours of "man".
# The key must be a plain ASCII-quoted string; typographic quotes (“ ”) are
# not valid Python string delimiters and raise a SyntaxError.
model_ted.wv.most_similar("man")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gensim.models import Word2Vec

# Train a Word2Vec embedding on the tokenised sentences (`sentences_ted` is
# built elsewhere as a list of token lists).
#   size=100     -> dimensionality of the word vectors
#   window=5     -> context window on each side of the target word
#   min_count=5  -> words seen fewer than 5 times are dropped
#   workers=4    -> training threads
#   sg=0         -> CBOW training (sg=1 would select skip-gram)
# NOTE(review): gensim >= 4.0 renamed `size` to `vector_size`; this call
# targets the gensim 3.x API -- confirm the pinned version before upgrading.
model_ted = Word2Vec(sentences=sentences_ted, size=100, window=5, min_count=5, workers=4, sg=0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove every parenthesised span "( ... )" from the raw text.
input_text_noparens = re.sub(r'\([^)]*\)', '', input_text)

# Store as a flat list of sentence strings.
sentences_strings_ted = []
for line in input_text_noparens.split('\n'):
    # Optionally peel off a short "<prefix>:" (at most 20 non-colon characters)
    # at the start of the line; everything after it lands in `postcolon`.
    # The pattern always matches because `postcolon` accepts any remainder,
    # including the empty string.
    m = re.match(r'^(?:(?P<precolon>[^:]{,20}):)?(?P<postcolon>.*)$', line)
    # Split the remainder on '.' and keep only the non-empty pieces.
    sentences_strings_ted.extend(sent for sent in m.groupdict()['postcolon'].split('.') if sent)
# store as list of lists of words | |
sentences_ted = [] | |
for sent_str in sentences_strings_ted: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import os | |
from random import shuffle | |
import re | |
import urllib.request | |
import zipfile | |
import lxml.etree | |
# Download the TED talks XML release (zip archive) into the current working
# directory as "ted_en-20160408.zip". This performs a blocking network request
# every time the script runs.
urllib.request.urlretrieve("https://wit3.fbk.eu/get.php?path=XML_releases/xml/ted_en-20160408.zip&filename=ted_en-20160408.zip", filename="ted_en-20160408.zip")
# extract subtitle |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Model(object): | |
def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train, | |
nsteps, ent_coef, vf_coef, max_grad_norm, training): | |
sess = tf.get_default_session() | |
if training: | |
act_model = policy(sess, ob_space, ac_space, nbatch_act, 1, reuse=False) | |
else: | |
act_model = policy(sess, ob_space, ac_space, nbatch_act, 1, reuse=True) | |
train_model = policy(sess, ob_space, ac_space, nbatch_train, nsteps, reuse=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class CnnPolicy(object): | |
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=True): | |
ob_shape = (nbatch,) + ob_space.shape | |
actdim = ac_space.shape[0] | |
window_length = ob_space.shape[1] -1 | |
X = tf.placeholder(tf.float32, ob_shape, name='Ob') #obs | |
with tf.variable_scope("model", reuse=reuse) as scope: | |
w0 = tf.slice(X, [0,0,0,0],[-1,-1,1,1]) | |
x = tf.slice(X, [0,0,1,0],[-1,-1,-1,-1]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym.spaces | |
import gym.wrappers | |
import numpy as np | |
def concat_states(state): | |
history = state["history"] | |
weights = state["weights"] | |
weight_insert_shape = (history.shape[0], 1, history.shape[2]) | |
if len(weights) - 1 == history.shape[0]: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class PortfolioEnv(gym.Env): | |
def _step(self, action): | |
""" | |
Step the env. | |
Actions should be portfolio [w0...] | |
- Where wn is a portfolio weight between 0 and 1. The first (w0) is cash_bias | |
- cn is the portfolio conversion weights see PortfolioSim._step for description | |
""" | |
logger.debug('action: %s', action) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class PortfolioSim(object): | |
def _step(self, w1, y1): | |
""" | |
Step. | |
w1 - new action of portfolio weights - e.g. [0.1,0.9, 0.0] | |
y1 - price relative vector also called return | |
e.g. [1.0, 0.9, 1.1] | |
Numbered equations are from https://arxiv.org/abs/1706.10059 | |
""" | |
w0 = self.w0 |