Skip to content

Instantly share code, notes, and snippets.

View NMZivkovic's full-sized avatar

Nikola Živković NMZivkovic

View GitHub Profile
def __init__(self, enviroment, optimizer):
    """Set up the agent's sizes, optimizer, replay buffer and discount.

    Args:
        enviroment: Object exposing ``observation_space.n`` and
            ``action_space.n``; both values are read once and cached.
        optimizer: Stored unchanged on the instance for later use.
    """
    # Initialize attributes
    self._state_size = enviroment.observation_space.n
    self._action_size = enviroment.action_space.n
    self._optimizer = optimizer

    # Bounded buffer: once 2000 items are stored, appending drops the oldest.
    self.expirience_replay = deque(maxlen=2000)

    # Initialize the discount factor.
    # NOTE(review): the original comment also mentions an exploration rate,
    # but no such attribute is set in the visible part of this snippet.
    self.gamma = 0.6
class Agent:
    """Agent that caches environment sizes and owns a bounded replay buffer."""

    def __init__(self, enviroment, optimizer):
        """Cache the space sizes, keep the optimizer and create the buffer.

        Args:
            enviroment: Object exposing ``observation_space.n`` and
                ``action_space.n``; both values are read once and cached.
            optimizer: Stored unchanged on the instance for later use.
        """
        # Initialize attributes
        self._state_size = enviroment.observation_space.n
        self._action_size = enviroment.action_space.n
        self._optimizer = optimizer

        # Bounded buffer: once 2000 items are stored, appending drops the
        # oldest entry.
        self.expirience_replay = deque(maxlen=2000)
# Create the "Taxi-v2" environment and keep the object exposed by its `.env`
# attribute (presumably the unwrapped environment — confirm against gym docs).
enviroment = gym.make("Taxi-v2").env
# Render the environment once.
enviroment.render()
# Report the sizes of the observation (state) and action spaces.
print('Number of states: {}'.format(enviroment.observation_space.n))
print('Number of actions: {}'.format(enviroment.action_space.n))
import numpy as np
import random
from IPython.display import clear_output
from collections import deque
import progressbar
import gym
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Dense, Embedding, Reshape
# Totals kept outside the loop; presumably accumulated per episode by code
# beyond this excerpt — confirm against the full script.
total_epochs = 0
total_penalties = 0
num_of_episodes = 100

for _ in range(num_of_episodes):
    # Start every episode from a freshly reset environment.
    state = enviroment.reset()

    # Per-episode counters, zeroed at the start of each episode.
    epochs = 0
    penalties = 0
    reward = 0
num_of_episodes = 100000

for episode in range(num_of_episodes):
    # Reset the environment
    state = enviroment.reset()

    # Initialize variables
    reward = 0
    terminated = False
# Hyperparameters (presumably learning rate, discount factor and exploration
# rate, in that order — confirm against the loop that consumes them).
alpha, gamma, epsilon = 0.1, 0.6, 0.1

# Zero-filled table with one row per state and one column per action.
q_table = np.zeros(
    (enviroment.observation_space.n, enviroment.action_space.n)
)
# Create the "Taxi-v2" environment and keep the object exposed by its `.env`
# attribute (presumably the unwrapped environment — confirm against gym docs).
enviroment = gym.make("Taxi-v2").env
# Render the environment once.
enviroment.render()
# Report the sizes of the observation (state) and action spaces.
print('Number of states: {}'.format(enviroment.observation_space.n))
print('Number of actions: {}'.format(enviroment.action_space.n))
import numpy as np
import random
from IPython.display import clear_output
import gym
# Correlation matrix of `data` (assumes `data` is a pandas DataFrame — confirm).
corrMatt = data.corr()

# Hide only the cells strictly above the diagonal. The mask is built from cell
# positions rather than from the correlation values themselves, so an
# upper-triangle cell stays hidden even when its correlation is exactly zero
# (a value-derived mask would treat that 0.0 as "do not mask").
mask = np.triu(np.ones(corrMatt.shape, dtype=bool), k=1)

fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
sb.heatmap(corrMatt, cmap="Greens", mask=mask, vmax=.8, square=True, annot=True)