Q* (Q-Star)
#            - Q* (Q-Star)
#  /\__/\    - q.py
# ( o.o )    - v0.0.1
#  >^<       - by @rUv
# 01110010 01110101 01110110
# This is a proof-of-concept implementation of the Q* (AGI) leak from OpenAI.
# This Python code defines a sophisticated Q-learning agent for reinforcement learning.
# It includes dynamic exploration, learning from experiences, and checks for convergence.
# The agent's capabilities are refined iteratively to optimize its decision-making strategy in a given environment.

import numpy as np
import random

class SophisticatedQLearningAgent:
    def __init__(self, states, actions, learning_rate=0.1, discount_factor=0.95,
                 exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.995,
                 max_episodes=10000, max_steps_per_episode=200):
        # Initialize the Q-learning agent with the specified parameters.
        self.states = states                                  # Number of states in the environment
        self.actions = actions                                # Number of possible actions
        self.learning_rate = learning_rate                    # Rate at which the agent learns
        self.discount_factor = discount_factor                # Factor for discounting future rewards
        self.exploration_rate = exploration_rate              # Initial exploration rate
        self.min_exploration_rate = min_exploration_rate      # Minimum exploration rate
        self.exploration_decay_rate = exploration_decay_rate  # Rate of decay for exploration
        self.max_episodes = max_episodes                      # Maximum number of training episodes
        self.max_steps_per_episode = max_steps_per_episode    # Maximum steps per episode
        self.q_table = np.zeros((states, actions))            # Initialize the Q-table with zeros

    def choose_action(self, state):
        # Choose an action using an epsilon-greedy exploration-exploitation strategy.
        if random.uniform(0, 1) < self.exploration_rate:
            action = random.randint(0, self.actions - 1)   # Explore: choose a random action
        else:
            action = np.argmax(self.q_table[state, :])     # Exploit: choose the best known action
        return action

    def learn(self, state, action, reward, next_state):
        # Update the Q-table based on the action taken and the resulting state.
        predict = self.q_table[state, action]
        target = reward + self.discount_factor * np.max(self.q_table[next_state, :])
        self.q_table[state, action] += self.learning_rate * (target - predict)
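
    # For reference, the update above implements the standard tabular Q-learning rule:
    #
    #   Q(s, a) <- Q(s, a) + alpha * [r + gamma * max_a' Q(s', a') - Q(s, a)]
    #
    # where alpha is `learning_rate` and gamma is `discount_factor`.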

    def update_exploration_rate(self):
        # Decrease the exploration rate over time.
        self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate * self.exploration_decay_rate)

    def has_converged(self, threshold=0.005):
        # Check if the Q-values have converged.
        return np.all(np.abs(self.q_table - np.max(self.q_table, axis=1, keepdims=True)) < threshold)
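
    # Note: this criterion counts the table as "converged" when, for every state,
    # all action values lie within `threshold` of that state's best action value
    # (each row of the Q-table is nearly flat), rather than tracking how much the
    # values change between episodes.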

    def train(self):
        # Train the agent over a series of episodes.
        for episode in range(self.max_episodes):
            state = random.randint(0, self.states - 1)
            for step in range(self.max_steps_per_episode):
                action = self.choose_action(state)
                next_state = random.randint(0, self.states - 1)
                reward = 1 if next_state == self.states - 1 else 0
                self.learn(state, action, reward, next_state)
                state = next_state
            self.update_exploration_rate()
            if self.has_converged():
                return True, episode
        return False, self.max_episodes
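
    # Note: the training loop above simulates the environment inline. The next
    # state is drawn uniformly at random and a reward of 1 is given only when the
    # sampled next state is the last state index, so in this proof of concept the
    # transition does not actually depend on the chosen action.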

    def resize_q_table(self, new_states, new_actions):
        # Resize the Q-table if the number of states or actions increases.
        if new_states > self.states or new_actions > self.actions:
            new_q_table = np.zeros((new_states, new_actions))
            new_q_table[:self.states, :self.actions] = self.q_table
            self.q_table = new_q_table
            self.states = new_states
            self.actions = new_actions
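
# A minimal sketch of how resize_q_table behaves (the `_demo_agent` name is
# illustrative and not part of the gist's main flow): the old table is copied
# into the top-left corner of the new, larger table and the added entries start
# at zero, so previously learned values survive the resize.
_demo_agent = SophisticatedQLearningAgent(states=2, actions=2)
_demo_agent.q_table[0, 1] = 0.5
_demo_agent.resize_q_table(new_states=4, new_actions=2)
assert _demo_agent.q_table.shape == (4, 2) and _demo_agent.q_table[0, 1] == 0.5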

# Initialize and refine the sophisticated agent
sophisticated_agent = SophisticatedQLearningAgent(states=30, actions=4)
max_refinements = 5
refinement_count = 0
converged = False

# Refine the agent until it converges or reaches the maximum number of refinements
while not converged and refinement_count < max_refinements:
    new_states = sophisticated_agent.states + 10                  # Increasing the number of states
    new_actions = sophisticated_agent.actions                     # Keeping the number of actions constant
    sophisticated_agent.resize_q_table(new_states, new_actions)   # Resize the Q-table
    sophisticated_agent.max_episodes += 5000                      # Increase the number of episodes
    sophisticated_agent.max_steps_per_episode += 50               # Increase the steps per episode
    converged, episodes = sophisticated_agent.train()             # Train the agent
    refinement_count += 1

# Result of training after refinements
print(f"Converged: {converged}, Episodes: {episodes}, Refinements: {refinement_count}")
print(f"Q-Table:\n{sophisticated_agent.q_table}")