Created
October 25, 2024 18:41
-
-
Save vwxyzjn/473b8bb6ddd65aad886559e769eac292 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import queue | |
import threading | |
import time | |
class Agent():
    """Toy learner whose whole state is one integer "policy parameter".

    ``param`` starts at 1 (policy π_1) and is bumped by one on every
    ``learn`` call, standing in for a real gradient update.
    """

    def __init__(self):
        # π_1 is the initial policy.
        self.param = 1

    def learn(self, data):
        # Simulated training step: the data itself is ignored, only the
        # policy version counter advances.
        self.param = self.param + 1
def query_generator_fn():
    """Yield the stream of query ids 1..99, one per learner request."""
    yield from range(1, 100)
ITER = 7           # number of learner iterations to run
batch_size = 32    # NOTE(review): declared but never used below — presumably a leftover
agent = Agent()
# maxsize=1 turns both queues into rendezvous points: a put() blocks until the
# other side has drained the previous item, which is what enforces the
# actor/learner schedule (lock-step in sync mode, one-step-ahead in async mode).
data_Q = queue.Queue(maxsize=1)
param_and_query_Q = queue.Queue(maxsize=1)
def actor():
    """Rollout worker: repeatedly take (params, query) from the learner,
    simulate 1s of data generation, and hand the data back via data_Q."""
    for step in range(1, ITER + 1):
        current_params, query = param_and_query_Q.get()
        # The "data" is tagged with the policy version that produced it.
        rollout = current_params
        print(f"[actor] generating data π_{current_params} -> p_{query} D_π_{rollout}")
        time.sleep(1)  # simulate data generation
        data_Q.put((query, rollout))
actor_thread = threading.Thread(target=actor)
actor_thread.start()

# Seed the pipeline: give the actor the initial params and the first query so
# it can start generating data before the learner's first iteration.
generator = query_generator_fn()
next_queries = next(generator)
param_and_query_Q.put((agent.param, next_queries))

# Cleanba-style schedule (https://arxiv.org/abs/2410.18252): in async mode the
# actor's rollout for the next step overlaps with the learner's training on the
# current step (actor runs one policy version behind); in sync mode the learner
# waits for fresh on-policy data every step.
async_mode = True
start_time = time.time()
for g in range(1, ITER + 1):
    queries = next_queries
    if async_mode:
        if g != 1:
            next_queries = next(generator)
        # Put every iteration (note: NOT inside the `if g != 1`) with the
        # *current* params/queries, so the actor generates step g+1's data
        # while we train on data it already produced — ~1s per iteration.
        param_and_query_Q.put((agent.param, queries))
    else:
        if g != 1:
            next_queries = next(generator)
            # Sync: hand out the freshly-learned params, then block below
            # until the actor finishes generating with them — ~2s per
            # iteration. (Indent differs from the async branch on purpose.)
            param_and_query_Q.put((agent.param, next_queries))
    _, data = data_Q.get()
    old_param = agent.param
    agent.learn(data)
    time.sleep(1)  # simulate training
    # fixed typo: "leaner" -> "learner"
    print(f"--[learner] get π_{old_param} -> p_{queries} D_π_{data} -> π_{agent.param}, time: {time.time() - start_time}")
actor_thread.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This gist demonstrates the core async RL concept presented in https://arxiv.org/abs/2410.18252:
Assuming rollout and training each take 1 second, running in async mode lets the actor's data generation overlap with the learner's training, parallelizing the two:
Versus if we run in sync mode: