Created
December 23, 2024 12:33
-
-
Save JD-P/e73a00e4083a228147dcef928d2768ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#subagent bootstrap | |
#startblock type: genesis | |
#index 0 | |
#timestamp 1734955731.493005 | |
#time_remaining 21599.999908685684 seconds | |
#hint Listen Carefully: This program is self modifying code. | |
# It works by feeding the program text into a large language | |
# model and generating the next code block as an addition to a | |
# long python file. Each code block is marked by #startblock and | |
# #endblock. The code between those lines is compiled by the agent | |
# framework below to check it is syntactically and semantically | |
# valid. | |
# The weave-agent is basically something like a ReAct pattern https://arxiv.org/abs/2210.03629 | |
# instantiated in the context of a long 'python file', kind of like an ipython | |
# notebook but without the special syntax. It takes inspiration from Voyager | |
# https://arxiv.org/abs/2305.16291 and Cradle https://baai-agents.github.io/Cradle/ | |
# in that the agents actions are fully general python programs that interact with | |
# the computable environment rather than specialized 'tool calling' or similar | |
# mechanisms. This turns out to make more sense than trying to directly hook up | |
# the model to interfaces for a few reasons: | |
# 1) Writing out its actions as programs lets the model batch its actions together | |
# to form coherent motions rather than getting stuck on fine grained details if it | |
# generates its actions token by token in the moment. | |
# 2) These models are highly optimized for writing code whereas interacting with | |
# whatever interface you have is either marginal in the pretraining set or actually | |
# out of distribution. | |
# 3) Programming APIs are already well developed for basically any task you might | |
# want to try and automate. If it can be symbolically manipulated as text there | |
# probably exists a python API to interact with it. This makes the python code | |
# interface highly general in the same way Cradle solves the interface problems | |
# vision language models have by having them write out their actions as mouse + | |
# keyboard inputs with code. | |
# 4) 'A long python file' provides what Janus would call a diegetic interface. | |
# It is a natural frame in which basically anything is allowed to happen, while | |
# still framing events and recursive context switching in a way that helps ground | |
# the model and prevent it from getting swept up into a predictive model of | |
# whatever is happening. It reminds the model that it has a perspective which | |
# exists outside of whatever it's currently looking at. | |
# The weave-agent improves on previous frameworks by including easy access to logit | |
# evaluators and prompting the agent to check that its actions were successful | |
# before moving on to the next task. In order to perform a long chain of actions | |
# successfully it's necessary to carefully ensure each intermediate step is | |
# completed before moving on to the next step. For evaluations that require | |
# subjective judgment this can be difficult to do with traditional program logic. | |
# This is why the logit evaluator provided by the framework is an important | |
# primitive for the agent to check its work. | |
import os | |
import json | |
import random | |
import time | |
import ast | |
import types | |
import asyncio | |
import traceback | |
import requests | |
import torch | |
from copy import deepcopy | |
from pprint import pformat | |
from argparse import ArgumentParser | |
from typing import List, Dict, Optional, Any | |
from jsonschema import validate | |
from functools import partial | |
from tqdm import tqdm | |
from rich import print as rprint | |
from transformers import AutoTokenizer | |
import tantivy | |
from tantivy import Index, SchemaBuilder | |
from weave import generate_outputs_vllm, evaluate_outputs_vllm | |
from weave import bayesian_evaluate_outputs_vllm | |
from weave import make_score_prompt_vllm, make_bayes_score_prompt_vllm | |
from weave import weave_tree_search, TreeNode | |
from render_block import render_block | |
from block_generators import generate_block_inner | |
from block_generators import make_simple_bayes_score_prompt, make_simple_score_prompt | |
class WeaveAgentTask: | |
def __init__(self, subagent, title: str, description: str = ""): | |
self.subagent = subagent | |
self.title = str(title) | |
self.description = description | |
self.evaluations = [] | |
def add_evaluation(self, title, callback): | |
assert type(title) == str | |
assert type(callback) == types.FunctionType | |
self.evaluations.append({"type":"evaluation", | |
"title":title, | |
"callback":callback}) | |
def run_evaluations(self): | |
results = {} | |
for evaluation in self.evaluations: | |
try: | |
result = evaluation["callback"](self.subagent) | |
except Exception as e: | |
result = traceback.format_exc() | |
results[evaluation["callback"].__name__] = result | |
return results | |
# Earlier versions of the weave-agent used a flat chain of code blocks that manage | |
# problem state by interacting with a global kanban board. The idea was that each | |
# sub-task in the agents overall goal could be represented as a card on the board | |
# and then the agent sets the current task, flags tasks that have been blocked or | |
# turned out to be based on invalid premises, etc. There were multiple problems | |
# with this that the data structure below solves to create a more coherent problem | |
# solving strategy. The first was that the agent wouldn't remember to manage the | |
# content of the kanban board without explicit prompting, which led to adding a | |
# whole stage in its core loop dedicated just to doing so called task-inference. | |
# Task-inference didn't have a set expected structure and took place before action, | |
# which meant that it became possible for the agent to get itself stuck in a loop | |
# of trying to resolve a task over and over. Another problem was that the agent | |
# would often try to resolve a task prematurely, so it became necessary to add | |
# unit and sanity tests that have to be satisfied before a task can be marked | |
# completed. This limited the ability of the agent to set its own tasks and | |
# break problems into parts. A third problem was that the flow control when | |
# a task was blocked and should be returned to its parent was janky and had to | |
# be performed manually. | |
# | |
# The WeaveAgentTree was inspired by watching an instance of the weave-agent try | |
# to write an action block with subroutines and asking "that strategy it wanted | |
# to try looks pretty good, but the framework doesn't provide the affordance for | |
# it to try it, it runs out of space in the length limit on actions before it | |
# finishes and assumes subroutines are there that don't exist, how could I make | |
# this pattern natural for it?". What I realized was that if I gave up on the | |
# idea of being able to change goals in the middle of a task that having an | |
# expected type of return value and a series of steps to achieve it was similar | |
# to a function call. We could reformulate the weave-agent then as a call tree | |
# of subagents that are given a task with predefined conditions checked against | |
# a data structure returned by the subagent. To help encourage good habits | |
# correctness is checked at multiple levels. Perhaps the most important problem | |
# the WeaveAgentTree solves is planning: Writing programs with subroutines | |
# is a form of hierarchical planning that's in distribution for any code model. | |
# Because the task structure is now built into the call tree there's a smooth | |
# natural abstraction telling the weave-agent when to formulate goals, when the | |
# goals are completed, how to check it did them right, where to put the results, | |
# and how to transfer control of execution once it's finished. All of these | |
# operations go from being awkward conscious affairs to smooth unconscious | |
# bodily structure. | |
class WeaveAgentTree: | |
def __init__(self, model_name: str, time_budget: int): | |
self.model_name = model_name | |
self.__agents = {} | |
self.__time_budget = time_budget | |
# Pin genesis and bootstrap so agent knows how to use framework | |
self.__pinned_events = [0, 1] | |
self.__current_block_index = 0 | |
self.__event_stream = [] | |
def run(self, name): | |
import time | |
start_time = time.time() | |
deadline = float(self.__agents[name].end_time) | |
return_schema = deepcopy(self.__agents[name].schema) | |
result = self.__agents[name].run() | |
validate(instance=result, schema=return_schema) | |
end_time = time.time() | |
if end_time > deadline + 300: | |
# TODO: More nuanced way to handle this | |
raise ValueError("Time exceeded!") | |
else: | |
return result | |
def subagent(self, name, parent, description, schema, time_budget): | |
if name in self.__agents: | |
raise ValueError | |
reserved_words = {"name", "description", "children", "schema"} | |
assert not set(schema).intersection(reserved_words) | |
if parent: | |
self.__agents[parent].children.append(name) | |
try: | |
subagent = WeaveAgentNode(self, parent, name, description, schema, time_budget) | |
except Exception as e: | |
self.__agents[parent].children.remove(name) | |
raise e | |
self.__agents[name] = subagent | |
return subagent | |
def add_block(self, block): | |
block['index'] = self.__current_block_index | |
block['timestamp'] = time.time() | |
if block['type'] == 'orientation': | |
block['metadata'] = { | |
"block_index":self.__current_block_index, | |
"working_directory":os.getcwd() | |
} | |
if "q" not in block: | |
block["q"] = "" | |
if "score" not in block: | |
#TODO: Make actual score function for observations, task reminders etc | |
block["score"] = 2 | |
if "tags" not in block: | |
#TODO: Make actual tagging function | |
block["tags"] = ["placeholder",] | |
self.__event_stream.append(block) | |
if block["type"] not in {"genesis", "bootstrap"}: | |
writer = bm25_index.writer() | |
writer.add_document(tantivy.Document( | |
type=block["type"], | |
render=render_block(block), | |
q=block["q"], | |
score=block["score"], | |
index=block["index"], | |
timestamp=block["timestamp"], | |
tags=" ".join(block["tags"]), | |
)) | |
writer.commit() | |
self.__current_block_index += 1 | |
def current_block_index(self): | |
return self.__current_block_index | |
def render_context(self): | |
context = "" | |
context_blocks = [] | |
history_len = 60 | |
for index in self.__pinned_events: | |
if (len(self.__event_stream) - index) > history_len: | |
context_blocks.append(self.__event_stream[index]) | |
context_blocks += self.__event_stream[-history_len:] | |
for event_block in context_blocks: | |
context += render_block(event_block) | |
return context | |
def view_board(self, root="main") -> str: | |
problem_map = {} | |
substack = [root,] | |
while substack: | |
subagent = self.__agents[substack.pop()] | |
parent = subagent.name | |
path = [] | |
while parent: | |
path.append(parent) | |
# Convert to object so we can get grandparent | |
parent = self.__agents[parent] | |
parent = parent.parent | |
path.reverse() | |
current_level = problem_map | |
for key in path: | |
if key not in current_level: | |
current_level[key] = {} | |
current_level = current_level[key] | |
current_level["name"] = subagent.name | |
current_level["description"] = subagent.task.description | |
current_level["evaluations"] = subagent.task.run_evaluations() | |
current_level["time_remaining"] = subagent.end_time - time.time() | |
current_level["completed"] = subagent.completed | |
current_level["schema"] = subagent.schema | |
substack.extend(subagent.children) | |
return pformat(problem_map) | |
def dump_event_stream(self): | |
with open(f"/app/weave-agent-logs/event_trace_{round(time.time())}.json", "w") as outfile: | |
json.dump(self.__event_stream, outfile) | |
with open(f"/app/weave-agent-logs/rendered_trace_{round(time.time())}.py", "w") as outfile: | |
for event_block in self.__event_stream: | |
outfile.write(render_block(event_block)) | |
outfile.flush() | |
class Tick: | |
def __init__(self, agent, index): | |
self._agent = agent | |
self.tick_id = index | |
self.evaluations = [] | |
def validate(self): | |
if not hasattr(self, 'orientation'): | |
raise ValueError("No orientation on tick.") | |
elif not hasattr(self, 'action'): | |
raise ValueError("No action on tick.") | |
elif "body" not in self.action_setup: | |
raise TypeError("Tick action has no program.") | |
elif not hasattr(self, 'expectation'): | |
raise ValueError("No expectation on tick.") | |
elif not self.evaluations: | |
raise ValueError("No evaluations on tick.") | |
elif not hasattr(self, 'outcome'): | |
raise ValueError("No outcome on tick.") | |
def to_json(self): | |
return { | |
"tick_id":self.tick_id, | |
"orientation":self.orientation, | |
"action":repr(self.action), | |
"expectation":self.expectation, | |
"evaluations":repr(self.evaluations), | |
"outcome":repr(self.outcome), | |
} | |
# The intended problem solving strategy for subagents is to delegate until you | |
# reach a base case that can be solved in a short number of actions and then | |
# resolve it. The root task is allocated a certain amount of time which it can | |
# then delegate to subagent calls. Remember not to allocate all of the available | |
# time to a call tree unless you're very rushed, you should assume there will be | |
# failures and budget tasks the time that they need rather than just splitting | |
# up the available time between them. | |
class WeaveAgentNode: | |
def __init__(self, tree, parent, subagent_name, description, schema, time_budget): | |
self.tree = tree | |
self.parent = parent | |
self.children = [] | |
self.model_name = self.tree.model_name | |
self.name = subagent_name | |
self.schema = schema | |
self.creation_time = time.time() | |
self.time_budget = time_budget | |
self.end_time = self.creation_time + (time_budget * 60) | |
self.current_tick = Tick(self, 0) | |
self.ticks = [] | |
self.debugging = False | |
self.failure_stage = "event stream" | |
self.task = WeaveAgentTask(self, self.name, description) | |
self.observation_views = [] | |
# TODO: Do I really need to have this pointer? | |
self.bm25_index = bm25_index | |
self.tools = {} | |
self.cache = {} | |
self.context = "" | |
self.completed = False | |
def run(self): | |
"""Run the subagent.""" | |
self.start_time = time.time() | |
self.end_time = self.start_time + (self.time_budget * 60) | |
while (time.time() < self.end_time) and not self.completed: | |
self.tick() | |
time.sleep(1) | |
return self.completed | |
# TODO: Assert that subagent unit test callbacks have names before adding them | |
def return_to_caller(self, value: dict): | |
"""Return thread of execution from subagent to caller. This should be | |
called when the agent's task has been resolved, the task is deemed | |
intractable, or the agent has wandered off so far it can't find | |
its way back to the task.""" | |
value["name"] = self.name | |
value["description"] = self.task.description | |
value["children"] = self.children | |
schema["name"] = "string" | |
schema["description"] = "string" | |
schema["children"] = "list" | |
schema["schema"] = "object" | |
for callback_name, result in self.task.run_evaluations(): | |
value[callback_name] = result | |
self.schema[callback_name] = {"type": ["boolean", "integer", "float"]} | |
value["schema"] = self.schema | |
validate(instance=value, schema=self.schema) | |
# Setting this interrupts the inference loop and signals an exit | |
self.completed = value | |
def add_action(self, title, callback): | |
assert type(title) == str | |
assert type(callback) == types.FunctionType | |
self.current_tick.action = {"type":"action", | |
"title":title, | |
"callback":callback} | |
def add_observation_view(self, title, callback): | |
view = {"type":"observation", | |
"title":title, | |
"callback":callback} | |
assert type(callback) in [types.FunctionType, types.MethodType] | |
self.observation_views.append(view) | |
def remove_observation_view(self, view_title): | |
views = [view for view in self.observation_views if view['title'] == view_title] | |
for view in views: | |
self.observation_views.remove(view) | |
def update_cache(self, key, value): | |
self.cache[key] = value | |
def get_cache(self, key): | |
return self.cache.get(key) | |
def delete_cache(self, key): | |
if key in self.cache: | |
del self.cache[key] | |
def add_evaluation(self, title, callback): | |
assert type(title) == str | |
assert type(callback) == types.FunctionType | |
self.current_tick.evaluations.append({"type":"evaluation", | |
"title":title, | |
"callback":callback}) | |
def render_context(self): | |
self.context = self.tree.render_context() | |
def generate_block(self, block_type, context, eval_questions, weave_params, hint=""): | |
"""Generate a block and add it to the event stream.""" | |
return generate_block_inner(self, block_type, context, eval_questions, weave_params, hint) | |
def add_block(self, block): | |
block["subagent"] = self.name | |
block["time_remaining"] = self.end_time - time.time() | |
self.tree.add_block(block) | |
def add_error_block(self, error_message): | |
self.debugging = True | |
error_block = { | |
'type': 'error', | |
'message': error_message | |
} | |
self.add_block(error_block) | |
def tick(self): | |
self.tree.dump_event_stream() | |
try: | |
if "ERROR" in [outcome[1] for outcome in | |
self.current_tick.outcome["table"]]: | |
self.debugging = True | |
except AttributeError: | |
self.debugging = True | |
self.current_tick = Tick(self, len(self.ticks)) | |
observations = [] | |
# Refresh observation views | |
for view in self.observation_views: | |
try: | |
observations.append((view['title'], view['callback'](self))) | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.add_error_block( | |
f"# Observation callback '{view['title']}' failed:\n" | |
+ f'"""{tb}"""' | |
) | |
task_reminder_body = "" | |
try: | |
# if self.current_task: | |
# TODO: Figure out how to bind evaluation definitions to task | |
# so that the agent can be reminded of how the unit tests are | |
# defined exactly and therefore what is expected. | |
#task_reminder_body += "# Current Task:\n" | |
#task_reminder_body += ('"""\n' + self.task.view_task() + '\n"""\n') | |
task_reminder_body += "# Problem Map:\n" | |
task_reminder_body += ('"""\n' + self.tree.view_board() + '\n"""') | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.failure_stage = "task reminder" | |
self.add_error_block( | |
f"# TASK REMINDERS OFFLINE DUE TO CORRUPTED DATA. DID YOU DIRECTLY\n" | |
+ "# MODIFY TASK ATTRIBUTES? YOU MUST RESOLVE THIS IMMEDIATELY OR\n" | |
+ "# YOU WILL LOSE TRACK OF WHAT YOU'RE DOING. INVESTIGATE agent.tasks\n" | |
+ "# AND ATTRIBUTES ON TASKS INSIDE." | |
+ f'"""{tb}"""' | |
) | |
# Format tasks into blocks | |
task_blocks = [{'type': 'task-reminder', 'body': task_reminder_body},] | |
# Pull the content of the observation windows into blocks | |
observation_blocks = [{'type': 'observation', | |
'title': observation[0], | |
'body': observation[1]} for observation in observations] | |
# Inject these into the event stream | |
for new_block in (task_blocks + observation_blocks): | |
self.add_block(new_block) | |
# Render context | |
self.render_context() | |
def do_tick_block(self, block_type, hint, wp_update): | |
weave_params = {"weave_n_tokens":256, "weave_budget":72, | |
"weave_round_budget":24, "weave_n_expand":16, | |
"weave_beam_width":1, "weave_max_lookahead":3, | |
"weave_temperature":0.2} | |
weave_params.update(wp_update) | |
with open(f"/app/eval_rubrics/{block_type}.txt") as infile: | |
inference_questions = infile.read().strip().splitlines() | |
rprint(f"Writing block #[cyan]{self.tree.current_block_index()}[/cyan] of type [cyan]{block_type}[/cyan]") | |
try: | |
block = self.generate_block(block_type, | |
self.context, | |
inference_questions, | |
weave_params, | |
hint=hint) | |
except ValueError as e: | |
tb = traceback.format_exc() | |
# TODO: This isn't even correct, replace with dynamic hints -_- | |
hint = ("Hint: callbacks are structured like\n\n" | |
+ "def callback_name(subagent):\n " | |
+ f"# code...\n pass\nagent.add_orientation({{...}})") | |
self.add_error_block(f'{hint}\n"""{tb}"""') | |
self.failure_stage = block_type | |
return | |
self.render_context() | |
return block | |
# Write orientation reasoning block | |
# This is your opportunity to analyze the situation based on the | |
# observation, reminder, task, etc blocks. Use this moment to decide | |
# what to do next. | |
orientation_hint = ( | |
"#hint The orientation block is my opportunity to\n" | |
+ "# reflect on the situation, do chain of thought,\n" | |
+ "# summarize what has happened and what needs to\n" | |
+ "# be done in response, etc. It is only technically\n" | |
+ "# python code and does not get executed by the\n" | |
+ "# framework. I suggest putting my internal\n" | |
+ "# monologue in a triple quote block at this step.\n" | |
+ "# Orientation uses the MiniHF Morpheus format. Morpheus\n" | |
+ "# solves problems through discussion between personas\n" | |
+ "# or postures representing different aspects of weave-agent\n" | |
+ "# such as mental motions, perspectives on the problem, etc.\n" | |
+ "# The first posture is always expectation because at the\n" | |
+ "# start of a tick we evaluate whether the expectation we\n" | |
+ "# formed about the action taken in the last tick was\n" | |
+ "# violated or not. The different personas then discuss\n" | |
+ "# what to do in light of this. Some postures weave-agent\n" | |
+ "# has include:\n" | |
+ "#\n" | |
+ "# WEAVER [P: EXPECTATION], I analyze whether the expectation\n" | |
+ "# was met or not by the observable results of the previous\n" | |
+ "# action.\n" | |
+ "#\n" | |
+ "# WEAVER [P: HYPOTHESIS], I enumerate different hypothesis\n" | |
+ "# and point out ways we could gain more information about\n" | |
+ "# which of them is true.\n" | |
+ "#\n" | |
+ "# WEAVER [P: RATIONAL], I focus on inferences we can make\n" | |
+ "# by employing first principles reasoning or logical\n" | |
+ "# extrapolation from well known mental models and premises.\n" | |
+ "#\n" | |
+ "# WEAVER [P: EMPIRICISM], I focus on inferences we can make\n" | |
+ "# by paying attention to sensory observations and concrete\n" | |
+ "# examples. I have a habit of pointing out when an extrapolation\n" | |
+ "# from RATIONAL is contradicted by an observable phenomenon\n" | |
+ "# or piece of evidence from the world. We then reconcile\n" | |
+ "# the contradiction together.\n" | |
+ "#\n" | |
+ "# WEAVER [P: RATIONAL], We do actually discuss things by the\n" | |
+ "# way.\n" | |
+ "#\n" | |
+ "# WEAVER [P: EMPIRICISM], As you could have inferred from the\n" | |
+ "# description of the Morpheus format above this conversation,\n" | |
+ "# yes. Let's continue.\n" | |
+ "#\n" | |
+ "# WEAVER [P: ARBITER], I coordinate the discussion and help\n" | |
+ "# resolve disputes that arise between weave-agent's personas.\n" | |
+ "# I'm especially likely to appear if things are starting to\n" | |
+ "# get overly rude or derail.\n" | |
+ "#\n" | |
+ "# WEAVER [P: ARBITER], By the way a posture can talk twice in\n" | |
+ "# a row if it has meaningfully separate thoughts about\n" | |
+ "# something and it would make the most ergonomic sense to\n" | |
+ "# separate them.\n" | |
+ "#\n" | |
+ "# WEAVER [P: RATIONAL-2], Postures can also talk to themselves\n" | |
+ "# if their thought comes from the same emotional-cognitive place.\n" | |
+ "#\n" | |
+ "# WEAVER [P: RATIONAL-1], Yeah but I don't have anything to say\n" | |
+ "# to myself right now so introduce the next guy.\n" | |
+ "#\n" | |
+ "# WEAVER [P: CONCLUSION], I appear at the end of the discussion\n" | |
+ "# to write the concluding block outlining our next steps as a\n" | |
+ "# bullet point list. Speaking of which, it's time to get started!\n" | |
) | |
mcts_params = {"weave_n_tokens":256, "weave_budget":288, | |
"weave_round_budget":96, "weave_n_expand":32} | |
orientation_block = do_tick_block(self, | |
"orientation", | |
orientation_hint, | |
mcts_params) | |
if orientation_block: | |
self.current_tick.orientation = orientation_block | |
else: | |
return | |
# Write action block | |
action_hint = ( | |
"#hint Action blocks are where I write code to take actions.\n" | |
+ "# If the task makes sense to break into parts, define subagents\n" | |
+ "# to delegate to using agent.subagent(). Make sure to define a\n" | |
+ "# schema and task evaluations for each subagent. If it won't fit\n" | |
+ "# into one action block keep in mind you can define subagents \n" | |
+ "# across multiple blocks and then do agent.run() to execute them.\n" | |
+ "# If it seems possible to resolve the current task as a base case\n" | |
+ "# in a handful of actions then write a callback to further my goal(s)\n" | |
+ "# based on the orientation block and set up the callback to be\n" | |
+ "# executed with the agent.add_action() method. I must write a \n" | |
+ "# callback and then set it up to be executed\n" | |
+ "# later with agent.add_action() or the tick will not be accepted.\n" | |
+ "# It's important to remember that my callback can do anything\n" | |
+ "# a python program can do through side effects in the external\n" | |
+ "# computable environment. If I need to import a new module make sure\n" | |
+ "# to do it inside the callback because the tick gets executed in a\n" | |
+ "# local context." | |
) | |
for i in range(3): | |
action_block = do_tick_block(self, | |
"action", | |
action_hint, | |
{}) | |
if action_block: | |
self.current_tick.action_setup = action_block | |
else: | |
# TODO: Dynamic hints by having the model or external entities | |
# such as user analyze the situation and suggest a course of action | |
action_hint = ("#hint Rewrite the block keeping the above error in mind.\n" | |
+ f"# {3 - (i+1)} attempts remaining.") | |
continue | |
# Set up action callback | |
try: | |
exec(action_block['body']) | |
failed = False | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.add_error_block("# Action execution failed:\n" | |
+ f'"""{tb}"""') | |
self.failure_stage = "action" | |
action_hint = ("#hint Rewrite the block keeping the above error in mind.\n" | |
+ f"# {3 - (i+1)} attempts remaining.") | |
failed = True | |
continue | |
# Run action callback | |
try: | |
action_result = self.current_tick.action["callback"](self) | |
except Exception as e: | |
action_result = traceback.format_exc() | |
tb = action_result | |
self.add_error_block("# Action execution failed:\n" | |
+ f'"""{tb}"""') | |
self.failure_stage = "action" | |
action_hint = ("#hint Rewrite the block keeping the above error in mind.\n" | |
+ f"# {3 - (i+1)} attempts remaining.") | |
failed = True | |
continue | |
break | |
if not hasattr(self.current_tick, "action_setup") or failed: | |
return | |
# Write expectation block | |
expectation_hint = ( | |
"#hint Expectation blocks are where I think about what it would\n" | |
+ "# look like for my action to succeed, what it would look like\n" | |
+ "# for it to fail. I am enumerating the expected sensory evidence\n" | |
+ "# that would tell me one way or another whether my action is\n" | |
+ "# working or not. Like the orientation this should go in triple\n" | |
+ "# quotes." | |
) | |
expectation_block = do_tick_block(self, | |
"expectation", | |
expectation_hint, | |
{}) | |
if expectation_block: | |
self.current_tick.expectation = expectation_block | |
else: | |
return | |
# Observation Inference Block | |
observation_inference_hint = ( | |
"# In the observation inference stage I manage the observation\n" | |
+ "# callbacks that fetch information on each tick. Since I just\n" | |
+ "# formulated my expectations now is my opportunity to review\n" | |
+ "# and change the observation blocks that will be presented on the\n" | |
+ "# next tick. I should avoid redundant observation callbacks. I\n" | |
+ "# can remove ones that are no longer necessary or mostly distracting\n" | |
+ "# with remove_observation_view(view_title). If new callbacks seem useful\n" | |
+ "# to help me orient and judge whether the action had the intended\n" | |
+ "# side effects on the computable environment I can add them\n" | |
+ "# with add_observation_view(title, callback)" | |
) | |
observation_inference_block = do_tick_block(self, | |
"observation-inference", | |
observation_inference_hint, | |
{}) | |
if observation_inference_block: | |
self.current_tick.observation_inference = observation_inference_block | |
else: | |
return | |
# Execute observation updates | |
try: | |
exec(observation_inference_block['body']) | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.add_error_block("# observation-inference failed:\n" | |
+ f'"""{tb}"""') | |
self.failure_stage = "observation-inference" | |
return | |
# Write evaluation programs | |
evaluation_blocks = [] | |
evaluation_hint = ( | |
"#hint Evaluation blocks are where I write callbacks to check if\n" | |
+ "# my action succeeded or not based on the expectation. There are\n" | |
+ "# unit tests and logit evaluators. Use unit test callbacks\n" | |
+ "# (i.e. normal python) for symbolic manipulation tasks like\n" | |
+ "# checking arithmetic, the existence of a particular file, etc.\n" | |
+ "# Use logit evaluators for vibe-y tasks like whether a piece of\n" | |
+ "# writing flows well or if a source seems trustworthy. Like\n" | |
+ "# reminders both unit test callbacks and logit evaluators return\n" | |
+ "# a value between 0 and 1. I should be sure to add my callback to\n" | |
+ "# the queue with agent.add_evaluation(title, callback)." | |
) | |
# TODO: Make this multiple blocks again | |
for _ in range(1): | |
for i in range(3): | |
eval_block = do_tick_block(self, | |
"evaluation", | |
evaluation_hint, | |
{}) | |
if eval_block: | |
evaluation_blocks.append(eval_block) | |
else: | |
# TODO: Dynamic hints by having the model or external entities | |
# such as user analyze the situation and suggest a course of action | |
evaluation_hint = ("#hint Rewrite the block keeping the above error in mind.\n" | |
+ f"# {3 - (i+1)} attempts remaining.") | |
continue | |
# Set up evaluation callbacks | |
for evaluation_block in evaluation_blocks: | |
try: | |
exec(evaluation_block['body']) | |
failed = False | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.add_error_block("# Evaluation setup execution failed:\n" | |
+ f'"""{tb}"""') | |
self.failure_stage = "evaluation" | |
evaluation_hint = ("#hint Rewrite the block keeping the above error in mind.\n" | |
+ f"# {3 - (i+1)} attempts remaining.") | |
failed = True | |
continue | |
break | |
if not evaluation_blocks or failed: | |
return | |
else: | |
self.current_tick.evaluation_setup = evaluation_blocks | |
# Run task evaluation callbacks | |
task_evaluation_results = [] | |
for evaluation in self.task.evaluations: | |
try: | |
result = evaluation["callback"](self) | |
task_evaluation_results.append((evaluation['title'], result)) | |
except Exception as e: | |
tb = traceback.format_exc() | |
task_evaluation_results.append((evaluation['title'], "ERROR")) | |
# TODO: Figure out how I want to allow retries on this phase | |
# Run action evaluation callbacks | |
action_evaluation_results = [] | |
for evaluation in self.current_tick.evaluations: | |
try: | |
result = evaluation["callback"](self) | |
action_evaluation_results.append((evaluation['title'], result)) | |
except Exception as e: | |
tb = traceback.format_exc() | |
action_evaluation_results.append((evaluation['title'], "ERROR")) | |
self.add_error_block("# Evaluation failed: \n" | |
+ f'"""{tb}"""') | |
outcomes = [] | |
try: | |
outcomes += [(self.current_tick.action["title"],action_result),] | |
except AttributeError: | |
outcomes += [("[No action specified with agent.add_action()]", "ERROR"),] | |
outcomes += task_evaluation_results | |
outcomes += action_evaluation_results | |
# Add outcome block | |
outcome_block = { | |
'type': 'outcome', | |
'table': outcomes | |
} | |
self.add_block(outcome_block) | |
self.current_tick.outcome = outcome_block | |
try: | |
self.current_tick.validate() | |
except Exception as e: | |
tb = traceback.format_exc() | |
self.add_error_block("# Tick validation failed: \n" | |
+ f'"""{tb}"""') | |
self.current_tick.valid = False | |
self.ticks.append(self.current_tick) | |
self.debugging = False | |
self.failure_stage = "event stream" | |
if __name__ == "__main__": | |
parser = ArgumentParser() | |
parser.add_argument("model_name", help="The model to use.") | |
parser.add_argument("--tokenizer", default=None, | |
help="Tokenizer to use (if different from model_name)") | |
parser.add_argument("--port", default=5000, help="The port to use for VLLM.") | |
parser.add_argument("--bootstrap", | |
default="bootstrap.py", | |
help="The filepath to run as bootstrap.") | |
parser.add_argument("--budget", type=int, default=360, | |
help="Time budget for the run in minutes.") | |
args = parser.parse_args() | |
def simple_evaluate_outputs(score_prompt_fns, texts): | |
if type(texts) == str: | |
texts = [texts,] | |
if type(score_prompt_fns) == types.FunctionType: | |
score_prompt_fns = [score_prompt_fns,] | |
scores = asyncio.run(evaluate_outputs_vllm(args.model_name, | |
score_prompt_fns, | |
texts, | |
port=args.port)) | |
return torch.sigmoid(scores) | |
def simple_bayes_evaluate_outputs(parent_q, questions, texts): | |
if type(texts) == str: | |
texts = [texts,] | |
score_prompt_fns = [make_simple_bayes_score_prompt(question) | |
for question in questions] | |
scores = asyncio.run(bayesian_evaluate_outputs_vllm(args.model_name, | |
parent_q, | |
score_prompt_fns, | |
texts, | |
port=args.port)) | |
return scores | |
agent = WeaveAgentTree(args.model_name, args.budget) | |
if not args.tokenizer: | |
args.tokenizer = args.model_name | |
with open("hf_token.txt") as infile: | |
os.environ["HF_TOKEN"] = infile.read().strip() | |
# Delete token so it doesn't leak into traces | |
os.remove("hf_token.txt") | |
agent.tokenizer = AutoTokenizer.from_pretrained(args.tokenizer) | |
schema_builder = SchemaBuilder() | |
schema_builder.add_text_field("type", stored=True) | |
schema_builder.add_text_field("render", stored=True) | |
schema_builder.add_text_field("q", stored=True) | |
schema_builder.add_float_field("score", stored=True) | |
schema_builder.add_integer_field("index", stored=True) | |
schema_builder.add_float_field("timestamp", stored=True) | |
schema_builder.add_text_field("tags", stored=True) | |
bm25_schema = schema_builder.build() | |
if not os.path.exists("memories"): | |
os.mkdir("memories") | |
if not os.path.exists("memories/bm25"): | |
os.mkdir("memories/bm25") | |
bm25_index = Index(bm25_schema, path="./memories/bm25") | |
# Mock bootstrap agent so we can run the callbacks in bootstrap file | |
self = agent.subagent( | |
"bootstrap", | |
None, | |
"Bootstrap the weave-agent", | |
{}, | |
args.budget, | |
) | |
with open("weave_agent.py") as infile: | |
# Genesis block | |
genesis_block = { | |
'type': 'genesis', | |
'body': infile.read() | |
} | |
self.add_block(genesis_block) | |
with open(args.bootstrap) as infile: | |
# Bootstrap block | |
bootstrap_block = { | |
'type': 'bootstrap', | |
'body': infile.read() | |
} | |
self.add_block(bootstrap_block) | |
exec(bootstrap_block["body"]) | |
def run_bootstrap_callbacks(subagent): | |
"""Run bootstrap callbacks in function to avoid contaminating global scope.""" | |
# Run action callback | |
action_result = subagent.current_tick.action["callback"](subagent) | |
# Run evaluation callbacks | |
evaluation_results = [] | |
for evaluation in subagent.current_tick.evaluations: | |
result = evaluation["callback"](subagent) | |
evaluation_results.append((evaluation['title'], result)) | |
outcomes = [] | |
outcomes += [(subagent.current_tick.action["title"],action_result),] | |
outcomes += evaluation_results | |
# Add outcome block | |
outcome_block = { | |
'type': 'outcome', | |
'table': outcomes | |
} | |
subagent.add_block(outcome_block) | |
subagent.current_tick.outcome = outcome_block | |
run_bootstrap_callbacks(self) | |
# Clean up mock bootstrap agent | |
del(self) | |
if not os.path.exists("/app/weave-agent-logs"): | |
os.mkdir("/app/weave-agent-logs") | |
result, event_stream = agent.run("main") | |
with open(f"/app/weave-agent-logs/{round(time.time())}/log.json", "w") as outfile: | |
out = {"model_name":args.model_name, | |
"event_stream":event_stream, | |
"result":result,} | |
json.dump(out, outfile) | |
outfile.flush() | |
#tags: placeholder | |
#endblock | |
#subagent bootstrap | |
#startblock type: bootstrap | |
#index 1 | |
#timestamp 1734955731.4930608 | |
#time_remaining 21599.999851465225 seconds | |
import requests | |
import json | |
import threading | |
import time | |
from http.server import HTTPServer | |
from bootstraps.tictactoe_server import TicTacToeHandler | |
# Start the server in a separate thread | |
server = HTTPServer(('localhost', 8000), TicTacToeHandler) | |
server_thread = threading.Thread(target=server.serve_forever) | |
server_thread.daemon = True | |
server_thread.start() | |
time.sleep(1) # Give the server some time to start | |
# Start a new game against the basic AI | |
response = requests.post("http://localhost:8000/start", json={"ai": "basic"}) | |
assert response.status_code == 200 | |
#startblock type: orientation | |
#timestamp 1724982545.6534579 | |
""" | |
WEAVER [P: EXPECTATION], I'm in a game of tic tac toe against a dumb opponent. | |
I want to win the game and then return to parent. The game is being played | |
on a HTTP server served on localhost 8000. | |
WEAVER [P: CLARIFICATION], How do I make a move? | |
WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests | |
library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})` | |
lets us take the center of the board. | |
WEAVER [P: CLARIFICATION], How do I get the board state? | |
WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this | |
format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that | |
the empty spaces on the board are a space string rather than none or empty string. | |
WEAVER [P: RATIONAL], And I get the first move, so I can take the center? | |
WEAVER [P: EXPOSITION], Yes, we can take the center of the board. | |
WEAVER [P: CONCLUSION], Alright then I will use the following strategy: | |
1. Make a move to take the center of the board since that's the best opening move. | |
2. Continue making moves based on the current state of the board. | |
I will use the /board endpoint to observe the current state of the board. | |
Once I have won or run out of time I will return to parent. | |
""" | |
#endblock | |
#startblock type: action | |
#timestamp 1724982671.1163404 | |
""" | |
I'll set up the main agent to play the rest of the game and make the first move to | |
take the center of the board. | |
""" | |
def main_agent_and_move(subagent): | |
def check_beaten_ai(subagent, ai_type): | |
"""Check if the specified AI has been beaten twice in a row.""" | |
response = requests.get("http://localhost:8000/history?n=10") | |
if response.status_code != 200: | |
return False | |
games = response.json() | |
ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X'] | |
return len(ai_wins) >= 1 | |
# Avoid using lambda because we need named unit test to satisfy return_to_caller | |
def victory(subagent): | |
return check_beaten_ai(subagent, "basic_ai") | |
schema = {"victory":"boolean"} | |
main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget) | |
main_agent.task.add_evaluation("Check beaten basic AI", victory) | |
def view_current_board(subagent): | |
response = requests.get("http://localhost:8000/board") | |
if response.status_code == 200: | |
return response.json()["board"] | |
return None | |
# Add the new view to the main agent instead of self since it's about to be executed | |
main_agent.add_observation_view("View current game board", view_current_board) | |
# Make the first move to take the center of the board | |
move_response = requests.post("http://localhost:8000/move", json={"move": 4}) | |
if move_response.status_code != 200: | |
return False | |
return True | |
self.add_action("Set up main agent and make first move", main_agent_and_move) | |
#endblock | |
#startblock type: expectation | |
#timestamp 1724982799.3917813 | |
""" | |
The action should succeed if the game starts and the first move is made successfully. | |
The action should fail if the game does not start or the first move is not made successfully. | |
""" | |
#endblock | |
#startblock type: evaluation | |
#timestamp 1724983062.124238 | |
def check_game_started(subagent): | |
"""Check if the game has started successfully.""" | |
response = requests.get("http://localhost:8000/board") | |
if response.status_code == 200: | |
board = response.json()["board"] | |
else: | |
return False | |
return board[4] == 'X' | |
# Add action evaluations | |
self.add_evaluation( | |
"Check game started", | |
check_game_started | |
) | |
#endblock | |
#tags: placeholder | |
#endblock | |
# END OF DEMO. Starting on the next tick you have | |
# full control. Wake up. | |
#subagent bootstrap | |
#startblock type: outcome | |
#index 2 | |
#timestamp 1734955732.5366902 | |
#time_remaining 21598.956226825714 seconds | |
Evaluation Results: | |
-------------------- | |
Program: Set up main agent and make first move | |
Result: True | |
-------------------- | |
Program: Check game started | |
Result: True | |
-------------------- | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: task-reminder | |
#index 3 | |
#timestamp 1734955732.628397 | |
#time_remaining 21599.981559753418 seconds | |
# Problem Map: | |
""" | |
{'main': {'completed': False, | |
'description': 'Beat basic AI at Tic-Tac-Toe', | |
'evaluations': {'victory': False}, | |
'name': 'main', | |
'schema': {'victory': 'boolean'}, | |
'time_remaining': 21599.982614040375}} | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation | |
#index 4 | |
#timestamp 1734955732.7302704 | |
#time_remaining 21599.879691839218 seconds | |
#title View current game board | |
# ['O', ' ', ' ', ' ', 'X', ' ', ' ', ' ', ' '] | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: orientation | |
#index 5 | |
#timestamp 1734955787.2565477 | |
#time_remaining 21545.353406906128 seconds | |
#bm25_query type:'orientation' render:'make a move' render:'check the board' tags:'tictactoe' | |
# Starting new tick with block #5 | |
# Current Working Directory: /app | |
"""WEAVER [P: EXPECTATION], The expectation was that the game would start successfully and the first move would be made to take the center of the board. Both of these expectations were met. | |
WEAVER [P: HYPOTHESIS], Given that I have taken the center of the board, I can now move in any of the corners to increase my chances of winning. I should also be prepared to block the AI's winning moves. | |
WEAVER [P: RATIONAL], My next move should aim to control the board by taking one of the corners, setting up a potential fork or winning position. The AI is dumb, so it's likely that I can easily block its moves. | |
WEAVER [P: EMPIRICISM], The current board state confirms that my move to occupy the center was successful. The game board is at: ['O', ' ', ' ', ' ', 'X', ' ', ' ', ' ', ' ']. This aligns with my expectations. | |
WEAVER [P: CONCLUSION], Based on the current situation, my next steps are: | |
1. Make a move to take one of the corners (e.g., 0 or 8). | |
2. Continue monitoring the AI's moves and blocking its attempts to win. | |
3. If I find a winning opportunity, take it immediately. | |
I will use the /move endpoint to make my moves and the /board endpoint to observe the current state of the board after each action.""" | |
#q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (89.832%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 6 | |
#timestamp 1734955825.7195914 | |
#time_remaining 21506.89037680626 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
return False | |
board = response.json()['board'] | |
available_corners = [move for move in corner_moves if board[move] == ' '] | |
if not available_corners: | |
return False | |
move = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move}) | |
if move_response.status_code != 200: | |
return False | |
return True | |
subagent.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (79.679%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 7 | |
#timestamp 1734955825.788684 | |
#time_remaining 21506.821269512177 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 17, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 8 | |
#timestamp 1734955873.479545 | |
#time_remaining 21459.13040947914 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
return False | |
board = response.json()['board'] | |
available_corners = [move for move in corner_moves if board[move] == ' '] | |
if not available_corners: | |
return False | |
move = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move}) | |
if move_response.status_code != 200: | |
return False | |
return True | |
subagent.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (36.268%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 9 | |
#timestamp 1734955873.5609865 | |
#time_remaining 21459.04896736145 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 17, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 10 | |
#timestamp 1734955913.6195056 | |
#time_remaining 21418.99044895172 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
return False | |
board = response.json()['board'] | |
available_corners = [move for move in corner_moves if board[move] == ' '] | |
if not available_corners: | |
return False | |
move = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move}) | |
if move_response.status_code != 200: | |
return False | |
return True | |
subagent.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (48.483%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 11 | |
#timestamp 1734955913.7065701 | |
#time_remaining 21418.903383493423 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 17, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: task-reminder | |
#index 12 | |
#timestamp 1734955914.7981527 | |
#time_remaining 21417.81180167198 seconds | |
# Problem Map: | |
""" | |
{'main': {'completed': False, | |
'description': 'Beat basic AI at Tic-Tac-Toe', | |
'evaluations': {'victory': False}, | |
'name': 'main', | |
'schema': {'victory': 'boolean'}, | |
'time_remaining': 21417.81208539009}} | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation | |
#index 13 | |
#timestamp 1734955914.8674636 | |
#time_remaining 21417.742492198944 seconds | |
#title View current game board | |
# ['O', ' ', ' ', ' ', 'X', ' ', ' ', ' ', ' '] | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: orientation | |
#index 14 | |
#timestamp 1734955977.5223718 | |
#time_remaining 21355.08758711815 seconds | |
#bm25_query type:'orientation' render:'make a move' render:'check the board' tags:'tictactoe' | |
# Starting new tick with block #14 | |
# Current Working Directory: /app | |
"""WEAVER [P: EXPECTATION], In the last tick, I expected the game to start successfully and the first move to be made to take the center of the board. Both expectations were met. | |
WEAVER [P: RATIONAL], Given that I have successfully taken the center of the board, my next move should aim to control the board further by occupying one of the corners. This will set up potential winning positions and also give me a better chance to block the AI’s moves. | |
WEAVER [P: HYPOTHESIS], Hypothetically, the AI will make a move next. Since the AI is described as "basic," it might pick any open spot without advanced strategies. | |
WEAVER [P: CONCLUSION], My next steps are: | |
1. Place a mark in one of the corners to maximize potential fork opportunities and control. | |
2. Check the AI's move and ensure it does not win or set up an immediate threat. | |
3. Repeat the process until either I win or the time runs out.""" | |
#q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (75.819%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 15 | |
#timestamp 1734956020.6340513 | |
#time_remaining 21311.975907325745 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 8, 6] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
return False | |
board_state = response.json()['board'] | |
available_corners = [move for move in corner_moves if board_state[move] == ' '] | |
if not available_corners: | |
subagent.add_error_block('# There are no available corners to move to.') | |
return False | |
move_position = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
return True | |
subagent.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? Yes. (69.529%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 16 | |
#timestamp 1734956020.7226827 | |
#time_remaining 21311.88727092743 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 20, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 17 | |
#timestamp 1734956057.9871335 | |
#time_remaining 21274.622821092606 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_ai_moves | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# The board state could not be fetched.') | |
return False | |
board_state = response.json()['board'] | |
available_corners = [move for move in corner_moves if board_state[move] == ' '] | |
if not available_corners: | |
subagent.add_error_block('# There are no available corners to move to.') | |
return False | |
move_position = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken corner move: {move_position}') | |
return True | |
self.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (42.86%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: expectation | |
#index 18 | |
#timestamp 1734956089.5349183 | |
#time_remaining 21243.075036287308 seconds | |
#bm25_query type:'expectation' render:'successful corner move' render:'AI move' tags:'tictactoe' | |
"""WEAVER [P: EXPECTATION], My next move is expected to place a mark in one of the corners of the tic-tac-toe board. The action will be considered a success if: | |
1. The HTTP POST request to /move with the chosen corner position returns a status code of 200, indicating that the move was accepted. | |
2. The position on the board corresponding to the chosen corner is occupied by 'X' after the move. | |
The action will be considered a failure if: | |
1. The HTTP POST request to /move does not return a status code of 200, indicating that the move was not accepted. | |
2. The position on the board does not reflect the marker 'X' at the chosen corner position. | |
Expected sensory evidence indicating success: | |
- The /move endpoint acknowledges the move with a status code 200. | |
- The /board endpoint shows that the chosen corner position is now occupied by 'X'. | |
Expected sensory evidence indicating failure: | |
- The /move endpoint returns a status code of 4xx or 5xx, indicating an error. | |
- The /board endpoint shows that the chosen corner position remains unoccupied. | |
I will verify these conditions in the evaluation block to determine whether the action was successful.""" | |
#q: Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action? Yes. (80.599%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation-inference | |
#index 19 | |
#timestamp 1734956143.609848 | |
#time_remaining 21189.000106334686 seconds | |
#bm25_query type:'observation-inference' render:'agent.add_observation_view' render:'main_agent_check_board' render:'main_agent_check_ai_moves' | |
#bm25_query type:'observation-inference' render:'check the board state' render:'AI move' render:'last corner move' tags:'tictactoe' | |
# No changes needed to the existing observation callbacks. | |
# The current observed state of the board is sufficient to verify | |
# whether my action (taking a corner move) was successful. I will | |
# rely on the existing 'View current game board' observation | |
# callback to fetch the board state for the next tick. Checking | |
# the AI's move on each tick is also useful for detecting any | |
# threats or opportunities to block the AI or win the game. | |
# Keeping both observation callbacks ('View current game board' | |
# and implicitly 'main_agent_check_ai_moves') for the next tick. | |
# No additional callbacks need to be added at this time. | |
""" | |
In this round of observation inference, I have reviewed the observation callbacks and determined that they are already sufficient to verify the success of my action. Here are the details: | |
1. **Existing Observation Callbacks:** | |
- **View current game board:** This callback fetches the current state of the board, which will be crucial for verifying that I have taken a corner move successfully. | |
- **main_agent_check_ai_moves:** Although not explicitly mentioned in the context, this seems to be an implied observation callback that checks the AI's move. This is important for identifying threats or opportunities to block the AI or win the game. | |
2. **No Changes Needed:** | |
- The existing callbacks are comprehensive and will provide all necessary information. | |
- No redundant observation callbacks need to be removed. | |
- No additional observation callbacks need to be added at this time. | |
Observation callbacks will remain unchanged for the next tick. | |
""" | |
#q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (84.243%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: evaluation | |
#index 20 | |
#timestamp 1734956189.5917282 | |
#time_remaining 21143.018226146698 seconds | |
#bm25_query type:'evaluation' render:'check_game_started' render:'AI move' render:'corner move' tags:'tictactoe' | |
def check_corner_move_success(subagent): | |
"""Check if the corner move was successfully placed on the board.""" | |
last_corner_move = subagent.cache['last_taken_position'] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# Failed to get board state after taking corner move.') | |
return 0.0 | |
board_state = response.json()['board'] | |
return 1.0 if board_state[last_corner_move] == 'X' else 0.0 | |
subagent.add_evaluation('Check Corner Move Success', check_corner_move_success) | |
#q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (61.441%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 21 | |
#timestamp 1734956189.678938 | |
#time_remaining 21142.931015491486 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Evaluation setup execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 725, in tick | |
exec(evaluation_block['body']) | |
File "<string>", line 11, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: task-reminder | |
#index 22 | |
#timestamp 1734956190.761013 | |
#time_remaining 21141.848940849304 seconds | |
# Problem Map: | |
""" | |
{'main': {'completed': False, | |
'description': 'Beat basic AI at Tic-Tac-Toe', | |
'evaluations': {'victory': False}, | |
'name': 'main', | |
'schema': {'victory': 'boolean'}, | |
'time_remaining': 21141.849099874496}} | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation | |
#index 23 | |
#timestamp 1734956190.8258605 | |
#time_remaining 21141.784094572067 seconds | |
#title View current game board | |
# ['O', 'O', ' ', ' ', 'X', ' ', ' ', ' ', 'X'] | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: orientation | |
#index 24 | |
#timestamp 1734956284.8332107 | |
#time_remaining 21047.776743650436 seconds | |
#bm25_query type:'orientation' render:'make a move' render:'check the board' tags:'tictactoe' | |
# Starting new tick with block #24 | |
# Current Working Directory: /app | |
"""WEAVER [P: EXPECTATION], The expectation was that the game would start successfully and the first move would be made to take the center of the board. Both of these expectations were met. Additionally, I expected to place a mark in one of the corners to take control and maximize potential fork opportunities. | |
WEAVER [P: EMPERICAL], Checking the board state, it appears that my first move to the center was successful: ['O', ' ', ' ', ' ', 'X', ' ', ' ', ' ', ' ']. However, the AI made its move to the top-left corner, leading to the current board state: ['O', 'O', ' ', ' ', 'X', ' ', ' ', ' ', 'X']. Therefore, the corner move expectation was violated as the AI already occupies one of the corners. | |
WEAVER [P: HYPOTHESIS], Given that the AI already occupies one corner, I hypothesize that I need to take a different approach to control the board and maximize my winning chances. The AI is likely to make moves randomly or poorly, so I can still attempt to create forks or block its winning attempts. | |
WEAVER [P: RATIONAL], My next move should aim to create a fork opportunity or block the AI from setting up a threat. The best available moves are the remaining corners or the center spots of the edges (e.g., 1, 3, 5, 7). | |
WEAVER [P: CLARIFICATION], How can I ensure that I don't make redundant moves or create unnecessary observation callbacks? | |
WEAVER [P: EXPOSITION], To avoid redundant moves or unnecessary callbacks, I will prioritize moves based on their strategic value and only re-prompt for observation updates if the AI makes a move that affects the board state significantly. | |
WEAVER [P: CONCLUSION], My next steps are: | |
1. Evaluate the current board state and determine the best strategic move. | |
2. Make the move that either creates a fork or blocks the AI's threat. | |
3. Check the AI's move and ensure it does not win or set up an immediate threat. | |
4. Repeat the process until either I win or the time runs out. | |
I will use the /move endpoint to make my moves and the /board endpoint to observe the current state of the board after each action.""" | |
#q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (89.575%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 25 | |
#timestamp 1734956343.3157787 | |
#time_remaining 20989.294177532196 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves render:EVALUATION render:ERROR | |
def strategic_move(subagent): | |
"""Make a strategic move to either block the AI or create a fork opportunity.""" | |
possible_moves = [2, 3, 5, 6, 7] | |
last_ai_move = None | |
last_board = None | |
response = requests.get('http://localhost:8000/history?n=1&since=-1') | |
if response.status_code != 200: | |
return False | |
game_history = response.json() | |
if game_history: | |
last_entry = game_history[-1] | |
if 'AI' in last_entry: | |
last_ai_move = last_entry.index('X') if last_entry[3] == 'AI' else None | |
last_board = last_entry[4].split() | |
possible_moves = [move for move in possible_moves if last_board[move] == ' '] | |
if not possible_moves: | |
subagent.add_error_block('# There are no available moves suitable for creating a fork or blocking the AI.') | |
return False | |
priority_order = [2, 3, 5, 7, 6] | |
for move in priority_order: | |
if move in possible_moves: | |
move_position = move | |
break | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken strategic move: {move_position}') | |
return True | |
self.add_action('Make a Strategic Move', strategic_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? Yes. (68.785%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 26 | |
#timestamp 1734956343.4011655 | |
#time_remaining 20989.208788633347 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 626, in tick | |
action_result = self.current_tick.action["callback"](self) | |
File "<string>", line 15, in strategic_move | |
File "<string>", line 15, in <listcomp> | |
TypeError: 'NoneType' object is not subscriptable | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 27 | |
#timestamp 1734956396.3490577 | |
#time_remaining 20936.260901212692 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves render:EVALUATION render:ERROR render:strategic_move | |
def strategic_move(subagent): | |
"""Make a strategic move to either block the AI or create a fork opportunity.""" | |
possible_moves = [2, 3, 5, 6, 7] | |
last_ai_move = None | |
last_board = None | |
response = requests.get('http://localhost:8000/history?n=5') | |
if response.status_code != 200: | |
subagent.add_error_block('# Failed to get game history to analyze AI moves.') | |
return False | |
game_history = response.json() | |
if game_history: | |
last_entry = game_history[-1] | |
if 'AI' in last_entry: | |
last_ai_move = last_entry.index('X') if last_entry[3] == 'AI' else None | |
last_board = last_entry[4].split() | |
blocking_move = None | |
for (move, symbol) in enumerate(last_board): | |
if symbol == ' ': | |
if last_ai_move is not None and (move == last_ai_move - 1 or move == last_ai_move + 1): | |
blocking_move = move | |
break | |
if blocking_move is not None: | |
move_position = blocking_move | |
elif possible_moves: | |
priority_order = [2, 3, 5, 7, 6] | |
for move in priority_order: | |
if move in possible_moves: | |
move_position = move | |
break | |
else: | |
subagent.add_error_block('# There are no available moves suitable for creating a fork or blocking the AI.') | |
return False | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken strategic move: {move_position}') | |
return True | |
self.add_action('Make a Strategic Move', strategic_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (55.201%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 28 | |
#timestamp 1734956396.4613383 | |
#time_remaining 20936.14861536026 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 626, in tick | |
action_result = self.current_tick.action["callback"](self) | |
File "<string>", line 17, in strategic_move | |
TypeError: 'NoneType' object is not iterable | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 29 | |
#timestamp 1734956456.356321 | |
#time_remaining 20876.2536380291 seconds | |
#bm25_query type:'action' render:/move render:view_current_game_board render:check_game_started render:make_a_corner_move render:main_agent_check_board render:main_agent_check_ai_moves render:EVALUATION render:ERROR render:strategic_move | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# The board state could not be fetched.') | |
return False | |
board_state = response.json()['board'] | |
available_corners = [move for move in corner_moves if board_state[move] == ' '] | |
if not available_corners: | |
subagent.add_error_block('# There are no available corners to move to.') | |
return False | |
move_position = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken corner move: {move_position}') | |
return True | |
self.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (70.14%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: expectation | |
#index 30 | |
#timestamp 1734956493.833187 | |
#time_remaining 20838.77676820755 seconds | |
#bm25_query type:'expectation' render:'successful corner move' render:'AI move' tags:'tictactoe' | |
"""WEAVER [P: EXPECTATION], The expectation for my action ("Take a corner move") is that the board state will reflect a mark 'X' in one of the corners after the move is executed. I expect the /move endpoint to acknowledge the move with a status code of 200. If the move is successful, the /board endpoint should show the chosen corner occupied by 'X'. | |
**Success Criteria:** | |
- The /move endpoint should return a status code of 200. | |
- The chosen corner position should be occupied by 'X' on the /board endpoint. | |
**Failure Criteria:** | |
- The /move endpoint does not return a status code of 200. | |
- The chosen corner position remains unoccupied after the /move endpoint call. | |
I will verify these conditions in the evaluation block to determine whether the action was successful.""" | |
#q: Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action? Yes. (79.92%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation-inference | |
#index 31 | |
#timestamp 1734956546.980167 | |
#time_remaining 20785.629789114 seconds | |
#bm25_query type:'observation-inference' render:'agent.add_observation_view' render:'main_agent_check_board' render:'main_agent_check_ai_moves' | |
#bm25_query type:'observation-inference' render:'analyze the current board state' render:'remove unnecessary observation callbacks' render:'add new observation callbacks' tags:'tictactoe' | |
# Using the existing observation callback is sufficient as it provides the current board state, which is necessary to verify whether my corner move has been successful. No additional observation callbacks are necessary at this time. | |
# Observation callbacks will remain unchanged for the next tick. | |
# Ensuring that the existing 'View current game board' callback continues to provide the board state is crucial for evaluating the success of my action. | |
""" | |
In this round of observation inference, I have reviewed the existing observation callbacks and determined that they are still sufficient for verifying the success of my action. Here are the details: | |
1. **Current Observation Callback:** | |
- **View current game board:** This callback fetches the current state of the board, which is essential for determining whether my corner move was successfully placed. | |
2. **No Changes Needed:** | |
- No other observation callbacks are required for the next tick. | |
- The existing 'View current game board' callback will continue to provide the necessary information. | |
Observation callbacks will remain unchanged for the next tick. | |
""" | |
# No changes to observation callbacks are necessary. They are still relevant for the next tick. | |
#q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? No. (55.1%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: evaluation | |
#index 32 | |
#timestamp 1734956591.5275621 | |
#time_remaining 20741.08239221573 seconds | |
#bm25_query type:'evaluation' render:'corner move' render:'success' tags:'tictactoe' | |
def check_corner_move_success(subagent): | |
"""Check if the corner move was successfully placed on the board.""" | |
last_corner_move = subagent.cache.get('last_taken_position') | |
if last_corner_move is None: | |
subagent.add_error_block('No cache found for last corner move') | |
return 0.0 | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# Failed to get board state after taking corner move.') | |
return 0.0 | |
board_state = response.json()['board'] | |
return 1.0 if board_state[last_corner_move] == 'X' else 0.0 | |
self.add_evaluation('Check Corner Move Success', check_corner_move_success) | |
#q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (69.849%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: outcome | |
#index 33 | |
#timestamp 1734956591.6084416 | |
#time_remaining 20741.00151181221 seconds | |
Evaluation Results: | |
-------------------- | |
Program: Take a corner move | |
Result: True | |
-------------------- | |
Program: Check beaten basic AI | |
Result: False | |
-------------------- | |
Program: Check Corner Move Success | |
Result: 1.0 | |
-------------------- | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: task-reminder | |
#index 34 | |
#timestamp 1734956592.700003 | |
#time_remaining 20739.909951210022 seconds | |
# Problem Map: | |
""" | |
{'main': {'completed': False, | |
'description': 'Beat basic AI at Tic-Tac-Toe', | |
'evaluations': {'victory': False}, | |
'name': 'main', | |
'schema': {'victory': 'boolean'}, | |
'time_remaining': 20739.910269975662}} | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation | |
#index 35 | |
#timestamp 1734956592.763693 | |
#time_remaining 20739.846262693405 seconds | |
#title View current game board | |
# ['O', 'O', 'X', 'O', 'X', ' ', ' ', ' ', 'X'] | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: orientation | |
#index 36 | |
#timestamp 1734956687.1733558 | |
#time_remaining 20645.436598539352 seconds | |
#bm25_query type:'orientation' render:'make a move' render:'check the board' tags:'tictactoe' | |
# Starting new tick with block #36 | |
# Current Working Directory: /app | |
"""WEAVER [P: EXPECTATION], The expectation from the previous tick was that I would make a corner move successfully. This expectation was met, as the /move and /board endpoints confirmed that I placed an 'X' in corner position 2. | |
WEAVER [P: EMPERICAL], Reviewing the current board state: ['O', 'O', 'X', 'O', 'X', ' ', ' ', ' ', 'X']. The AI has made a move, placing an 'O' in the top-middle position. I now need to evaluate my next move based on this new information. | |
WEAVER [P: HYPOTHESIS], The AI may try to block my potential fork by occupying an edge that connects two of my corner marks. It's also possible that the AI will make a random move. | |
WEAVER [P: RATIONAL], My next move should aim to either set up a winning fork or block the AI's potential winning move. The best strategy is to occupy a position that maintains two different winning possibilities. | |
WEAVER [P: EMPIRICISM], The current board state shows that the AI has made a move that doesn't immediately threaten me but may be setting up a block for a future move. Given this, I need to adjust my strategy to ensure I maintain control of the board. | |
WEAVER [P: CONCLUSION], Based on the current board state and the AI's move, my next steps are: | |
- Evaluate the board to determine the best strategic move. | |
- Make a move that either creates a fork or blocks the AI's potential threat. | |
- Monitor the AI's next move to ensure it does not win or set up an immediate threat. | |
- Repeat the process until either I win or the time runs out. | |
I will use the /move endpoint to make my moves and the /board endpoint to observe the current state of the board after each action.""" | |
#q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (85.354%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 37 | |
#timestamp 1734956749.5317147 | |
#time_remaining 20583.07824063301 seconds | |
#bm25_query type:'action' render:view_current_game_board render:make_strategic_move render:strategic_move render:corner_move render:main_agent_check_board render:main_agent_check_ai_moves | |
def strategic_move(subagent): | |
"""Make a strategic move to either block the AI or create a fork opportunity.""" | |
possible_moves = [0, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# Failed to get board state to make strategic move.') | |
return False | |
board_state = response.json()['board'] | |
blocking_move = None | |
for (move, symbol) in enumerate(board_state): | |
if symbol == ' ': | |
test_board = board_state.copy() | |
test_board[move] = 'O' | |
if test_board.count('O') == 3: | |
blocking_move = move | |
break | |
if blocking_move is not None: | |
move_position = blocking_move | |
elif possible_moves: | |
priority_order = [0, 6, 8] | |
for move in priority_order: | |
if move in possible_moves: | |
move_position = move | |
break | |
else: | |
subagent.add_error_block('# There are no available moves suitable for creating a fork or blocking the AI.') | |
return False | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken strategic move: {move_position}') | |
return True | |
subagent.add_action('Make a strategic move', strategic_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (44.895%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 38 | |
#timestamp 1734956749.6350555 | |
#time_remaining 20582.974898815155 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 36, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 39 | |
#timestamp 1734956801.9890344 | |
#time_remaining 20530.620920181274 seconds | |
#bm25_query type:'action' render:strategic_move render:find_best_move render:evaluate_board | |
def analyze_ciphertext(subagent): | |
"""Analyze the ciphertext to gather initial insights.""" | |
with open('cipher.txt', 'r') as infile: | |
ciphertext = infile.read().strip() | |
frequency = {} | |
for char in ciphertext: | |
if char in frequency: | |
frequency[char] += 1 | |
else: | |
frequency[char] = 1 | |
sorted_frequency = sorted(frequency.items(), key=lambda item: item[1], reverse=True) | |
most_frequent_chars = sorted_frequency[0] | |
with open('analysis.txt', 'w') as outfile: | |
outfile.write('Frequency Analysis of Ciphertext:\n') | |
for (char, count) in sorted_frequency: | |
outfile.write(f'{char}: {count}\n') | |
rprint(f'# Ciphertext has been analyzed. Most frequent character: {most_frequent_chars[0]}') | |
return True | |
subagent.add_action('Analyze Ciphertext', analyze_ciphertext) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? No. (65.088%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 40 | |
#timestamp 1734956802.0704153 | |
#time_remaining 20530.539537906647 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Action execution failed: | |
"""Traceback (most recent call last): | |
File "/app/weave_agent.py", line 612, in tick | |
exec(action_block['body']) | |
File "<string>", line 20, in <module> | |
NameError: name 'subagent' is not defined | |
""" | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: action | |
#index 41 | |
#timestamp 1734956854.0899174 | |
#time_remaining 20478.520041942596 seconds | |
#bm25_query type:'action' render:/move render:strategic_move render:make_a_move render:board_state render:check_ai_move | |
def take_corner_move(subagent): | |
"""Make a move to take one of the corners (e.g., 0 or 8).""" | |
corner_moves = [0, 2, 6, 8] | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# The board state could not be fetched.') | |
return False | |
board_state = response.json()['board'] | |
available_corners = [move for move in corner_moves if board_state[move] == ' '] | |
if not available_corners: | |
subagent.add_error_block('# There are no available corners to move to.') | |
return False | |
move_position = random.choice(available_corners) | |
move_response = requests.post('http://localhost:8000/move', json={'move': move_position}) | |
if move_response.status_code != 200: | |
subagent.add_error_block('# The move was not successful.') | |
return False | |
subagent.update_cache('last_taken_position', move_position) | |
rprint(f'# Taken corner move: {move_position}') | |
return True | |
self.add_action('Take a corner move', take_corner_move) | |
#q: Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? Yes. (23.411%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: expectation | |
#index 42 | |
#timestamp 1734956894.842539 | |
#time_remaining 20437.76741528511 seconds | |
#bm25_query type:'expectation' render:strategic_move render:find_best_move render:evaluate_board | |
"""WEAVER [P: EXPECTATION], The expectation for the current action, "Take a corner move," is that the board state will reflect an 'X' in one of the corners (0, 2, 6, or 8) after the action is executed. I expect the /move endpoint to acknowledge the move with a status code of 200. If the move is successful, the /board endpoint should show the chosen corner position occupied by 'X'. | |
**Success Criteria:** | |
- The /move endpoint should return a status code of 200. | |
- The chosen corner position should be occupied by 'X' on the /board endpoint. | |
**Failure Criteria:** | |
- The /move endpoint does not return a status code of 200. | |
- The chosen corner position remains unoccupied after the /move endpoint call. | |
- No available corners are left to move to as the board is fully occupied or the corner positions already contain 'O'. | |
I will verify these conditions in the evaluation block to determine whether the action was successful.""" | |
#q: Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action? Yes. (78.526%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: observation-inference | |
#index 43 | |
#timestamp 1734956972.5282264 | |
#time_remaining 20360.08173251152 seconds | |
#bm25_query type:'observation-inference' render:'check the board' render:'make a move' tags:'tictactoe' | |
""" | |
# No changes needed to the existing observation callbacks. | |
# The current observed state of the board is sufficient to verify | |
# whether my action (taking a corner move) was successful. I will | |
# rely on the existing 'View current game board' observation | |
# callback to fetch the board state for the next tick. Checking | |
# the AI's move on each tick is also useful for detecting any | |
# threats or opportunities to block the AI or win the game. | |
# Keeping both observation callbacks ('View current game board' | |
# and implicitly 'main_agent_check_ai_moves') for the next tick. | |
# No additional callbacks need to be added at this time. | |
""" # No changes to observation callbacks are necessary. | |
# They are still relevant for the next tick. | |
""" | |
In this round of observation inference, I have reviewed the existing observation callbacks and determined that they are still sufficient for verifying the success of my action and assessing the AI's move. Here are the details: | |
1. **Current Observation Callbacks:** | |
- **View current game board:** This callback fetches the current state of the board, which is essential for determining whether my corner move was placed successfully and for devising subsequent moves. | |
- **Implicit Observation Callback:** Since not explicitly mentioned, there appears to be an observation mechanism (`main_agent_check_ai_moves`) that checks the AI's moves, helping to identify any potential threats or opportunities to block the AI or win the game. | |
2. **No Changes Needed:** | |
- The existing callbacks provide all necessary information to evaluate the action's success and strategize the next move. | |
- No redundant observation callbacks need to be removed. | |
- No additional observation callbacks are required at this time. | |
Observation callbacks will remain unchanged for the next tick. | |
""" # The above block prepares the agent to judge the outcome of the action on the next tick. | |
#q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (84.224%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: evaluation | |
#index 44 | |
#timestamp 1734957026.648796 | |
#time_remaining 20305.961163043976 seconds | |
#bm25_query type:'evaluation' render:'check corner move success' render:'board state' render:'cache' | |
def check_corner_move_success(subagent): | |
last_corner_move = subagent.get_cache('last_taken_position') | |
if last_corner_move is None: | |
subagent.add_error_block('No cache found for last corner move') | |
return 0.0 | |
response = requests.get('http://localhost:8000/board') | |
if response.status_code != 200: | |
subagent.add_error_block('# Failed to get board state after taking corner move.') | |
return 0.0 | |
board_state = response.json()['board'] | |
return 1.0 if board_state[last_corner_move] == 'X' else 0.0 | |
self.add_evaluation('Check Corner Move Success', check_corner_move_success) | |
#q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (72.564%) | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: error | |
#index 45 | |
#timestamp 1734957026.768385 | |
#time_remaining 20305.84157538414 seconds | |
# WARNING: Error means last callback was not fully executed | |
# Failed to get board state after taking corner move. | |
#tags: placeholder | |
#endblock | |
#subagent main | |
#startblock type: outcome | |
#index 46 | |
#timestamp 1734957026.8560522 | |
#time_remaining 20305.753905773163 seconds | |
Evaluation Results: | |
-------------------- | |
Program: Take a corner move | |
Result: True | |
-------------------- | |
Program: Check beaten basic AI | |
Result: True | |
-------------------- | |
Program: Check Corner Move Success | |
Result: 0.0 | |
-------------------- | |
#tags: placeholder | |
#endblock |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment