matouskozak · May 30, 2025 18:15
diff --git a/trajectory_to_markdown.py b/trajectory_to_markdown.py
 # Build a markdown fenced code block that backticks in the payload cannot close early.
 def _create_fenced_block(content, language_hint=""):
    """Wrap ``content`` in a backtick-fenced markdown code block.

    The fence is one backtick longer than the longest backtick run found in
    the text (and never shorter than three), so nothing inside the text can
    terminate the block.

    Args:
        content: Value to render; ``None`` becomes the empty string and any
            other value is converted with ``str()``.
        language_hint: Text placed directly after the opening fence.

    Returns:
        str: The opening fence line, the text (newline-terminated), the
        closing fence, and a final newline.
    """
    text = "" if content is None else str(content)

    # Length of the widest backtick run in the text; 0 when there is none.
    widest_run = max(map(len, re.findall(r'`+', text)), default=0)
    fence = '`' * max(3, widest_run + 1)

    # Append a newline only when the text does not already end with one.
    body = text if text.endswith('\n') else text + '\n'
    return f"{fence}{language_hint}\n{body}{fence}\n"

 def _inline_code_span(text):
    """Return ``text`` wrapped as a markdown inline code span.

    The delimiter is one backtick longer than the longest backtick run in
    ``text`` so the span cannot be closed early. Text that starts or ends
    with a backtick is padded with one space on each side so it does not
    merge with the delimiter.
    """
    longest = current = 0
    for ch in text:
        current = current + 1 if ch == '`' else 0
        longest = max(longest, current)
    delimiter = '`' * (longest + 1)
    pad = ' ' if text.startswith('`') or text.endswith('`') else ''
    return f"{delimiter}{pad}{text}{pad}{delimiter}"


 def _format_action_dict_human_readable(action_dict):
    """Render a parsed action dictionary as markdown fragments.

    Every returned fragment ends with a newline, so the fragments can be
    concatenated directly (the way ``''.join`` does) without the heading and
    the individual fields running together on one line.

    Args:
        action_dict (dict): Parsed action. ``command`` is rendered first,
            ``thought`` is skipped, and entries whose value is ``None`` or a
            blank string are omitted.

    Returns:
        list[str]: The "### Action:" heading followed by one fragment per
        rendered field. Multi-line or code-like string values and dict/list
        values are rendered as fenced code blocks; everything else is
        rendered as an inline code span.
    """
    # Lower-case file-name suffixes mapped to the fence language for file contents.
    suffix_languages = (
        (('.py',), 'python'),
        (('.js',), 'javascript'),
        (('.html',), 'html'),
        (('.css',), 'css'),
        (('.json',), 'json'),
        (('.md',), 'markdown'),
        (('.sh', '.bash'), 'bash'),
        (('.yaml', '.yml'), 'yaml'),
    )
    file_keys = ('file_text', 'content')
    code_keys = ('code', 'script', 'patch', 'diff')

    formatted_lines = ["### Action:\n"]

    path_value = action_dict.get('path')
    # Lower-cased so that suffix matching is case-insensitive.
    path_for_hint = path_value.lower() if isinstance(path_value, str) else None

    command = action_dict.get('command')
    if command:
        # Blank line after the field so the next one starts its own paragraph.
        formatted_lines.append(f"**Command:** {_inline_code_span(str(command))}\n\n")

    for key, value in action_dict.items():
        if key in ('command', 'thought'):  # Rendered above / handled by the caller
            continue
        if value is None or (isinstance(value, str) and not value.strip()):
            continue  # Nothing worth showing

        # Keys coming out of ast.literal_eval are not guaranteed to be strings.
        key_display = str(key).replace('_', ' ').capitalize()
        is_text = isinstance(value, str)
        lang_hint = 'text'

        if key in file_keys and path_for_hint:
            # File contents: trust the file-name suffix.
            for suffixes, language in suffix_languages:
                if path_for_hint.endswith(suffixes):
                    lang_hint = language
                    break
        elif is_text and (key in code_keys or ('\n' in value and key in file_keys)):
            # No usable path: fall back to a rough look at the content.
            stripped_value = value.strip()
            if ('def ' in value or 'class ' in value or 'import ' in value) and ':\n' in value:
                lang_hint = 'python'
            elif stripped_value.startswith('{') and stripped_value.endswith('}'):
                lang_hint = 'json'
            elif stripped_value.startswith('<') and stripped_value.endswith('>'):
                lang_hint = 'xml'

        if is_text and ('\n' in value or key in file_keys or key in code_keys):
            formatted_lines.append(f"**{key_display}:**\n{_create_fenced_block(value, lang_hint)}")
        elif isinstance(value, (dict, list)):
            try:
                rendered, rendered_hint = json.dumps(value, indent=2), 'json'
            except (TypeError, ValueError):
                # Python literals such as sets or tuple keys are not JSON-serializable.
                rendered, rendered_hint = str(value), 'text'
            formatted_lines.append(f"**{key_display}:**\n{_create_fenced_block(rendered, rendered_hint)}")
        else:
            formatted_lines.append(f"**{key_display}:** {_inline_code_span(str(value))}\n\n")

    return formatted_lines

 def trajectory_to_markdown(trajectory_file, output_file=None):
    """Convert a trajectory JSON file into a markdown report.

    Each entry of the trajectory becomes a "Step" section containing its
    thought, response and action. Free-form text is placed in fenced code
    blocks so that it is not interpreted as markdown.

    Args:
        trajectory_file (str): Path to the trajectory JSON file. It is read
            as UTF-8 and is iterated as a sequence of dict-like entries.
        output_file (str, optional): Path to save the markdown output. If
            None, the trajectory path with its suffix replaced by ``.md``
            is used.

    Returns:
        The path the markdown was written to: ``output_file`` exactly as
        given, or a ``pathlib.Path`` derived from ``trajectory_file`` when
        ``output_file`` was None.
    """
    def newline_terminated(fragments):
        # Fragments are concatenated with ''.join, so each must end its own line.
        return [frag if frag.endswith('\n') else frag + '\n' for frag in fragments]

    # Read explicitly as UTF-8 instead of the platform default encoding,
    # matching the encoding used for the output below.
    with open(trajectory_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # If no output file specified, create one based on input filename
    if output_file is None:
        output_file = Path(trajectory_file).with_suffix('.md')

    md_content = [f"# Trajectory Analysis: {Path(trajectory_file).stem}\n\n"]

    # The leading blank line keeps "---" from turning a preceding text line
    # into a setext heading; it must render as a horizontal rule.
    separator = "\n---\n\n"

    for i, item in enumerate(data):
        tool = item.get('tool', 'N/A')
        action = item.get('action', 'N/A')
        response = item.get('response', 'N/A')
        thought = item.get('thought', None)

        # str() guards against a null / non-string tool value in the JSON.
        md_content.append(f"## Step {i+1}: {str(tool).upper()}\n")

        if thought and tool != "finish":  # Skip thought for "finish" tool
            md_content.append(f"### Thought:\n{_create_fenced_block(thought, 'text')}")

        # Fenced so the response is never interpreted as markdown.
        md_content.append(f"### Response:\n{_create_fenced_block(response, 'text')}")

        # "message" and "finish" steps carry no action worth rendering.
        if tool in ["message", "finish"]:
            md_content.append(separator)
            continue

        if isinstance(action, dict):
            # Already-structured action: render it like a parsed one.
            md_content.extend(newline_terminated(_format_action_dict_human_readable(action)))
        elif isinstance(action, str) and action.strip().startswith(('{', '[')) and action.strip().endswith(('}', ']')):
            try:
                # ast.literal_eval is safer for Python dict/list-like strings
                parsed_action = ast.literal_eval(action)

                if isinstance(parsed_action, dict):
                    formatted_action_lines = _format_action_dict_human_readable(parsed_action)
                    md_content.extend(newline_terminated(formatted_action_lines))
                else:
                    # Parsed, but not a dict (e.g. a list of actions)
                    md_content.append(f"### Action (parsed as non-dictionary):\n{_create_fenced_block(str(parsed_action), 'text')}")

            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
                # literal_eval may raise any of these on malformed or deeply
                # nested input; show the raw string together with the error.
                md_content.append(f"### Action (raw - failed to parse string as dict/list):\n{_create_fenced_block(action, 'text')}")
                md_content.append(f"\n_Parsing error: {str(e)}_\n")
        elif action is not None:  # Non-string actions or strings not resembling dict/list
            md_content.append(f"### Action:\n{_create_fenced_block(str(action), 'text')}")
        # If action is None, nothing is added for the action section.

        md_content.append(separator)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(md_content))

    return output_file
	# Helper function to create markdown fenced code blocks robustly
	def _create_fenced_block(content, language_hint=""):
	    """Wrap ``content`` in a backtick-fenced markdown code block.

	    The fence is one backtick longer than the longest backtick run found
	    in the text (and never shorter than three), so nothing inside the
	    text can terminate the block.

	    Args:
	        content: Value to render; ``None`` becomes the empty string and
	            any other value is converted with ``str()``.
	        language_hint: Text placed directly after the opening fence.

	    Returns:
	        str: The opening fence line, the text (newline-terminated), the
	        closing fence, and a final newline.
	    """
	    content_str = "" if content is None else str(content)

	    # Longest run of backticks in the content; 0 when there is none.
	    runs = re.findall(r'`+', content_str)
	    longest_backtick_run_in_content = max((len(r) for r in runs), default=0)

	    # Use one more backtick than that for the fence, with a minimum of 3.
	    fence = '`' * max(3, longest_backtick_run_in_content + 1)

	    # Ensure there is exactly one newline between the content and the
	    # closing fence, whether or not the content already ends with one.
	    if content_str.endswith('\n'):
	        return f"{fence}{language_hint}\n{content_str}{fence}\n"
	    return f"{fence}{language_hint}\n{content_str}\n{fence}\n"

	def _inline_code_span(text):
	    """Return ``text`` wrapped as a markdown inline code span.

	    The delimiter is one backtick longer than the longest backtick run in
	    ``text`` so the span cannot be closed early. Text that starts or ends
	    with a backtick is padded with one space on each side so it does not
	    merge with the delimiter.
	    """
	    longest = current = 0
	    for ch in text:
	        current = current + 1 if ch == '`' else 0
	        longest = max(longest, current)
	    delimiter = '`' * (longest + 1)
	    pad = ' ' if text.startswith('`') or text.endswith('`') else ''
	    return f"{delimiter}{pad}{text}{pad}{delimiter}"


	def _format_action_dict_human_readable(action_dict):
	    """Render a parsed action dictionary as markdown fragments.

	    Every returned fragment ends with a newline, so the fragments can be
	    concatenated directly (the way ``''.join`` does) without the heading
	    and the individual fields running together on one line.

	    Args:
	        action_dict (dict): Parsed action. ``command`` is rendered first,
	            ``thought`` is skipped, and entries whose value is ``None`` or
	            a blank string are omitted.

	    Returns:
	        list[str]: The "### Action:" heading followed by one fragment per
	        rendered field. Multi-line or code-like string values and
	        dict/list values are rendered as fenced code blocks; everything
	        else is rendered as an inline code span.
	    """
	    # Lower-case file-name suffixes mapped to the fence language for file contents.
	    suffix_languages = (
	        (('.py',), 'python'),
	        (('.js',), 'javascript'),
	        (('.html',), 'html'),
	        (('.css',), 'css'),
	        (('.json',), 'json'),
	        (('.md',), 'markdown'),
	        (('.sh', '.bash'), 'bash'),
	        (('.yaml', '.yml'), 'yaml'),
	    )
	    file_keys = ('file_text', 'content')
	    code_keys = ('code', 'script', 'patch', 'diff')

	    formatted_lines = ["### Action:\n"]

	    path_value = action_dict.get('path')
	    # Lower-cased so that suffix matching is case-insensitive.
	    path_for_hint = path_value.lower() if isinstance(path_value, str) else None

	    command = action_dict.get('command')
	    if command:
	        # Blank line after the field so the next one starts its own paragraph.
	        formatted_lines.append(f"**Command:** {_inline_code_span(str(command))}\n\n")

	    for key, value in action_dict.items():
	        if key in ('command', 'thought'):  # Rendered above / handled by the caller
	            continue
	        if value is None or (isinstance(value, str) and not value.strip()):
	            continue  # Nothing worth showing

	        # Keys coming out of ast.literal_eval are not guaranteed to be strings.
	        key_display = str(key).replace('_', ' ').capitalize()
	        is_text = isinstance(value, str)
	        lang_hint = 'text'

	        if key in file_keys and path_for_hint:
	            # File contents: trust the file-name suffix.
	            for suffixes, language in suffix_languages:
	                if path_for_hint.endswith(suffixes):
	                    lang_hint = language
	                    break
	        elif is_text and (key in code_keys or ('\n' in value and key in file_keys)):
	            # No usable path: fall back to a rough look at the content.
	            stripped_value = value.strip()
	            if ('def ' in value or 'class ' in value or 'import ' in value) and ':\n' in value:
	                lang_hint = 'python'
	            elif stripped_value.startswith('{') and stripped_value.endswith('}'):
	                lang_hint = 'json'
	            elif stripped_value.startswith('<') and stripped_value.endswith('>'):
	                lang_hint = 'xml'

	        if is_text and ('\n' in value or key in file_keys or key in code_keys):
	            formatted_lines.append(f"**{key_display}:**\n{_create_fenced_block(value, lang_hint)}")
	        elif isinstance(value, (dict, list)):
	            try:
	                rendered, rendered_hint = json.dumps(value, indent=2), 'json'
	            except (TypeError, ValueError):
	                # Python literals such as sets or tuple keys are not JSON-serializable.
	                rendered, rendered_hint = str(value), 'text'
	            formatted_lines.append(f"**{key_display}:**\n{_create_fenced_block(rendered, rendered_hint)}")
	        else:
	            formatted_lines.append(f"**{key_display}:** {_inline_code_span(str(value))}\n\n")

	    return formatted_lines

	def trajectory_to_markdown(trajectory_file, output_file=None):
	    """Convert a trajectory JSON file into a markdown report.

	    Each entry of the trajectory becomes a "Step" section containing its
	    thought, response and action. Free-form text is placed in fenced code
	    blocks so that it is not interpreted as markdown.

	    Args:
	        trajectory_file (str): Path to the trajectory JSON file. It is
	            read as UTF-8 and is iterated as a sequence of dict-like
	            entries.
	        output_file (str, optional): Path to save the markdown output. If
	            None, the trajectory path with its suffix replaced by ``.md``
	            is used.

	    Returns:
	        The path the markdown was written to: ``output_file`` exactly as
	        given, or a ``pathlib.Path`` derived from ``trajectory_file`` when
	        ``output_file`` was None.
	    """
	    def newline_terminated(fragments):
	        # Fragments are concatenated with ''.join, so each must end its own line.
	        return [frag if frag.endswith('\n') else frag + '\n' for frag in fragments]

	    # Read explicitly as UTF-8 instead of the platform default encoding,
	    # matching the encoding used for the output below.
	    with open(trajectory_file, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # If no output file specified, create one based on input filename
	    if output_file is None:
	        output_file = Path(trajectory_file).with_suffix('.md')

	    md_content = [f"# Trajectory Analysis: {Path(trajectory_file).stem}\n\n"]

	    # The leading blank line keeps "---" from turning a preceding text line
	    # into a setext heading; it must render as a horizontal rule.
	    separator = "\n---\n\n"

	    for i, item in enumerate(data):
	        tool = item.get('tool', 'N/A')
	        action = item.get('action', 'N/A')
	        response = item.get('response', 'N/A')
	        thought = item.get('thought', None)

	        # str() guards against a null / non-string tool value in the JSON.
	        md_content.append(f"## Step {i+1}: {str(tool).upper()}\n")

	        if thought and tool != "finish":  # Skip thought for "finish" tool
	            md_content.append(f"### Thought:\n{_create_fenced_block(thought, 'text')}")

	        # Fenced so the response is never interpreted as markdown.
	        md_content.append(f"### Response:\n{_create_fenced_block(response, 'text')}")

	        # "message" and "finish" steps carry no action worth rendering.
	        if tool in ["message", "finish"]:
	            md_content.append(separator)
	            continue

	        if isinstance(action, dict):
	            # Already-structured action: render it like a parsed one.
	            md_content.extend(newline_terminated(_format_action_dict_human_readable(action)))
	        elif isinstance(action, str) and action.strip().startswith(('{', '[')) and action.strip().endswith(('}', ']')):
	            try:
	                # ast.literal_eval is safer for Python dict/list-like strings
	                parsed_action = ast.literal_eval(action)

	                if isinstance(parsed_action, dict):
	                    formatted_action_lines = _format_action_dict_human_readable(parsed_action)
	                    md_content.extend(newline_terminated(formatted_action_lines))
	                else:
	                    # Parsed, but not a dict (e.g. a list of actions)
	                    md_content.append(f"### Action (parsed as non-dictionary):\n{_create_fenced_block(str(parsed_action), 'text')}")

	            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
	                # literal_eval may raise any of these on malformed or deeply
	                # nested input; show the raw string together with the error.
	                md_content.append(f"### Action (raw - failed to parse string as dict/list):\n{_create_fenced_block(action, 'text')}")
	                md_content.append(f"\n_Parsing error: {str(e)}_\n")
	        elif action is not None:  # Non-string actions or strings not resembling dict/list
	            md_content.append(f"### Action:\n{_create_fenced_block(str(action), 'text')}")
	        # If action is None, nothing is added for the action section.

	        md_content.append(separator)

	    with open(output_file, 'w', encoding='utf-8') as f:
	        f.write(''.join(md_content))

	    return output_file