14790897 · December 19, 2023 12:00
diff --git a/提取ChatGPT官方json数据中的完整对话.py b/提取ChatGPT官方json数据中的完整对话.py
 import json


 def find_bottom_most_node(conversation_data):
    """
    Find the bottom-most node of the conversation: the last node without children.

    The mapping is scanned from its last entry towards its first, and the ID of
    the first node whose "children" value is empty is returned. If a malformed
    node is met before a leaf is found, a diagnostic is printed and the search
    is abandoned.

    :param conversation_data: Dictionary mapping node IDs to node dictionaries; each node must have a "children" key.
    :return: The ID of the bottom-most node, or None if no leaf is found or a malformed node is encountered.
    :raises TypeError: If conversation_data is not a dictionary.
    """
    if not isinstance(conversation_data, dict):
        raise TypeError("Conversation data must be a dictionary.")

    # Copy the items into a list before reversing: reversed() on a dict view
    # is only supported from Python 3.8 onwards.
    for node_id, node in reversed(list(conversation_data.items())):
        # Report malformed nodes directly instead of raising and catching the
        # exception locally; str(KeyError) would also wrap the text in quotes.
        if not isinstance(node, dict):
            print(
                "Error finding bottom-most node: "
                f"Node {node_id} is not a dictionary."
            )
            return None
        if "children" not in node:
            print(
                "Error finding bottom-most node: "
                f"'children' key not found in node {node_id}."
            )
            return None
        if not node["children"]:  # An empty children list marks a leaf
            return node_id

    return None


 def extract_conversation_path(conversation_data, start_node_id):
    """
    Collect the chain of nodes that links the top of the conversation to a given node.

    Parent links are followed upwards from the start node. The walk ends when a
    node has no truthy "parent" value or when an ID is missing from the mapping.

    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
    :param start_node_id: The ID of the node to start from (typically the bottom-most node).
    :return: A list of node dictionaries ordered from the topmost ancestor reached down to the start node.
    """

    def _walk_up(node_id):
        # Yield the node itself, then each ancestor in turn.
        while node_id:
            current = conversation_data.get(node_id)
            if current is None:
                return  # Unknown ID: stop climbing
            yield current
            node_id = current.get("parent")

    ancestors = list(_walk_up(start_node_id))
    ancestors.reverse()  # Present the path with the topmost ancestor first
    return ancestors


 def extract_full_conversation(conversation_data, start_node_id):
    """
    Extract the conversation path together with each node's message text.

    Parent links are followed upwards from the start node until a node has no
    truthy "parent" value or an ID is missing from the mapping. For every node
    visited, the string entries of message -> content -> parts are joined with
    single spaces; a node without message text contributes an empty string.
    Each message text is also printed to standard output as it is read.

    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
    :param start_node_id: The ID of the node to start from (typically the bottom-most node).
    :return: A list of (node ID, message text) tuples ordered from the topmost ancestor reached down to the start node.
    """
    full_conversation = []
    current_node_id = start_node_id

    while current_node_id:
        node = conversation_data.get(current_node_id)
        if node is None:
            break  # Node not found

        # Use `or` fallbacks rather than .get() defaults: a key that is present
        # but null in the JSON (e.g. "message": null) comes back as None, and a
        # .get() default is only applied when the key is absent.
        message = node.get("message") or {}
        content = message.get("content") or {}
        parts = content.get("parts") or []

        # Only string parts can be joined; non-text parts (such as dictionaries)
        # are skipped instead of raising TypeError.
        message_text = " ".join(part for part in parts if isinstance(part, str))
        print("message_text:", message_text)
        full_conversation.append((current_node_id, message_text))

        # Move to the parent node
        current_node_id = node.get("parent")

    return full_conversation[::-1]  # Reverse so the topmost ancestor comes first


 file_path = "./example.json"  # Replace with the actual file path
 try:
    # Load JSON data from the file
    with open(file_path, "r", encoding="utf-8") as file:
        conversation_data = json.load(file)
 except (OSError, ValueError) as e:
    # OSError covers a missing or unreadable file; ValueError covers invalid
    # JSON and undecodable text (json.JSONDecodeError and UnicodeDecodeError
    # are both ValueError subclasses).
    print(f"Error reading or processing the file: {e}")
    # Stop here: everything below needs the loaded data, and carrying on would
    # only fail with a NameError on conversation_data.
    raise SystemExit(1) from e

 # Keep only the node-ID -> node dictionary stored under "mapping"
 conversation_data = conversation_data["mapping"]
 # Get the ID of the bottom-most node
 bottom_most_node_id = find_bottom_most_node(conversation_data)
 print("find bottom_most_node_id:", bottom_most_node_id)
 # Then use that ID to extract the conversation path
 if bottom_most_node_id:
    conversation_text = extract_full_conversation(
        conversation_data, bottom_most_node_id
    )
    # Name of the file the conversation is saved to
    file_name = "conversation_output.txt"

    # Open the file with a `with` statement so that it is closed properly
    with open(file_name, "w", encoding="utf-8") as file:
        # Write the node ID and message content of every node in the conversation
        for node_id, message_content in conversation_text:
            file.write(f"Node ID: {node_id}\nMessage Content:\n{message_content}\n\n")
 else:
    print("No bottom-most node found.")
	import json


	def find_bottom_most_node(conversation_data):
	    """
	    Find the bottom-most node of the conversation: the last node without children.

	    The mapping is scanned from its last entry towards its first, and the ID of
	    the first node whose "children" value is empty is returned. If a malformed
	    node is met before a leaf is found, a diagnostic is printed and the search
	    is abandoned.

	    :param conversation_data: Dictionary mapping node IDs to node dictionaries; each node must have a "children" key.
	    :return: The ID of the bottom-most node, or None if no leaf is found or a malformed node is encountered.
	    :raises TypeError: If conversation_data is not a dictionary.
	    """
	    if not isinstance(conversation_data, dict):
	        raise TypeError("Conversation data must be a dictionary.")

	    # Copy the items into a list before reversing: reversed() on a dict view
	    # is only supported from Python 3.8 onwards.
	    for node_id, node in reversed(list(conversation_data.items())):
	        # Report malformed nodes directly instead of raising and catching the
	        # exception locally; str(KeyError) would also wrap the text in quotes.
	        if not isinstance(node, dict):
	            print(
	                "Error finding bottom-most node: "
	                f"Node {node_id} is not a dictionary."
	            )
	            return None
	        if "children" not in node:
	            print(
	                "Error finding bottom-most node: "
	                f"'children' key not found in node {node_id}."
	            )
	            return None
	        if not node["children"]:  # An empty children list marks a leaf
	            return node_id

	    return None


	def extract_conversation_path(conversation_data, start_node_id):
	    """
	    Extract the conversation path that leads to the given node.

	    Parent links are followed upwards from the start node. The walk ends when a
	    node has no truthy "parent" value or when an ID is missing from the mapping.

	    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
	    :param start_node_id: The ID of the node to start from (typically the bottom-most node).
	    :return: A list of node dictionaries ordered from the topmost ancestor reached down to the start node.
	    """
	    path = []
	    current_node_id = start_node_id

	    while current_node_id:
	        # Get the current node
	        node = conversation_data.get(current_node_id)
	        if node is None:
	            break  # Node not found

	        # Add the current node to the path
	        path.append(node)

	        # Move to the parent node
	        current_node_id = node.get("parent")

	    return path[::-1]  # Reverse so the topmost ancestor comes first


	def extract_full_conversation(conversation_data, start_node_id):
	    """
	    Extract the conversation path together with each node's message text.

	    Parent links are followed upwards from the start node until a node has no
	    truthy "parent" value or an ID is missing from the mapping. For every node
	    visited, the string entries of message -> content -> parts are joined with
	    single spaces; a node without message text contributes an empty string.
	    Each message text is also printed to standard output as it is read.

	    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
	    :param start_node_id: The ID of the node to start from (typically the bottom-most node).
	    :return: A list of (node ID, message text) tuples ordered from the topmost ancestor reached down to the start node.
	    """
	    full_conversation = []
	    current_node_id = start_node_id

	    while current_node_id:
	        node = conversation_data.get(current_node_id)
	        if node is None:
	            break  # Node not found

	        # Use `or` fallbacks rather than .get() defaults: a key that is present
	        # but null in the JSON (e.g. "message": null) comes back as None, and a
	        # .get() default is only applied when the key is absent.
	        message = node.get("message") or {}
	        content = message.get("content") or {}
	        parts = content.get("parts") or []

	        # Only string parts can be joined; non-text parts (such as dictionaries)
	        # are skipped instead of raising TypeError.
	        message_text = " ".join(part for part in parts if isinstance(part, str))
	        print("message_text:", message_text)
	        full_conversation.append((current_node_id, message_text))

	        # Move to the parent node
	        current_node_id = node.get("parent")

	    return full_conversation[::-1]  # Reverse so the topmost ancestor comes first


	file_path = "./example.json"  # Replace with the actual file path
	try:
	    # Load JSON data from the file
	    with open(file_path, "r", encoding="utf-8") as file:
	        conversation_data = json.load(file)
	except (OSError, ValueError) as e:
	    # OSError covers a missing or unreadable file; ValueError covers invalid
	    # JSON and undecodable text (json.JSONDecodeError and UnicodeDecodeError
	    # are both ValueError subclasses).
	    print(f"Error reading or processing the file: {e}")
	    # Stop here: everything below needs the loaded data, and carrying on would
	    # only fail with a NameError on conversation_data.
	    raise SystemExit(1) from e

	# Keep only the node-ID -> node dictionary stored under "mapping"
	conversation_data = conversation_data["mapping"]
	# Get the ID of the bottom-most node
	bottom_most_node_id = find_bottom_most_node(conversation_data)
	print("find bottom_most_node_id:", bottom_most_node_id)
	# Then use that ID to extract the conversation path
	if bottom_most_node_id:
	    conversation_text = extract_full_conversation(
	        conversation_data, bottom_most_node_id
	    )
	    # Name of the file the conversation is saved to
	    file_name = "conversation_output.txt"

	    # Open the file with a `with` statement so that it is closed properly
	    with open(file_name, "w", encoding="utf-8") as file:
	        # Write the node ID and message content of every node in the conversation
	        for node_id, message_content in conversation_text:
	            file.write(f"Node ID: {node_id}\nMessage Content:\n{message_content}\n\n")
	else:
	    print("No bottom-most node found.")