up1 · February 23, 2026 03:55
diff --git a/read.py b/read.py
 # Question that the later tree search tries to answer.
 query = "What are the conclusions in this document?"
 # Placeholder: use the doc_id of the same document from step-01.py, or
 # substitute the doc_id of any other document of your own.
 doc_id = "your doc id"

 # Fetch the tree with node summaries enabled and keep only its 'result' entry.
 tree_response = pi_client.get_tree(doc_id, node_summary=True)
 tree = tree_response['result']

 print("Simplified Tree Structure of the Document:")
 utils.print_tree(tree)

 # Example index tree data
 Simplified Tree Structure of the Document:
 [{'title': 'Abstract', 'node_id': '0000', 'summary': 'This text discusses the challenge of gen...'},
 {'title': '1 Introduction',
  'node_id': '0001',
  'summary': 'This text discusses the development of r...'},
 {'title': '2 DeepSeek-R1-Zero',
  'node_id': '0002',
  'summary': 'The text details the training of DeepSee...'},
 {'title': '3. DeepSeek-R1',
  'node_id': '0003',
  'summary': 'This text introduces DeepSeek-R1, an enh...'},
 {'title': '4 Experiment',
  'node_id': '0004',
  'summary': 'The text details the experimental evalua...'},
 {'title': '5 Ethics and Safety Statement',
  'node_id': '0005',
  'summary': 'The text addresses the ethical risks of ...'},
 {'title': '6 Conclusion, Limitation, and Future Wor...',
  'node_id': '0006',
  'summary': 'The text introduces DeepSeek-R1-Zero and...'},
 ...
 {'title': 'B. Training Details',
  'node_id': '0009',
  'prefix_summary': '# B. Training Details\n',
  'nodes': [{'title': 'B.1. RL Infrastructure',
             'node_id': '0010',
             'summary': 'The text describes the RL infrastructure...'},
            {'title': 'B.2 Reward Model Prompt',
             'node_id': '0011',
             'summary': 'The text provides instructions for an AI...'},
            {'title': 'B.3. Data Recipe',
             'node_id': '0012',
             'prefix_summary': '## B.3. Data Recipe\n',
             'nodes': [{'title': 'B.3.1. RL Data',
                        'node_id': '0013',
                        'summary': 'The text details the Reinforcement Learn...'},
                       {'title': 'B.3.2 DeepSeek-R1 Cold Start',
                        'node_id': '0014',
                        'summary': 'The text details the cold start process ...'},
                       {'title': 'B.3.3 800K Supervised Data',
                        'node_id': '0015',
                        'summary': '### B.3.3 800K Supervised Data\n\n#### Rea...'}]},
            
diff --git a/result.txt b/result.txt
 Reasoning Process:
 The question asks for the conclusions in the document. Typically, conclusions are found in sections
 explicitly labeled as 'Conclusion' or in sections summarizing findings and future directions. In
 this document tree, node '0006' is titled '6 Conclusion, Limitation, and Future Work', which is the
 primary section for conclusions. Additionally, the 'Abstract' (node '0000') often contains a summary
 of conclusions, and the 'Discussion' (node '0030') may also synthesize key findings and
 implications. However, the most direct and comprehensive conclusions are likely in node '0006'.

 Retrieved Nodes:
 Node ID: 0006	 Page: 10	 Title: 6 Conclusion, Limitation, and Future Work
 Node ID: 0000	 Page: 1	 Title: Abstract
 Node ID: 0030	 Page: 62	 Title: G. Discussion
diff --git a/search.py b/search.py
 # Build the prompt for the LLM-based tree search: given the question and the
 # simplified document tree (without node text), the LLM is asked to return
 # its reasoning process and a list of relevant node ids as JSON.
 # The f-string is evaluated at this assignment, so `query` and
 # `tree_without_text` must already be defined when this line runs.
 # NOTE(review): `tree_without_text` is not defined in any of the visible
 # snippets - presumably `tree` with its text fields removed; confirm.
 search_prompt = f"""
 You are given a question and a tree structure of a document.
 Each node contains a node id, node title, and a corresponding summary.
 Your task is to find all nodes that are likely to contain the answer to the question.

 Question: {query}

 Document tree structure:
 {json.dumps(tree_without_text, indent=2)}

 Please reply in the following JSON format:
 {{
    "thinking": "<Your thinking process on which nodes are relevant to the question>",
    "node_list": ["node_id_1", "node_id_2", ..., "node_id_n"]
 }}
 Directly return the final JSON structure. Do not output anything else.
 """

 async def main():
    """Run the LLM tree search and print its reasoning and the matched nodes.

    Sends ``search_prompt`` to ``call_llm``, parses the reply as JSON, prints
    its ``thinking`` text and then one line (id, page, title) for every entry
    of ``node_list``.  An id that cannot be resolved in the node mapping built
    from ``tree`` is reported and skipped, so one bad id in the LLM reply does
    not abort the whole listing with a ``KeyError``.
    """
    tree_search_result = await call_llm(search_prompt)

    # Lookup used to resolve the node ids returned by the LLM.
    node_map = utils.create_node_mapping(tree)
    tree_search_result_json = json.loads(tree_search_result)

    print('Reasoning Process:')
    utils.print_wrapped(tree_search_result_json['thinking'])

    print('\nRetrieved Nodes:')
    for node_id in tree_search_result_json["node_list"]:
        try:
            node = node_map[node_id]
        except KeyError:
            # The id list is free-form LLM output and may not match the tree.
            print(f"Node ID: {node_id}\t (not found in document tree; skipped)")
            continue
        print(f"Node ID: {node['node_id']}\t Page: {node['page_index']}\t Title: {node['title']}")

 asyncio.run(main())
diff --git a/step1.py b/step1.py
 import time

 from pageindex import PageIndexClient
 import pageindex.utils as utils

 # Get your PageIndex API key from https://dash.pageindex.ai/api-keys
 PAGEINDEX_API_KEY = "api-key"
 pi_client = PageIndexClient(api_key=PAGEINDEX_API_KEY)

 # PDF file
 pdf_path = "path to pdf file"

 # Submit the file; the returned doc_id is what the calls below use.
 doc_id = pi_client.submit_document(pdf_path)["doc_id"]
 print('Document Submitted:', doc_id)

 # Poll every 5 seconds until the document is reported ready for retrieval.
 # `time` must be imported for the sleep below; without it the loop raised
 # NameError whenever the first status check was not yet truthy.
 while True:
    status = pi_client.is_retrieval_ready(doc_id)
    print('Current Document Status:', status)
    if status:
        break
    time.sleep(5)

 # Retrieve and print the simplified tree structure of the document
 tree = pi_client.get_tree(doc_id, node_summary=True)['result']
 print('Simplified Tree Structure of the Document:')
 utils.print_tree(tree)
	# Question that the later tree search tries to answer.
	query = "What are the conclusions in this document?"
	# Placeholder: use the doc_id of the same document from step-01.py, or
	# substitute the doc_id of any other document of your own.
	doc_id = "your doc id"

	# Fetch the tree with node summaries enabled and keep only its 'result' entry.
	tree_response = pi_client.get_tree(doc_id, node_summary=True)
	tree = tree_response['result']

	print("Simplified Tree Structure of the Document:")
	utils.print_tree(tree)

	# Example index tree data
	Simplified Tree Structure of the Document:
	[{'title': 'Abstract', 'node_id': '0000', 'summary': 'This text discusses the challenge of gen...'},
	{'title': '1 Introduction',
	'node_id': '0001',
	'summary': 'This text discusses the development of r...'},
	{'title': '2 DeepSeek-R1-Zero',
	'node_id': '0002',
	'summary': 'The text details the training of DeepSee...'},
	{'title': '3. DeepSeek-R1',
	'node_id': '0003',
	'summary': 'This text introduces DeepSeek-R1, an enh...'},
	{'title': '4 Experiment',
	'node_id': '0004',
	'summary': 'The text details the experimental evalua...'},
	{'title': '5 Ethics and Safety Statement',
	'node_id': '0005',
	'summary': 'The text addresses the ethical risks of ...'},
	{'title': '6 Conclusion, Limitation, and Future Wor...',
	'node_id': '0006',
	'summary': 'The text introduces DeepSeek-R1-Zero and...'},
	...
	{'title': 'B. Training Details',
	'node_id': '0009',
	'prefix_summary': '# B. Training Details\n',
	'nodes': [{'title': 'B.1. RL Infrastructure',
	'node_id': '0010',
	'summary': 'The text describes the RL infrastructure...'},
	{'title': 'B.2 Reward Model Prompt',
	'node_id': '0011',
	'summary': 'The text provides instructions for an AI...'},
	{'title': 'B.3. Data Recipe',
	'node_id': '0012',
	'prefix_summary': '## B.3. Data Recipe\n',
	'nodes': [{'title': 'B.3.1. RL Data',
	'node_id': '0013',
	'summary': 'The text details the Reinforcement Learn...'},
	{'title': 'B.3.2 DeepSeek-R1 Cold Start',
	'node_id': '0014',
	'summary': 'The text details the cold start process ...'},
	{'title': 'B.3.3 800K Supervised Data',
	'node_id': '0015',
	'summary': '### B.3.3 800K Supervised Data\n\n#### Rea...'}]},
	Reasoning Process:
	The question asks for the conclusions in the document. Typically, conclusions are found in sections
	explicitly labeled as 'Conclusion' or in sections summarizing findings and future directions. In
	this document tree, node '0006' is titled '6 Conclusion, Limitation, and Future Work', which is the
	primary section for conclusions. Additionally, the 'Abstract' (node '0000') often contains a summary
	of conclusions, and the 'Discussion' (node '0030') may also synthesize key findings and
	implications. However, the most direct and comprehensive conclusions are likely in node '0006'.

	Retrieved Nodes:
	Node ID: 0006 Page: 10 Title: 6 Conclusion, Limitation, and Future Work
	Node ID: 0000 Page: 1 Title: Abstract
	Node ID: 0030 Page: 62 Title: G. Discussion
	# Build the prompt for the LLM-based tree search: given the question and the
	# simplified document tree (without node text), the LLM is asked to return
	# its reasoning process and a list of relevant node ids as JSON.
	# The f-string is evaluated at this assignment, so `query` and
	# `tree_without_text` must already be defined when this line runs.
	# NOTE(review): `tree_without_text` is not defined in any of the visible
	# snippets - presumably `tree` with its text fields removed; confirm.
	search_prompt = f"""
	You are given a question and a tree structure of a document.
	Each node contains a node id, node title, and a corresponding summary.
	Your task is to find all nodes that are likely to contain the answer to the question.

	Question: {query}

	Document tree structure:
	{json.dumps(tree_without_text, indent=2)}

	Please reply in the following JSON format:
	{{
	"thinking": "<Your thinking process on which nodes are relevant to the question>",
	"node_list": ["node_id_1", "node_id_2", ..., "node_id_n"]
	}}
	Directly return the final JSON structure. Do not output anything else.
	"""

	async def main():
		"""Run the LLM tree search and print its reasoning and the matched nodes.

		Sends ``search_prompt`` to ``call_llm``, parses the reply as JSON, prints
		its ``thinking`` text and then one line (id, page, title) for every entry
		of ``node_list``.  An id that cannot be resolved in the node mapping built
		from ``tree`` is reported and skipped, so one bad id in the LLM reply does
		not abort the whole listing with a ``KeyError``.
		"""
		tree_search_result = await call_llm(search_prompt)

		# Lookup used to resolve the node ids returned by the LLM.
		node_map = utils.create_node_mapping(tree)
		tree_search_result_json = json.loads(tree_search_result)

		print('Reasoning Process:')
		utils.print_wrapped(tree_search_result_json['thinking'])

		print('\nRetrieved Nodes:')
		for node_id in tree_search_result_json["node_list"]:
			try:
				node = node_map[node_id]
			except KeyError:
				# The id list is free-form LLM output and may not match the tree.
				print(f"Node ID: {node_id}\t (not found in document tree; skipped)")
				continue
			print(f"Node ID: {node['node_id']}\t Page: {node['page_index']}\t Title: {node['title']}")

	asyncio.run(main())
	import time

	from pageindex import PageIndexClient
	import pageindex.utils as utils

	# Get your PageIndex API key from https://dash.pageindex.ai/api-keys
	PAGEINDEX_API_KEY = "api-key"
	pi_client = PageIndexClient(api_key=PAGEINDEX_API_KEY)

	# PDF file
	pdf_path = "path to pdf file"

	# Submit the file; the returned doc_id is what the calls below use.
	doc_id = pi_client.submit_document(pdf_path)["doc_id"]
	print('Document Submitted:', doc_id)

	# Poll every 5 seconds until the document is reported ready for retrieval.
	# The loop and branch bodies are indented here so the block parses, and
	# `time` is imported so the sleep below does not raise NameError.
	while True:
		status = pi_client.is_retrieval_ready(doc_id)
		print('Current Document Status:', status)
		if status:
			break
		time.sleep(5)

	# Retrieve and print the simplified tree structure of the document
	tree = pi_client.get_tree(doc_id, node_summary=True)['result']
	print('Simplified Tree Structure of the Document:')
	utils.print_tree(tree)