- Place both files in ~/.local/share/nautilus/scripts (setup sketch below).
- ChatWithFile has to have exec permissions: chmod +x ChatWithFile
- You will need to change the path to your local model in ChatWithFile.py.
- A Terminal profile named autolauncher is needed, created in Terminal Preferences under Command -> When Command Exits: Keep window open.
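A minimal setup sketch, run from the folder where the two files were saved (uv is assumed to be installed already; adjust names and paths to your layout):

mkdir -p ~/.local/share/nautilus/scripts
cp ChatWithFile ChatWithFile.py ~/.local/share/nautilus/scripts/
chmod +x ~/.local/share/nautilus/scripts/ChatWithFile
nautilus -q   # optional: quit Nautilus so it re-reads the scripts folder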
LLM Nautilus Script to chat with file contents
ChatWithFile (the Nautilus script):
#!/usr/bin/env bash

# Nautilus provides the selection both as positional arguments and as environment variables:
#a=$1  # NAUTILUS_SCRIPT_SELECTED_FILE_PATHS : newline-delimited paths for selected files (only if local)
#b=$2  # NAUTILUS_SCRIPT_SELECTED_URIS       : newline-delimited URIs for selected files
#c=$3  # NAUTILUS_SCRIPT_CURRENT_URI         : current location
#d=$4  # NAUTILUS_SCRIPT_WINDOW_GEOMETRY     : position and size of current window

# Debugging helpers: print/append output to a file.
#echo "file $1 | $2 | $3 | $4" | tee -a report.txt
#echo "Hello" | tee -a report.txt

# https://askubuntu.com/questions/46627/how-can-i-make-a-script-that-opens-terminal-windows-and-executes-commands-in-the
# https://docs.oracle.com/cd/E88353_01/html/E37839/gnome-terminal-1.html
# Launch a new terminal window and run a command in it.
# A profile has to be created in Terminal > Preferences with the option to keep the window open.
#
# Breakdown of a gnome-terminal invocation (adapted from the askubuntu answer above):
#   gnome-terminal                            = open a gnome-terminal window
#   --tab                                     = open a separate tab for what comes next
#   --title="tab 1"                           = title that tab "tab 1"
#   --command="bash -c 'cd /etc; ls; $SHELL'" = run the quoted command in that tab:
#       bash -c   = run the string that follows as a bash command
#       cd /etc   = change directory to /etc
#       ls        = list the contents of that directory
#       $SHELL    = keeps an interactive shell open afterwards so you can keep working;
#                   remove it if the tab should close once the command finishes.
#   Repeat --tab ... to open more tabs. Customize to your heart's content.

# Earlier experiments, kept for reference:
#gnome-terminal --window-with-profile=autolauncher --title="TTT" --command="bash -c 'echo $1 | tee -a report.txt; echo "done!";'"
#gnome-terminal --window-with-profile=autolauncher --title="TTT" --command="bash -c 'uv run ChatWithFile.py $a $b $c $d'";
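# Note: newer gnome-terminal releases deprecate --command/-e in favour of passing the
# command after "--"; an equivalent form (not tested with this setup) would be:
#gnome-terminal --window-with-profile=autolauncher --title="TTT" -- bash -c runCommands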

function runCommands {
    printf '\nRunning...\n'
    # ChatWithFile.py reads the selected paths from the NAUTILUS_SCRIPT_SELECTED_FILE_PATHS
    # environment variable, which gnome-terminal passes on to the new shell; adjust the path
    # below if the Python script is kept somewhere other than the Nautilus scripts folder.
    uv run "$HOME/.local/share/nautilus/scripts/ChatWithFile.py"
}
export -f runCommands

# Requires a terminal profile named "autolauncher" set to keep the window open on exit.
gnome-terminal --window-with-profile=autolauncher --title="TTT" --command="bash -c runCommands"
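To try the launcher outside Nautilus, the selection can be faked by exporting the variable Nautilus would normally set (the file path below is only an example):

export NAUTILUS_SCRIPT_SELECTED_FILE_PATHS="$HOME/Documents/notes.txt"
~/.local/share/nautilus/scripts/ChatWithFile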
ChatWithFile.py (the chat script, run by uv):
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "llama-cpp-python",
# ]
# ///
"""
Initiate an LLM terminal chat with the selected file(s) in context.
"""
import os
import sys

from llama_cpp import Llama

FILES : list[str] = []
"Optional list of file paths (currently unused; the selection comes from Nautilus via the environment)."
PATH_TO_MODELS : str = os.path.abspath("/path/to/folder/with/models/")  # <<-- update this to the path where the gguf models are!
"Path to the folder of available models."
MODEL_FILE : str = "Meta-Llama-3-8B-Instruct.Q4_0.gguf"
"LLM model file name."
MAX_TOKENS : int = 1024
"Maximum number of tokens to generate per message."
MAX_CONTEXT : int = 8192
"Context window size; adjust based on your RAM and needs."

def main (
        model_path : str = "",
        files : list[str] = []
    ) :
    """
    ### Run a terminal LLM chat with the selected file contents in context.
    ---
    #### Args:
        :param model_path: A string with the path to the gguf model file.
        :param files: Optional list of file paths (currently unused; the selection is read
            from the NAUTILUS_SCRIPT_SELECTED_FILE_PATHS environment variable).
    """
    try :
        # Sampling parameters (temperature, top_p, top_k, max_tokens) are per-request
        # settings and are passed to create_chat_completion() below, not to the constructor.
        llm = Llama (
            model_path   = model_path ,
            n_gpu_layers = 1 ,
            n_batch      = 1024 ,        # prompt batch size (was 256)
            n_ctx        = MAX_CONTEXT , # context window size (was 2048)
            verbose      = False ,
        )
        print('\n\t ---> Model loaded successfully!')
    except Exception as e :
        print(f'\n\t ---> Error loading model! {e}')
        sys.exit(1)

    # Load the selected files' contents and collect them for the prompt context:
    selected_paths : list[str] = os.getenv('NAUTILUS_SCRIPT_SELECTED_FILE_PATHS', '').splitlines()
    files_str : list[dict[str, str]] = []
    for file in selected_paths :
        try :
            if os.path.exists(file) :
                with open(file) as f :
                    content : str = f.read()
                files_str.append({"file" : os.path.basename(file), "content" : content})
                #print(f"{file}, {content}")
        except OSError :
            continue
    print(f"\n\tFiles selected:\n{selected_paths}\n\tContent:\n{files_str}")

    PROMPT = f'''
You are a friendly and professional assistant.
You can respond in Portuguese.
Your responses are straight to the point, short, helpful and mostly one sentence.
The maximum length of your response is 237 characters.
Input from the user contains the contents of the following files:
{files_str}
These are for context and should not be repeated in the response!
'''

    # --- Initialize chat history ---
    # The whole conversation is stored here and resent on every turn.
    messages = [
        {"role" : "system", "content" : PROMPT},
        # Optionally add user -> assistant interaction examples to steer the responses:
        #{"role" : "user", "content" : "..."},
        #{"role" : "assistant", "content" : "..."},
    ]
    # Warm-up call: processes the system prompt once so that the first real turn responds
    # faster; max_tokens=1 keeps the throwaway generation as short as possible.
    llm.create_chat_completion(messages, max_tokens=1)

    # Chat loop:
    try :
        while True :
            # 1. Read user input:
            new_query : str = input("Instruction: ")
            if new_query.lower() == "!exitchat" :
                print("\n\t ---> Exiting chat!")
                break

            # 2. Append the user message to the history.
            #    The content can be a plain string or the structured list format;
            #    a plain string is simpler and works with most chat templates.
            messages.append({"role": "user", "content": new_query})

            # 3. Generate the response.
            print("Assistant: ", end="", flush=True)  # print the prefix, stay on the same line
            try :
                response = llm.create_chat_completion(
                    messages = messages,
                    max_tokens = MAX_TOKENS,
                    stream = True,        # stream chunks for a type-writer effect
                    temperature = 0.35,   # closer to 0 makes the output more deterministic and consistent
                    top_p = 0.7,
                    top_k = 40,
                )
                assistant_response_content = ""
                # Handle the streamed chunks as they arrive:
                for chunk in response :
                    delta = chunk['choices'][0]['delta']
                    if 'content' in delta :
                        content_piece = delta['content']
                        print(content_piece, end="", flush=True)
                        assistant_response_content += content_piece
                print()  # newline after the streamed response

                # 4. Append the assistant response to the history.
                if assistant_response_content :
                    messages.append({"role": "assistant", "content": assistant_response_content})

                # Optional: simple context-window management (trim the oldest messages when too long).
                # Rough token estimate only; more sophisticated methods exist.
                # total_tokens_estimate = sum(len(m['content']) // 3 for m in messages)
                # if total_tokens_estimate > MAX_CONTEXT * 0.8:  # keep some buffer
                #     print("\n[Trimming conversation history to fit context window...]")
                #     # Remove the oldest user/assistant pair after the system prompt (if any).
                #     start_index = 1 if messages[0]['role'] == 'system' else 0
                #     if len(messages) > start_index + 2:
                #         del messages[start_index:start_index+2]
            except Exception as e :
                print(f"\nError during generation: {e}")
                # Remove the last user message if generation failed, so it is not resent.
                if messages and messages[-1]["role"] == "user" :
                    messages.pop()
    except KeyboardInterrupt :
        print("\nCaught Ctrl+C. Exiting gracefully...")
    finally :
        # Clean up. Python's garbage collection usually handles this when the script
        # exits, but deleting explicitly can free GPU resources a bit sooner.
        del llm
        print("Model unloaded (or will be by Python's GC). Goodbye!")


if __name__ == "__main__" :
    main(os.path.join(PATH_TO_MODELS, MODEL_FILE), FILES)