To compile and execute the program from a different source file, such as `infer.rs`, and name the resulting binary `infer`, follow these modified instructions:
- **Update Project Structure**: Create a new source file named `infer.rs` in the `src` directory:

  ```sh
  touch src/infer.rs
  ```
- **Copy Code to `infer.rs`**: Move the updated inference-only code to `src/infer.rs`:

  ```rust
  // src/infer.rs
  use femto_gpt::gpt::{TrainingState, GPT};
  use femto_gpt::graph::GraphError;
  use femto_gpt::tokenizer::{SimpleTokenizer, Tokenizer};
  use std::env;
  use std::fs;
  use std::io::prelude::*;
  use std::path::Path;

  fn main() -> Result<(), GraphError> {
      // Pick the graph backend at compile time, matching the `gpu` feature flag.
      #[cfg(not(feature = "gpu"))]
      let graph = femto_gpt::graph::CpuGraph::new();
      #[cfg(not(feature = "gpu"))]
      let is_gpu = false;

      #[cfg(feature = "gpu")]
      let graph = femto_gpt::graph::gpu::GpuGraph::new()?;
      #[cfg(feature = "gpu")]
      let is_gpu = true;

      let training_state_path = Path::new("training_state.dat");

      let mut rng = rand::thread_rng();

      // Create a unique char-to-int mapping for all unique characters inside our dataset
      let dataset_char =
          fs::read_to_string("dataset.txt").expect("Should have been able to read the file");
      let tokenizer = SimpleTokenizer::new(&dataset_char);
      let _dataset = tokenizer.tokenize(&dataset_char); // full-dataset tokens aren't needed for inference

      let batch_size = 1;
      let num_tokens = 64;
      let vocab_size = tokenizer.vocab_size();
      let embedding_degree = 64;
      let num_layers = 4;
      let num_heads = 4;
      let head_size = embedding_degree / num_heads;
      let dropout = 0.0;

      assert_eq!(num_heads * head_size, embedding_degree);

      println!("Vocab-size: {} unique characters", vocab_size);

      let mut gpt = GPT::new(
          &mut rng,
          graph,
          is_gpu.then(|| batch_size), // Pre-allocate batches only when using GPUs
          vocab_size,
          embedding_degree,
          num_tokens,
          num_layers,
          num_heads,
          head_size,
          dropout,
      )?;

      gpt.sync()?;

      println!("Number of parameters: {}", gpt.num_params());

      // Load the pre-trained model, or bail out if no training state exists yet.
      if training_state_path.is_file() {
          let mut ts_file = fs::File::open(training_state_path).unwrap();
          let mut bytes = Vec::new();
          ts_file.read_to_end(&mut bytes).unwrap();
          let ts: TrainingState = bincode::deserialize(&bytes).unwrap();
          gpt.set_training_state(ts, true)?;
      } else {
          eprintln!("Error: Pre-trained model not found!");
          return Ok(());
      }

      // Get the input text from the command line arguments.
      let args: Vec<String> = env::args().collect();
      if args.len() < 2 {
          eprintln!("Usage: {} <input_text>", args[0]);
          return Ok(());
      }
      let input_text = &args[1];

      let inference_temperature = 0.5; // How creative? 0.0 min 1.0 max

      println!("Generating text based on input: '{}'", input_text);

      // Perform inference: generate 100 tokens following the prompt.
      let inference = gpt.infer(
          &mut rng,
          &tokenizer.tokenize(input_text),
          100,
          inference_temperature,
          |_ch| {},
      )?;

      // Print the generated text.
      println!("{}", tokenizer.untokenize(&inference));

      Ok(())
  }
  ```
- **Update `Cargo.toml`**: Add a custom binary section in `Cargo.toml` to specify the source file and the binary name (see the manifest sketch after this list):

  ```toml
  [[bin]]
  name = "infer"
  path = "src/infer.rs"
  ```
- **Compile the Program** (a sketch for building only the `infer` target follows this list):

  For CPU:

  ```sh
  cargo build --release
  ```

  For GPU:

  ```sh
  cargo build --release --features gpu
  ```
- **Run the Program**: After compiling, you can execute the inference by providing a text input as a command-line argument (see the runtime notes after this list):

  ```sh
  ./target/release/infer "The quick brown fox"
  ```
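
For orientation, here is a minimal sketch of where the `[[bin]]` section might sit in the existing manifest; the package metadata and dependency entries below are illustrative placeholders, not the project's actual values:

```toml
# Cargo.toml (sketch; package fields and dependencies are placeholders)
[package]
name = "femto-gpt"
version = "0.1.0"
edition = "2021"

[dependencies]
# ... the project's existing dependencies stay as they are ...

# Extra binary target pointing at the new source file:
[[bin]]
name = "infer"
path = "src/infer.rs"
```

With the 2018 edition or later, Cargo's target auto-discovery should still pick up `src/main.rs` as the default binary, so the original entry point keeps building alongside `infer`.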
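If you prefer to compile just the new target rather than every binary in the package, Cargo's `--bin` flag selects a single binary; a short sketch, reusing the same feature flag as above:

```sh
# Build only the `infer` binary (CPU backend):
cargo build --release --bin infer

# Build only the `infer` binary with the GPU feature enabled:
cargo build --release --features gpu --bin infer
```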
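Two runtime notes follow directly from the code in `src/infer.rs`: it opens `dataset.txt` and `training_state.dat` with relative paths, and it prints a usage hint when no argument is given. A sketch of what that looks like (the echoed usage line mirrors the `eprintln!` in the source):

```sh
# Run from the project root so the relative paths in infer.rs resolve;
# both files must sit in the current working directory:
#   dataset.txt          -> rebuilds the character-level tokenizer
#   training_state.dat   -> saved weights produced by a training run
./target/release/infer "The quick brown fox"

# Without an argument, the binary prints the usage hint from infer.rs:
./target/release/infer
# Usage: ./target/release/infer <input_text>
```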
This setup will ensure that the binary named `infer` is created and will run the inference-only code from `src/infer.rs`.
The untrained model will produce output like this: