To compile and execute the program from a different source file, such as `infer.rs`, and name the resulting binary `infer`, follow these modified instructions:
- **Update Project Structure**: Create a new source file named `infer.rs` in the `src` directory:

  ```sh
  touch src/infer.rs
  ```
- **Copy Code to `infer.rs`**: Move the updated inference-only code to `src/infer.rs`:

  ```rust
  // src/infer.rs
  use femto_gpt::gpt::{TrainingState, GPT};
  use femto_gpt::graph::GraphError;
  use femto_gpt::tokenizer::{SimpleTokenizer, Tokenizer};
  use std::env;
  use std::fs;
  use std::io::prelude::*;
  use std::path::Path;

  fn main() -> Result<(), GraphError> {
      // Pick the graph backend at compile time, matching the `gpu` feature flag.
      #[cfg(not(feature = "gpu"))]
      let graph = femto_gpt::graph::CpuGraph::new();
      #[cfg(not(feature = "gpu"))]
      let is_gpu = false;

      #[cfg(feature = "gpu")]
      let graph = femto_gpt::graph::gpu::GpuGraph::new()?;
      #[cfg(feature = "gpu")]
      let is_gpu = true;

      let training_state_path = Path::new("training_state.dat");

      let mut rng = rand::thread_rng();

      // Create a unique char-to-int mapping for all unique characters inside our dataset
      let dataset_char =
          fs::read_to_string("dataset.txt").expect("Should have been able to read the file");
      let tokenizer = SimpleTokenizer::new(&dataset_char);
      let _dataset = tokenizer.tokenize(&dataset_char); // full-dataset tokens aren't needed for inference

      let batch_size = 1;
      let num_tokens = 64;
      let vocab_size = tokenizer.vocab_size();
      let embedding_degree = 64;
      let num_layers = 4;
      let num_heads = 4;
      let head_size = embedding_degree / num_heads;
      let dropout = 0.0;

      assert_eq!(num_heads * head_size, embedding_degree);

      println!("Vocab-size: {} unique characters", vocab_size);

      let mut gpt = GPT::new(
          &mut rng,
          graph,
          is_gpu.then(|| batch_size), // Pre-allocate batches only when using GPUs
          vocab_size,
          embedding_degree,
          num_tokens,
          num_layers,
          num_heads,
          head_size,
          dropout,
      )?;

      gpt.sync()?;

      println!("Number of parameters: {}", gpt.num_params());

      // Load the pre-trained model, or bail out if no training state exists yet.
      if training_state_path.is_file() {
          let mut ts_file = fs::File::open(training_state_path).unwrap();
          let mut bytes = Vec::new();
          ts_file.read_to_end(&mut bytes).unwrap();
          let ts: TrainingState = bincode::deserialize(&bytes).unwrap();
          gpt.set_training_state(ts, true)?;
      } else {
          eprintln!("Error: Pre-trained model not found!");
          return Ok(());
      }

      // Get the input text from the command line arguments.
      let args: Vec<String> = env::args().collect();
      if args.len() < 2 {
          eprintln!("Usage: {} <input_text>", args[0]);
          return Ok(());
      }
      let input_text = &args[1];

      let inference_temperature = 0.5; // How creative? 0.0 min 1.0 max

      println!("Generating text based on input: '{}'", input_text);

      // Perform inference: generate 100 tokens following the prompt.
      let inference = gpt.infer(
          &mut rng,
          &tokenizer.tokenize(input_text),
          100,
          inference_temperature,
          |_ch| {},
      )?;

      // Print the generated text.
      println!("{}", tokenizer.untokenize(&inference));

      Ok(())
  }
  ```
- **Update `Cargo.toml`**: Add a custom binary section in `Cargo.toml` to specify the source file and the binary name (see the manifest sketch after this list):

  ```toml
  [[bin]]
  name = "infer"
  path = "src/infer.rs"
  ```
- **Compile the Program** (a sketch for building only the `infer` target follows this list):

  For CPU:

  ```sh
  cargo build --release
  ```

  For GPU:

  ```sh
  cargo build --release --features gpu
  ```
- **Run the Program**: After compiling, you can execute the inference by providing a text input as a command-line argument (see the runtime notes after this list):

  ```sh
  ./target/release/infer "The quick brown fox"
  ```
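
For orientation, here is a minimal sketch of where the `[[bin]]` section might sit in the existing manifest; the package metadata and dependency entries below are illustrative placeholders, not the project's actual values:

```toml
# Cargo.toml (sketch; package fields and dependencies are placeholders)
[package]
name = "femto-gpt"
version = "0.1.0"
edition = "2021"

[dependencies]
# ... the project's existing dependencies stay as they are ...

# Extra binary target pointing at the new source file:
[[bin]]
name = "infer"
path = "src/infer.rs"
```

With the 2018 edition or later, Cargo's target auto-discovery should still pick up `src/main.rs` as the default binary, so the original entry point keeps building alongside `infer`.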
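If you prefer to compile just the new target rather than every binary in the package, Cargo's `--bin` flag selects a single binary; a short sketch, reusing the same feature flag as above:

```sh
# Build only the `infer` binary (CPU backend):
cargo build --release --bin infer

# Build only the `infer` binary with the GPU feature enabled:
cargo build --release --features gpu --bin infer
```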
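Two runtime notes follow directly from the code in `src/infer.rs`: it opens `dataset.txt` and `training_state.dat` with relative paths, and it prints a usage hint when no argument is given. A sketch of what that looks like (the echoed usage line mirrors the `eprintln!` in the source):

```sh
# Run from the project root so the relative paths in infer.rs resolve;
# both files must sit in the current working directory:
#   dataset.txt          -> rebuilds the character-level tokenizer
#   training_state.dat   -> saved weights produced by a training run
./target/release/infer "The quick brown fox"

# Without an argument, the binary prints the usage hint from infer.rs:
./target/release/infer
# Usage: ./target/release/infer <input_text>
```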
This setup will ensure that the binary named `infer` is created and will run the inference-only code from `src/infer.rs`.
The untrained model will produce output like this: