Skip to content

Instantly share code, notes, and snippets.

@Steboss89
Created June 10, 2021 13:41
Show Gist options
  • Save Steboss89/e2ea573fc87ac1642e589b8a705a6895 to your computer and use it in GitHub Desktop.
Save Steboss89/e2ea573fc87ac1642e589b8a705a6895 to your computer and use it in GitHub Desktop.
Preprocess in linear regression
// Preprocessing for linear regression: load the dataset, split it into test and
// train partitions, and pack features/targets into the containers the model takes.

// Fraction of the rows held out as test data (30%).
const TEST_FRACTION: f64 = 0.3;
// Number of feature values per row; used as the column count of both feature
// matrices so the two cannot drift apart.
const N_FEATURES: usize = 13;

// Read input data
let ifile = "FULLPATHTODATASET";
let input_data = read_housing_csv(&ifile);

// Train/test split.
// The test partition size is the row count scaled by TEST_FRACTION, rounded to
// the nearest whole row (`as` performs the usize <-> f64 conversions).
let test_chunk_size = (input_data.len() as f64 * TEST_FRACTION).round() as usize;

// `split_at(mid)` is a slice method: it returns (first `mid` elements, the rest),
// so the leading `test_chunk_size` rows become the test set.
// https://doc.rust-lang.org/std/primitive.slice.html#method.split_at
// NOTE(review): rows are split in file order and nothing is shuffled here —
// confirm the dataset is not sorted in a way that biases the split.
let (test, train) = input_data.split_at(test_chunk_size);
let train_size = train.len();
let test_size = test.len();

// Training features and targets.
// `flat_map` concatenates every row's features into one flat buffer, while the
// targets contribute a single value per row.
let x_train: Vec<f64> = train.iter().flat_map(|row| row.train_features()).collect();
let y_train: Vec<f64> = train.iter().map(|row| row.train_target()).collect();

// Same extraction for the test rows (the `train_*` accessors are reused as-is).
let x_test: Vec<f64> = test.iter().flat_map(|row| row.train_features()).collect();
let y_test: Vec<f64> = test.iter().map(|row| row.train_target()).collect();

// The linear regression model wants a matrix for the features and a vector for
// the targets: each matrix is (number of rows) x N_FEATURES, built from the
// flat feature buffer.
// NOTE(review): presumably `train_features()` yields exactly N_FEATURES values
// per row — verify against its definition.
let x_train_matrix = Matrix::new(train_size, N_FEATURES, x_train);
let y_train_vector = Vector::new(y_train);
let x_test_matrix = Matrix::new(test_size, N_FEATURES, x_test);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment