Created
June 10, 2021 13:41
-
-
Save Steboss89/e2ea573fc87ac1642e589b8a705a6895 to your computer and use it in GitHub Desktop.
Preprocess in linear regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load the dataset rows from disk.
let ifile = "FULLPATHTODATASET";
// NOTE(review): nothing in this section mutates `input_data`; `mut` is kept only in
// case later code does — drop it if the compiler reports it as unused.
let mut input_data = read_housing_csv(&ifile);

// Train/test split: reserve 30% of the rows for testing.
// The size is computed in f64, rounded to the nearest whole row, then cast back to
// `usize` so it can be used as a slice index.
let test_chunk_size: f64 = input_data.len() as f64 * 0.3;
let test_chunk_size = test_chunk_size.round() as usize;

// `split_at` is a slice method (reachable here through deref):
// https://doc.rust-lang.org/std/primitive.slice.html#method.split_at
// It returns (rows[..mid], rows[mid..]), so the leading `test_chunk_size` rows become
// the test set and the remainder becomes the training set.
let (test, train) = input_data.split_at(test_chunk_size);
let train_size = train.len();
let test_size = test.len();

// Number of feature columns per row; shared by both matrices below so the two
// shapes cannot drift apart.
let n_features: usize = 13;

// Build the training inputs: `flat_map` concatenates each row's features into one
// flat buffer, while the targets are collected one value per row.
let x_train: Vec<f64> = train.iter().flat_map(|row| row.train_features()).collect();
let y_train: Vec<f64> = train.iter().map(|row| row.train_target()).collect();

// Same extraction for the test rows (the `train_*` accessors are reused as-is).
let x_test: Vec<f64> = test.iter().flat_map(|row| row.train_features()).collect();
let y_test: Vec<f64> = test.iter().map(|row| row.train_target()).collect();

// The linear regression model takes a matrix for the inputs and a vector for the
// targets, so wrap the flat buffers with their (rows, columns) shape.
let x_train_matrix = Matrix::new(train_size, n_features, x_train);
let y_train_vector = Vector::new(y_train);
let x_test_matrix = Matrix::new(test_size, n_features, x_test);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment