Created
March 31, 2012 17:22
-
-
Save stephenjbarr/2266900 to your computer and use it in GitHub Desktop.
Using Boost.Tokenizer to parse CSV files into Eigen matrices.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <iomanip> | |
#include <fstream> | |
#include "mkl.h" | |
#include "math.h" | |
#include <vector> | |
#include <cmath> | |
#include <string> | |
#include <cstdlib> | |
#include <ctype.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <getopt.h> | |
#define EIGEN_USE_MKL_BLAS | |
#define EIGEN_USE_MKL_LAPACKE | |
#define NDEBUG | |
#ifndef PI | |
#define PI 3.141592653589793 | |
#endif | |
#include <Eigen/Dense> | |
#include <boost/tokenizer.hpp> | |
using namespace boost; | |
using namespace Eigen; | |
using namespace std; | |
// Dynamic-size matrix of doubles stored in row-major order
// (Eigen's default storage order is column-major).
typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatrixRMXd;
////////////////////////////////////////
// Parse the command-line options.
//
// Recognised option:
//   --input FILE   store FILE in `fname`. Because getopt_long_only() is
//                  used, the single-dash spelling `-input FILE` is accepted
//                  as well.
//
// argc, argv : the argument vector, as received by main().
// fname      : output parameter; assigned only when the input option is
//              seen, otherwise left untouched.
//
// A banner line is written to stdout before parsing starts.
void parseTheOptions(int argc, char *argv[],
                     std::string& fname) {
    std::cout << "This is the argument parsing function" << std::endl;

    // The option table never changes, so it is built once, outside the loop.
    static const struct option long_options[] = {
        {"input", required_argument, 0, 'u'},
        {0, 0, 0, 0}
    };

    // NOTE(review): the short-option string below is kept unchanged to
    // preserve command-line behaviour, but none of its flags are acted on
    // in the switch; it looks like a leftover from the getopt(3) example
    // and could probably be reduced to "" -- confirm before changing.
    while (true) {
        int option_index = 0;
        const int c = getopt_long_only(argc, argv, "abc:d:012",
                                       long_options, &option_index);
        if (c == -1)
            break;  // no more options

        switch (c) {
        case 'u':
            fname = optarg;
            break;
        default:
            // Any other return value (the unused short flags, or '?' for
            // an unrecognised option) is ignored.
            break;
        }
    }
}
//////////////////////////////////////// | |
// double round(double number) | |
// { | |
// return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); | |
// } | |
//////////////////////////////////////// | |
// parse csv file into a Matrix of Integers | |
// assumes no header | |
// (yes this is ugly) | |
// NOTE: assumes that the "right" number of columns is the number | |
// of columns in the first row | |
MatrixXi parseCSVfile_int(string infilename) { | |
ifstream in(infilename.c_str()); | |
if (!in.is_open()) return MatrixXi(1,1); | |
typedef tokenizer< escaped_list_separator<char> > Tokenizer; | |
vector< string > vec; | |
string line; | |
vector< vector< string > > matrows; | |
while (getline(in,line)) | |
{ | |
Tokenizer tok(line); | |
vec.assign(tok.begin(),tok.end()); | |
// // Print each row | |
// copy(vec.begin(), vec.end(), | |
// ostream_iterator<string>(cout, "|")); | |
// cout << "\n----------------------" << endl; | |
matrows.push_back(vec); | |
} | |
in.close(); | |
// FIGURE OUT HOW MANY OF THE ROWS HAVE THE RIGHT NUMBER | |
// OF COLUMNS | |
int Nrows = matrows.size(); | |
int Ncols = matrows[0].size(); | |
int Ngoodrows = 0; | |
for(int i = 0; i < Nrows; i++) { | |
if(matrows[i].size() == Ncols) { | |
Ngoodrows++; | |
} | |
} | |
// TRANSFORM THE VECTOR OF ROWS INTO AN EIGEN INTEGER MATRIX | |
MatrixXi xmat = MatrixXi(Ngoodrows, Ncols); | |
cout << "INPUT MATRIX: " << Nrows << "x" << Ncols << endl; | |
int rc = 0; | |
for(int i = 0; i < Nrows; i++) { | |
int rowsize = matrows[i].size(); | |
if(rowsize != Ncols) { | |
cout << "Row " << i << " has bad column count" << endl; | |
continue; | |
} | |
for(int j = 0; j < Ncols; j++) { | |
xmat(rc,j) = int(round(strtod(matrows[i][j].c_str(), NULL))); | |
} | |
rc++; | |
} | |
return(xmat); | |
} | |
int main(int argc, char **argv) | |
{ | |
string fname; | |
parseTheOptions(argc, argv, fname); | |
MatrixXi parsed = parseCSVfile_int(fname); | |
cout << "PARSED: " << endl; | |
cout << parsed << endl << endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment