Last active
October 17, 2020 15:51
-
-
Save ifknot/1b14714a15cd1e107efe38e0ac54e10a to your computer and use it in GitHub Desktop.
read csv data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "read_csv.h" | |
#include "tokenize.h" | |
#include <fstream> | |
#include <sstream> | |
#include <iostream> | |
namespace R { | |
data_frame read_csv(std::string file_path, bool has_header) { | |
data_frame d; | |
try { | |
std::ifstream file(file_path); | |
if (!file.is_open()) { | |
throw std::ifstream::failure(file_path); | |
} | |
file.exceptions (std::ifstream::badbit); | |
std::string line; | |
size_t nline{ 1 }, nfield{ 0 }; // line number n, field number nn | |
std::vector<std::string> column; | |
if (has_header) { | |
std::getline(file, line); | |
std::istringstream iss(line); | |
std::string field; | |
while (getline(iss, field, ',')) { | |
if (tokenize(field) == token_t::string_t) { | |
field = field.substr(1, field.size() - 2); // chop enclosing sigils | |
column.push_back(field); // collect column name | |
d[field]; // construct empty column | |
} | |
else { | |
throw std::runtime_error( | |
"record " + std::to_string(nline) + " field " + std::to_string(nfield) + " malformed string " + field | |
); | |
} | |
nfield++; | |
} | |
nline++; | |
} | |
while (std::getline(file, line)) { | |
std::istringstream iss(line); | |
std::string field; | |
nfield = 0; | |
while (getline(iss, field, ',')) { | |
switch (tokenize(field)) { | |
case token_t::logical_t: | |
d[column[nfield]].push_back((field == "true") ? true : false); | |
break; | |
case token_t::integer_t: | |
d[column[nfield]].push_back(stoi(field)); | |
break; | |
case token_t::numeric_t: | |
d[column[nfield]].push_back(stod(field)); | |
break; | |
case token_t::complex_t: | |
// TODO: complex_t | |
break; | |
case token_t::date_t: | |
// TODO: date_t | |
case token_t::string_t: | |
d[column[nfield]].push_back(field.substr(1, field.size() - 2)); | |
break; | |
case token_t::raw_t: | |
d[column[nfield]].push_back(field.substr(1, field.size() - 2)); | |
break; | |
case token_t::broken_t: | |
throw std::runtime_error( | |
file_path + " broken record on line " + std::to_string(nline) + " field " + std::to_string(nfield) + " : " + field | |
); | |
break; | |
} | |
nfield++; | |
} | |
nline++; | |
} | |
} | |
catch (const std::exception& e) { | |
std::cerr << e.what() << std::endl; | |
} | |
return d; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment