// compressed_csv_reader
// Source: GitHub gist xgdgsc/22227ac7992226920a5bee3ef409bd65
// Created: September 27, 2017 13:25
// Note: GitHub flagged this file as containing bidirectional Unicode text that
// may be interpreted or compiled differently than it appears; review it in an
// editor that reveals hidden Unicode characters.
#pragma once

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>

using std::string;
using std::vector;
template <class RowData> | |
class CompressedCSVReader { | |
public: | |
CompressedCSVReader() {} | |
CompressedCSVReader(std::string file_path, string delimiter = ",", | |
string compression = "gzip", bool header = true) | |
: delimiter(delimiter), compression(compression), header(header) { | |
file = std::ifstream(file_path, std::ios_base::in | std::ios_base::binary); | |
} | |
vector<RowData> read() { | |
vector<RowData> row_data_list; | |
try { | |
boost::iostreams::filtering_istream in; | |
if (compression == "gzip") { | |
in.push(boost::iostreams::gzip_decompressor()); | |
} else if (compression == "bzip2") { | |
in.push(boost::iostreams::bzip2_decompressor()); | |
} | |
in.push(file); | |
if (header) { | |
string str; | |
std::getline(in, str); | |
} | |
for (std::string str; std::getline(in, str);) { | |
vector<string> tokens; | |
boost::split(tokens, str, boost::is_any_of(delimiter)); | |
RowData row_data(tokens); | |
row_data_list.push_back(row_data); | |
} | |
} catch (const boost::iostreams::gzip_error& e) { | |
std::cout << e.what() << '\n'; | |
} catch (const boost::iostreams::bzip2_error& e) { | |
std::cout << e.what() << '\n'; | |
} | |
return row_data_list; | |
} | |
private: | |
std::ifstream file; | |
string delimiter; | |
string compression = "gzip"; | |
bool header = true; | |
}; |
// End of gist source. (GitHub page footer with the sign-up / sign-in prompt is not part of the file.)