Skip to content

Instantly share code, notes, and snippets.

@xgdgsc
Created September 27, 2017 13:25
Show Gist options
  • Save xgdgsc/22227ac7992226920a5bee3ef409bd65 to your computer and use it in GitHub Desktop.
Save xgdgsc/22227ac7992226920a5bee3ef409bd65 to your computer and use it in GitHub Desktop.
compressed_csv_reader
#pragma once
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
using std::string;
using std::vector;
template <class RowData>
class CompressedCSVReader {
public:
CompressedCSVReader() {}
CompressedCSVReader(std::string file_path, string delimiter = ",",
string compression = "gzip", bool header = true)
: delimiter(delimiter), compression(compression), header(header) {
file = std::ifstream(file_path, std::ios_base::in | std::ios_base::binary);
}
vector<RowData> read() {
vector<RowData> row_data_list;
try {
boost::iostreams::filtering_istream in;
if (compression == "gzip") {
in.push(boost::iostreams::gzip_decompressor());
} else if (compression == "bzip2") {
in.push(boost::iostreams::bzip2_decompressor());
}
in.push(file);
if (header) {
string str;
std::getline(in, str);
}
for (std::string str; std::getline(in, str);) {
vector<string> tokens;
boost::split(tokens, str, boost::is_any_of(delimiter));
RowData row_data(tokens);
row_data_list.push_back(row_data);
}
} catch (const boost::iostreams::gzip_error& e) {
std::cout << e.what() << '\n';
} catch (const boost::iostreams::bzip2_error& e) {
std::cout << e.what() << '\n';
}
return row_data_list;
}
private:
std::ifstream file;
string delimiter;
string compression = "gzip";
bool header = true;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment