Last active
May 25, 2020 13:32
-
-
Save ochinchina/63b50f82d3cf85656879 to your computer and use it in GitHub Desktop.
A lightweight CSV parser in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parse CSV from a file or a string.
 *
 */
#ifndef _CSV_PARSER_HPP | |
#define _CSV_PARSER_HPP | |
#include <iostream> | |
#include <string> | |
#include <fstream> | |
#include <sstream> | |
#include <vector> | |
/**
 * A single CSV record: one line of text split into fields on a delimiter.
 *
 * Splitting is purely character based: every occurrence of the delimiter
 * starts a new field. Quoting or escaping of the delimiter is not interpreted.
 *
 * Author: Steven Ou
 */
class CsvRecord {
public:
    /**
     * Create a record by splitting s on delimeter.
     *
     * @param s the text of one record
     * @param delimeter the field separator, ',' by default
     */
    CsvRecord( const std::string& s, char delimeter = ',' )
    :delimeter_( delimeter )
    {
        parse( s );
    }

    /**
     * Create a record without any field.
     *
     * @param delimeter the field separator used by later parse()/print() calls
     */
    CsvRecord( char delimeter = ',' )
    :delimeter_( delimeter )
    {
    }

    /**
     * Replace the delimiter of this record, then re-parse it from s.
     */
    void parse( const std::string& s, char delimeter ) {
        delimeter_ = delimeter;
        parse( s );
    }

    /**
     * Replace the fields of this record with the ones found in s.
     *
     * A string with k delimiters always yields k + 1 fields, so an empty
     * string yields one empty field and a trailing delimiter yields a
     * trailing empty field.
     */
    void parse( const std::string& s ) {
        record_.clear();
        std::string::size_type begin = 0;
        for( ; ; ) {
            const std::string::size_type end = s.find( delimeter_, begin );
            if( end == std::string::npos ) {
                // no more delimiter: the rest of the string is the last field
                record_.push_back( s.substr( begin ) );
                break;
            }
            record_.push_back( s.substr( begin, end - begin ) );
            begin = end + 1;
        }
    }

    /**
     * return the number of fields in this record
     */
    int length() const {
        return static_cast< int >( record_.size() );
    }

    /**
     * get the index-th field, the index must be in range [0, length() )
     *
     * @throws std::out_of_range if index is outside that range (a negative
     * index converts to a huge unsigned value and is rejected as well)
     */
    std::string getField( int index ) const {
        return record_.at( static_cast< std::vector< std::string >::size_type >( index ) );
    }

    /**
     * get the index-th field converted to DataType with operator>>
     *
     * The result starts value-initialized, so when the stream extracts
     * nothing (for example from an empty field) the caller gets DataType()
     * instead of an indeterminate value.
     *
     * @throws std::out_of_range if index is outside [0, length() )
     */
    template< typename DataType >
    DataType getFieldAs( int index ) const {
        std::istringstream in( record_.at( static_cast< std::vector< std::string >::size_type >( index ) ) );
        DataType t = DataType();
        in >> t;
        return t;
    }

    /**
     * write all the fields to out, separated by the delimiter of this record
     */
    void print( std::ostream& out ) const {
        for( std::vector< std::string >::size_type i = 0; i < record_.size(); i++ ) {
            if( i != 0 ) {
                out << delimeter_;
            }
            out << record_[ i ];
        }
    }
private:
    char delimeter_;
    std::vector< std::string > record_;
};
/**
 * Stream insertion for CsvRecord: writes the record to out through
 * CsvRecord::print and hands the stream back so insertions can be chained.
 */
inline std::ostream& operator<<( std::ostream& out, const CsvRecord& record ) {
    record.print( out );
    return out;
}
/** | |
* the CSV parser to parse csv from an std::istream. | |
* | |
* Next example shows how to use this csv parser: | |
* | |
* #include <fstream> | |
* #include "CsvParser.hpp" | |
* | |
* ... | |
* //open the csv file | |
* std::ifstream in( "test.csv" ); | |
* | |
* //create parse with std::istream and the char delimeter | |
* CsvParser parser( in, ',' ); | |
* | |
* //Now we can read the record from the parser now | |
* | |
* CsvRecord record; | |
* | |
* // read a record from file and save it to record object | |
* while( parser.getNext( record ) ) { | |
* //TODO: start to process this record | |
* std::cout << record.getField( 0 ) << std::endl; | |
* } | |
} | |
* | |
* | |
* @Autor: Steven Ou | |
*/ | |
class CsvParser { | |
public: | |
CsvParser( std::istream& in, char delimeter = ',' ) | |
:in_( in ), | |
delimeter_( delimeter ) | |
{ | |
} | |
const CsvRecord& getHeader() const { | |
return header_; | |
} | |
bool getNext( CsvRecord& record ) { | |
while( in_ ) { | |
std::string line; | |
std::getline( in_, line ); | |
if( !line.empty() ) { | |
if( line[0] == '#' ) { | |
line = line.substr( 1 ); | |
header_.parse( line, delimeter_ ); | |
} else { | |
record.parse( line, delimeter_ ); | |
} | |
return true; | |
} | |
} | |
return false; | |
} | |
private: | |
char delimeter_; | |
std::istream& in_; | |
CsvRecord header_; | |
}; | |
#endif/*_CSV_PARSER_HPP*/ | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "CsvParser.hpp" | |
#include <fstream> | |
int main( int argc, char** argv ) { | |
const char* csvFile = argv[1]; | |
std::ifstream fs( csvFile ); | |
CsvRecord csvRecord; | |
//create a parser with comma as delimeter | |
CsvParser parser( fs, ',' ); | |
while( parser.getNext( csvRecord ) ) { | |
//TODO: process the fetched record | |
//we simply print all the fields of fetched record | |
std::cout << "-----------get a record----------------"<< std::endl; | |
for( int i = 0; i < csvRecord.length(); i++ ) { | |
std::cout << csvRecord.getField( i ) << std::endl; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment