Created
July 18, 2013 15:51
-
-
Save klmr/6030474 to your computer and use it in GitHub Desktop.
A very simple and incomplete outline of a file format parser in C++ which determines the exact format dynamically at runtime. Most of the classes involved represent values; only a single class hierarchy represents control-flow logic.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
// A single parsed record. Being a container (akin to `std::vector`), it is
// a value type.
class row {
    // ...
};
// A collection of rows. Being a container, it is a value type as well.
class table {
    // ...
};
namespace csv { | |
// Finally, this class is *not* a value type. Consequently, it's explicitly | |
// noncopyable. We could have made it a value type using type erasure | |
// internally but I don't see the benefit since it's only used internally. | |
class format { | |
public: | |
// Represents an iterable object (i.e. exposing `begin` and `end`) | |
// which is lazy, i.e. doesn’t store the actual values. | |
// This is a value type. | |
class row_generator; | |
virtual ~format() = default; | |
format(format const&) = delete; | |
format& operator=(format const&) = delete; | |
row_generator parse() const { /* ... */ } | |
protected: | |
virtual char field_delimiter() = 0; | |
virtual char quote_char() = 0; | |
virtual char escape_char() = 0; | |
virtual std::string line_terminator() = 0; | |
// ... | |
}; | |
// Concrete format derived from `format`; judging by its name it describes
// comma-separated files (implementation elided).
class format_csv : public format {
    // ...
};
// Concrete format derived from `format`; judging by its name it describes
// tab-separated files (implementation elided).
class format_tsv : public format {
    // ...
};
// Return an appropriate `format` instance for a given stream.
// The stream is taken by reference: streams are not copyable, and the
// caller only holds a `std::istream&`, which cannot be converted to a
// `std::ifstream` value.
// NB: this skips some ugly details, such as the fact that this
// function logically needs to rewind the stream, which not all
// streams support.
std::unique_ptr<format> guess_format(std::istream& in) {
    // ...
}
// Parse `in` according to the explicitly given format `fmt` and collect the
// generated rows into a `table`.
table parse(std::istream& in, format const& fmt) {
    std::vector<row> rows;
    // The generator is the *source* and `rows` the destination. A range-for
    // works whether the generator exposes member or free `begin`/`end`, and
    // appends to the vector instead of writing through an iterator into the
    // generator.
    for (auto&& parsed : fmt.parse(in)) {
        rows.push_back(std::forward<decltype(parsed)>(parsed));
    }
    // Hand the rows over instead of copying the whole vector.
    return table{std::move(rows)};
}
// Parse `in` after guessing its format from the stream contents.
table parse(std::istream& in) {
    auto fmt = guess_format(in);
    // Forward the stream itself; there is no `filename` in this overload.
    return parse(in, *fmt);
}
// Convenience overload: open the file called `filename` and parse it via the
// stream-based overload.
table parse(std::string const& filename) {
    std::ifstream file_stream{filename};
    std::istream& input = file_stream;
    return parse(input);
}
} // namespace csv | |
int main() { | |
// Call it like this: | |
auto result1 = csv::parse(std::cin, csv::format_csv()); | |
// Or like this: | |
auto result2 = csv::parse("Some_filename"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment