Created
August 17, 2016 18:36
-
-
Save simmplecoder/6f6ea6248fb9ada5fc207f3858c19d91 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
/// Result of splitting a file into `<...>` tokens and the text between them.
struct parse_result
{
    /// Text segments found outside of `<...>`, in file order.
    std::vector<std::string> text;
    /// Contents of each `<...>` (without the angle brackets), in file order.
    /// The stream output operator pairs tokens[2*i] and tokens[2*i + 1]
    /// with text[i].
    std::vector<std::string> tokens;
};
/// Finds the non-whitespace core of the range [first, last).
///
/// Characters are classified with std::isspace.
///
/// @param first iterator to the first element of the range
/// @param last  iterator one past the final element of the range
/// @return a pair {f, l} where f points at the first non-whitespace element
///         and l points at the LAST non-whitespace element (inclusive, not
///         one-past-the-end). If the range is empty or contains only
///         whitespace, both members equal the original `last`.
template <typename BidirIt>
std::pair<BidirIt, BidirIt> trim(BidirIt first, BidirIt last)
{
    // std::isspace is undefined for negative values other than EOF, so every
    // character is converted to unsigned char before classification.
    while (first != last && std::isspace(static_cast<unsigned char>(*first)))
    {
        ++first;
    }

    // Empty or all-whitespace input: stepping `last` backwards would move it
    // in front of `first` (or in front of the start of the range).
    if (first == last)
    {
        return { first, last };
    }

    // `first` now points at a non-whitespace element, so the backwards scan
    // is guaranteed to stop at `first` at the latest.
    --last;
    while (last != first && std::isspace(static_cast<unsigned char>(*last)))
    {
        --last;
    }
    return { first, last };
}
/// Splits a file into `<...>` tokens and the text found between them.
///
/// The file is read in alternating steps: everything up to the next '<' is a
/// text segment, everything up to the following '>' is a token. Text segments
/// that are empty or consist only of whitespace are dropped; the remaining
/// ones are stored with leading and trailing whitespace removed. Tokens are
/// stored verbatim, without the angle brackets.
///
/// @param file_name path of the file to read
/// @return the collected text segments and tokens, each in file order
/// @throws std::runtime_error if the file cannot be opened
parse_result parse(const std::string& file_name)
{
    std::ifstream file(file_name);
    if (!file.is_open())
    {
        throw std::runtime_error("couldn't open file");
    }

    std::vector<std::string> text;
    std::vector<std::string> tokens;
    std::string buffer;
    char current_delim = '<';
    char next_delim = '>';
    while (std::getline(file, buffer, current_delim))
    {
        if (current_delim == '<')
        {
            // Everything UNTIL a token opening is text.
            const bool blank = std::all_of(buffer.begin(), buffer.end(),
                [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; });
            if (!blank)
            {
                // trim() returns an iterator to the last kept character
                // (inclusive), so the end of the copied range is one past it.
                auto range = trim(buffer.begin(), buffer.end());
                text.emplace_back(range.first, std::next(range.second));
            }
        }
        else
        {
            // Everything between '<' and '>' is a token.
            tokens.push_back(buffer);
        }
        // Alternate between looking for a token opening and a token closing.
        std::swap(current_delim, next_delim);
    }

    return { std::move(text), std::move(tokens) };
}
#include <iostream> | |
std::ostream& operator<<(std::ostream& os, const parse_result& res) | |
{ | |
for (decltype(res.text.size()) i = 0; i < res.text.size(); ++i) | |
{ | |
//every text has pre and post tokens, so 2x | |
os << '<' << res.tokens[i * 2] << '>' << '\n'; | |
os << res.text[i] << '\n'; | |
os << '<' << res.tokens[i * 2 + 1] << '>' << '\n'; | |
} | |
return os; | |
} | |
int main() | |
{ | |
auto result = parse("myfile.txt"); | |
std::cout << result; | |
std::cin.get(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment