// Skip to content
//
// Instantly share code, notes, and snippets.
//
// @dodheim
// Forked from tcbrindle/Tokenizer_Comparison.cpp
// Last active January 29, 2016 17:25
// Show Gist options
//   • Save dodheim/fb61441d1630327dd842 to your computer and use it in GitHub Desktop.
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <chrono>
#include <iterator>
#include <string>
#include <vector>
#include <fstream>
#include <sstream>
#include <boost/spirit/home/x3.hpp>
#include <boost/tokenizer.hpp>
#include <boost/utility/string_ref.hpp>
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// The measurement starts when the object is constructed and restarts on
// every call to reset().
struct timer
{
    using clock_type = std::chrono::high_resolution_clock;

    // Restart the measurement from the current instant.
    void reset() noexcept
    {
        start_ = clock_type::now();
    }

    // Time that has passed since construction or the most recent reset(),
    // converted to DurationT (milliseconds unless another unit is requested).
    template<typename DurationT = std::chrono::milliseconds>
    auto elapsed() const noexcept
    {
        auto const delta = clock_type::now() - start_;
        return std::chrono::duration_cast<DurationT>(delta);
    }

private:
    // Instant the current measurement began.
    clock_type::time_point start_{clock_type::now()};
};
// 4 statements
void DoBoostTokenizer(std::ofstream& os, std::string const& str)
{
boost::char_separator<char> const sep(" \n\t\r\f");
boost::tokenizer<boost::char_separator<char>> const tokens(str, sep);
for (auto const& t : tokens)
os << t;
}
// Extracts every whitespace-separated word of `str` with stream iterators,
// stores all of them in a std::vector first, and only then writes the stored
// words to `os` back to back (no separator between words).
// 6 statements
void DoIterators(std::ofstream& os, std::string const& str)
{
    // wrap the text in a stream so operator>> performs the splitting
    std::istringstream input(str);
    // iterator pair spanning every word the stream can produce
    std::istream_iterator<std::string> const first(input);
    std::istream_iterator<std::string> const last;
    // materialise all words before anything is written
    std::vector<std::string> const words(first, last);
    // emit the collected words
    for (std::string const& word : words)
        os << word;
}
// Extracts every whitespace-separated word of `str` with stream iterators and
// forwards each word straight to `os` as it is read, without storing the
// words anywhere in between. Nothing is written between words.
// 4 statements
void DoIteratorsRight(std::ofstream& os, std::string const& str)
{
    // wrap the text in a stream so operator>> performs the splitting
    std::istringstream input(str);
    // iterator pair spanning every word the stream can produce
    std::istream_iterator<std::string> const first(input);
    std::istream_iterator<std::string> const last;
    // an ostream_iterator without a delimiter writes each word with operator<<
    std::copy(first, last, std::ostream_iterator<std::string>(os));
}
// Tokenizes `str` with std::strtok, using space, newline, tab, carriage
// return and form feed as delimiters, and streams every token to `os` with
// nothing written between tokens.
//
// std::strtok overwrites the delimiters it finds, so it runs on a private
// copy of the text. The copy is owned by a std::string rather than a
// malloc'd buffer: an allocation failure surfaces as std::bad_alloc instead
// of a null pointer handed to strcpy, and the buffer is released even if
// streaming throws.
// 5 statements
void DoStrtok(std::ofstream& os, std::string const& str)
{
    std::string scratch(str);
    // &scratch[0] is valid even for an empty string (it addresses the NUL)
    char const* token = std::strtok(&scratch[0], " \n\t\r\f");
    while (token)
    {
        os << token;
        token = std::strtok(nullptr, " \n\t\r\f");
    }
}
// Returns true when `tst` is one of the delimiter characters: space, newline,
// tab, carriage return or form feed. '\0' is never reported as a delimiter.
// 6 statements
static bool IsDelim(char tst)
{
    const char* delims = " \n\t\r\f";
    do // Delimiter string cannot be empty, so don't check for it
    {
        if (tst == *delims)
            return true;
        ++delims;
    } while (*delims);
    return false;
}

// Hand-rolled in-place tokenizer: walks a private, writable copy of `str`,
// NUL-terminates each token where its trailing delimiter was, and streams the
// token to `os`. Nothing is written between tokens.
//
// The copy is owned by a std::string, so there is no unchecked malloc and no
// leak if streaming throws. The scan never advances past the buffer's own
// terminating NUL, including when the text does not end in a delimiter.
// 13 statements (19 total)
void DoJoshsWay(std::ofstream& os, std::string const& str)
{
    std::string scratch(str);
    // &scratch[0] is valid even for an empty string (it addresses the NUL)
    char* p = &scratch[0];
    // skip leading delimiters
    while (*p && IsDelim(*p))
        ++p;
    while (*p)
    {
        // note start of token
        char const* const pTok = p;
        do // skip non-delimiters
        {
            ++p;
        } while (*p && !IsDelim(*p));
        // A token ended by a delimiter gets that delimiter clobbered with a
        // NUL and the cursor moves past it. A token ended by the end of the
        // buffer is already terminated; the cursor must stay on that NUL,
        // because stepping over it would read outside the buffer.
        if (*p)
            *p++ = '\0';
        os << pTok; // send the token
        // skip any further delimiters before the next token
        while (*p && IsDelim(*p))
            ++p;
    }
}
// Returns true when `tst` is one of the delimiter characters: space, newline,
// tab, carriage return or form feed. '\0' is never reported as a delimiter.
// 3 statements
static bool IsDelim_STDFind(char tst)
{
    // For those of you who insist that nobody can ever outperform
    // the standard algorithms... std::find clocks in about 25% slower
    const char delims[] = " \n\t\r\f";
    // search the delimiter characters only, not the literal's terminating NUL
    const char* const end = delims + sizeof(delims) - 1;
    return std::find(delims, end, tst) != end;
}

// Same in-place tokenizer as DoJoshsWay, but classifying characters with
// IsDelim_STDFind: walks a private, writable copy of `str`, NUL-terminates
// each token where its trailing delimiter was, and streams the token to `os`.
// Nothing is written between tokens.
//
// The copy is owned by a std::string, so there is no unchecked malloc and no
// leak if streaming throws. The scan never advances past the buffer's own
// terminating NUL, including when the text does not end in a delimiter.
// 13 statements (16 total)
void DoJoshsWay_STDFind(std::ofstream& os, std::string const& str)
{
    std::string scratch(str);
    // &scratch[0] is valid even for an empty string (it addresses the NUL)
    char* p = &scratch[0];
    // skip leading delimiters
    while (*p && IsDelim_STDFind(*p))
        ++p;
    while (*p)
    {
        // note start of token
        char const* const pTok = p;
        do // skip non-delimiters
        {
            ++p;
        } while (*p && !IsDelim_STDFind(*p));
        // A token ended by a delimiter gets that delimiter clobbered with a
        // NUL and the cursor moves past it. A token ended by the end of the
        // buffer is already terminated; the cursor must stay on that NUL,
        // because stepping over it would read outside the buffer.
        if (*p)
            *p++ = '\0';
        os << pTok; // send the token
        // skip any further delimiters before the next token
        while (*p && IsDelim_STDFind(*p))
            ++p;
    }
}
// Splits [first, last) at every element equal to one of the delimiters in
// [d_first, d_last) and calls binary_op(piece_begin, piece_end) once per
// piece, in order.
//
// Pieces can be empty: a leading delimiter or two adjacent delimiters produce
// a call with piece_begin == piece_end. No call is made for the empty piece
// that would follow a trailing delimiter, and none at all for an empty range.
// Despite its name, InputIt has to be multi-pass: the range is scanned by
// std::find_first_of and the same iterators are then handed to binary_op.
// 6 statements
template<typename InputIt, typename ForwardIt, typename BinOp>
static void for_each_token(
    InputIt first, InputIt last,
    ForwardIt d_first, ForwardIt d_last,
    BinOp binary_op)
{
    while (first != last)
    {
        InputIt const pos = std::find_first_of(first, last, d_first, d_last);
        binary_op(first, pos);
        if (pos == last)
            break;
        // resume just after the delimiter that ended this piece
        first = std::next(pos);
    }
}

// Tokenizes `str` with for_each_token, using space, newline, tab, carriage
// return and form feed as delimiters, and streams every non-empty token to
// `os` with nothing written between tokens.
// 4 statements (10 total)
void DoTristansWay(std::ofstream& os, std::string const& str)
{
    constexpr char delims[] = " \n\t\r\f";
    for_each_token(
        str.cbegin(), str.cend(),
        // The array also holds the literal's terminating '\0'. Stop one short
        // of std::cend so that NUL is not treated as a sixth delimiter.
        std::cbegin(delims), std::cend(delims) - 1,
        [&os](auto first, auto second)
        {
            // for_each_token reports empty pieces too; only real tokens are written
            if (first != second)
                os << std::string(first, second);
        }
    );
}
// Tokenizes `str` with a Boost.Spirit X3 grammar and streams every token to
// `os` with nothing written between tokens. Delimiters are space, newline,
// tab, carriage return and form feed.
//
// Grammar: any number of leading delimiters, then a list of tokens (runs of
// one or more non-delimiter characters) separated by runs of one or more
// delimiters. The leading `*skip` is required because x3::parse is called
// without a skipper: without it, the list parser fails at the first character
// of text that begins with a delimiter and no token is written at all.
// 4 statements
void DoBoostSpiritX3(std::ofstream& os, std::string const& str)
{
    namespace x3 = boost::spirit::x3;
    // Semantic action: the attribute of raw[] is the matched character range,
    // which is written without copying it into a std::string. The range is
    // never empty because a token needs at least one character.
    auto write = [&os](auto const& ctx)
    {
        os << boost::string_ref(&x3::_attr(ctx)[0], x3::_attr(ctx).size());
    };
    auto const skip = x3::char_(" \n\t\r\f");
    x3::parse(
        str.begin(), str.end(),
        *skip >> (x3::raw[+~skip][write] % +skip)
    );
}
int main()
{
std::string const text = []
{
std::ostringstream str;
std::ifstream t("crytek_sponza.obj");
str << t.rdbuf();
return str.str();
}();
std::FILE* const fp = std::fopen("times.csv", "w");
std::fprintf(fp, "boost_tokenizer,iterators,iterators_right,strtok,josh,josh_stdfind,tristan,boost_spiritx3\n");
for (int i = 0; i != 5; ++i)
{
std::chrono::milliseconds tm;
{
timer t;
std::ofstream out_boost_tokenizer("out_boost_tokenizer.txt");
DoBoostTokenizer(out_boost_tokenizer, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_iterators("out_iterators.txt");
DoIterators(out_iterators, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_iterators_right("out_iterators_right.txt");
DoIteratorsRight(out_iterators_right, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_strtok("out_strtok.txt");
DoStrtok(out_strtok, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_josh("out_josh.txt");
DoJoshsWay(out_josh, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_josh_stdfind("out_josh_stdfind.txt");
DoJoshsWay_STDFind(out_josh_stdfind, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_tristan("out_tristan.txt");
DoTristansWay(out_tristan, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld,", tm.count());
{
timer t;
std::ofstream out_boost_spiritx3("out_boost_spiritx3.txt");
DoBoostSpiritX3(out_boost_spiritx3, text);
tm = t.elapsed();
}
std::fprintf(fp, "%lld\n", tm.count());
}
std::fclose(fp);
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment