dodheim · January 29, 2016 17:25
diff --git a/Tokenizer_Comparison.cpp b/Tokenizer_Comparison.cpp
 #include <cstdlib>
 #include <cstring>
 #include <cstdio>
 #include <algorithm>
 #include <chrono>
 #include <iterator>
 #include <string>
 #include <vector>
 #include <fstream>
 #include <sstream>
 #include <boost/spirit/home/x3.hpp>
 #include <boost/tokenizer.hpp>
 #include <boost/utility/string_ref.hpp>

 // Minimal stopwatch: starts timing when constructed and reports the time
 // elapsed since construction or since the most recent reset().
 struct timer
 {
    // steady_clock is monotonic, so a measured interval cannot be distorted by
    // wall-clock adjustments. high_resolution_clock is allowed to be an alias
    // of system_clock, which gives no such guarantee.
    using clock_type = std::chrono::steady_clock;

    // Returns the time since the stopwatch was (re)started, converted to
    // DurationT (milliseconds unless another duration type is requested).
    template<typename DurationT = std::chrono::milliseconds>
    auto elapsed() const noexcept
    {
        return std::chrono::duration_cast<DurationT>(clock_type::now() - start_);
    }

    // Restarts the stopwatch from the current instant.
    void reset() noexcept { start_ = clock_type::now(); }

 private:
    // instant at which the current measurement began
    clock_type::time_point start_ = clock_type::now();
 };

 // 4 statements
 void DoBoostTokenizer(std::ofstream& os, std::string const& str)
 {
    boost::char_separator<char> const sep(" \n\t\r\f");
    boost::tokenizer<boost::char_separator<char>> const tokens(str, sep);
    for (auto const& t : tokens)
        os << t;
 }

 // 4 statements
 // Splits str on whitespace through stream extraction and writes the tokens
 // to os with nothing between them. All tokens are first gathered into a
 // vector and only afterwards written out.
 void DoIterators(std::ofstream& os, std::string const& str)
 {
    // expose the text as a stream so that extraction performs the splitting
    std::istringstream input(str);

    // gather every whitespace-separated word before emitting anything
    std::vector<std::string> const words(
        std::istream_iterator<std::string>{input},
        std::istream_iterator<std::string>{});

    // write the gathered words to the output stream
    for (std::string const& word : words)
        os << word;
 }

 // 4 statements
 // Splits str on whitespace through stream extraction and writes each token
 // to os as soon as it has been read, with nothing between tokens. No
 // intermediate container is built.
 void DoIteratorsRight(std::ofstream& os, std::string const& str)
 {
    // expose the text as a stream so that extraction performs the splitting
    std::istringstream input(str);

    // pull one whitespace-separated word at a time until extraction fails
    std::string word;
    while (input >> word)
        os << word;
 }

 // 4 statements
 // Splits str on the delimiters " \n\t\r\f" with std::strtok and writes each
 // token to os, with nothing between tokens.
 // std::strtok overwrites delimiters in place, so it is run on a private copy
 // of the text. Tokenizing stops at the first NUL character in str, if any.
 void DoStrtok(std::ofstream& os, std::string const& str)
 {
    char const* const delims = " \n\t\r\f";

    // std::string owns the scratch copy: it is released on every exit path,
    // and a failed allocation throws instead of producing a null buffer
    std::string scratch(str);

    // the first call hands strtok the buffer; later calls pass nullptr to
    // continue from where the previous token ended
    for (char const* tok = std::strtok(&scratch[0], delims); tok; tok = std::strtok(nullptr, delims))
        os << tok;
 }

 // 6 statements
 // Returns true when tst is one of the five delimiters (space, \n, \t, \r,
 // \f) and false for every other character, including NUL.
 static bool IsDelim(char tst)
 {
    const char* delims = " \n\t\r\f";
    do // Delimiter string cannot be empty, so don't check for it
    {
        if (tst == *delims)
            return true;
        ++delims;
    } while (*delims);

    return false;
 }

 // 14 statements (20 total)
 // Hand-rolled tokenizer: splits str on the IsDelim characters and writes
 // each token to os, with nothing between tokens.
 // Tokens are NUL-terminated in place, so the work is done on a private copy
 // of the text. Tokenizing stops at the first NUL character in str, if any.
 void DoJoshsWay(std::ofstream& os, std::string const& str)
 {
    // std::string owns the scratch copy: it is released on every exit path,
    // and a failed allocation throws instead of producing a null buffer
    std::string scratch(str);

    char* p = &scratch[0];

    // skip leading delimiters
    while (*p && IsDelim(*p))
        ++p;

    while (*p)
    {
        // note start of token
        char const* const pTok = p;

        do // skip non-delimiters
        {
            ++p;
        } while (*p && !IsDelim(*p));

        // clobber trailing delimiter with null and step past it; when the
        // token ran up to the end of the text, p must stay on the terminator,
        // otherwise the scan below would read beyond the buffer
        if (*p)
        {
            *p = 0;
            ++p;
        }
        os << pTok; // send the token

        // skip any subsequent trailing delimiters
        while (*p && IsDelim(*p))
            ++p;
    }
 }

 // 3 statements
 // Returns true when tst is one of the five delimiters (space, \n, \t, \r,
 // \f) and false for every other character, including NUL.
 static bool IsDelim_STDFind(char tst)
 {
    // For those of you who insist that nobody can ever outperform
    // the standard algorithms... std::find clocks in about 25% slower
    const char delims[] = " \n\t\r\f";
    // stop before the literal's terminating NUL so that NUL is not a delimiter
    const char* const end = delims + sizeof(delims) - 1;
    return std::find(delims, end, tst) != end;
 }

 // 14 statements (17 total)
 // Hand-rolled tokenizer that classifies characters with IsDelim_STDFind:
 // splits str on the delimiters and writes each token to os, with nothing
 // between tokens.
 // Tokens are NUL-terminated in place, so the work is done on a private copy
 // of the text. Tokenizing stops at the first NUL character in str, if any.
 void DoJoshsWay_STDFind(std::ofstream& os, std::string const& str)
 {
    // std::string owns the scratch copy: it is released on every exit path,
    // and a failed allocation throws instead of producing a null buffer
    std::string scratch(str);

    char* p = &scratch[0];

    // skip leading delimiters
    while (*p && IsDelim_STDFind(*p))
        ++p;

    while (*p)
    {
        // note start of token
        char const* const pTok = p;

        do // skip non-delimiters
        {
            ++p;
        } while (*p && !IsDelim_STDFind(*p));

        // clobber trailing delimiter with null and step past it; when the
        // token ran up to the end of the text, p must stay on the terminator,
        // otherwise the scan below would read beyond the buffer
        if (*p)
        {
            *p = 0;
            ++p;
        }
        os << pTok; // send the token

        // skip any subsequent trailing delimiters
        while (*p && IsDelim_STDFind(*p))
            ++p;
    }
 }

 // 6 statements
 // Splits [first, last) at every element that also occurs in
 // [d_first, d_last) and calls binary_op(piece_begin, piece_end) once per
 // piece. Pieces can be empty (adjacent delimiters, or a delimiter at the very
 // start); no empty piece is reported after a final delimiter.
 // The range is revisited after being searched, so despite the parameter name
 // InputIt has to be at least a forward iterator.
 template<typename InputIt, typename ForwardIt, typename BinOp>
 static void for_each_token(
    InputIt first, InputIt last,
    ForwardIt d_first, ForwardIt d_last,
    BinOp binary_op)
 {
    while (first != last)
    {
        // pos is the next delimiter, or last when none remains
        InputIt const pos = std::find_first_of(first, last, d_first, d_last);
        binary_op(first, pos);
        if (pos == last)
            break;
        // resume just past the delimiter that ended this piece
        first = std::next(pos);
    }
 }

 // 4 statements (10 total)
 // Splits str on the delimiters " \n\t\r\f" with for_each_token and writes
 // every non-empty token to os, with nothing between tokens.
 void DoTristansWay(std::ofstream& os, std::string const& str)
 {
    constexpr char delims[] = " \n\t\r\f";
    for_each_token(
        str.cbegin(), str.cend(),
        // std::cend(delims) lies past the literal's terminating NUL, which
        // would make NUL a sixth delimiter; stop one short so that only the
        // five whitespace characters split the text
        std::cbegin(delims), std::cend(delims) - 1,
        [&os](auto first, auto second)
        {
            // adjacent delimiters yield empty pieces; those are not tokens
            if (first != second)
                os << std::string(first, second);
        }
    );
 }

 // 4 statements
 // Tokenizes str with a Boost.Spirit X3 grammar: runs of non-delimiter
 // characters separated by runs of the delimiters " \n\t\r\f". Each run is
 // written to os from the semantic action, with nothing between tokens.
 // NOTE(review): the grammar begins with a token, so text that starts with a
 // delimiter looks like it would fail to match before anything is written -
 // confirm against the intended input.
 void DoBoostSpiritX3(std::ofstream& os, std::string const& str)
 {
    namespace x3 = boost::spirit::x3;

    // matches any single character from the delimiter set
    auto const delimiter = x3::char_(" \n\t\r\f");

    // semantic action: the attribute of raw[] is the matched character range,
    // which is viewed in place through string_ref instead of being copied
    auto const emit = [&os](auto const& ctx)
    {
        os << boost::string_ref(&x3::_attr(ctx)[0], x3::_attr(ctx).size());
    };

    x3::parse(
        str.begin(), str.end(),
        x3::raw[+~delimiter][emit] % +delimiter
    );
 }

 int main()
 {
    std::string const text = []
    {
        std::ostringstream str;
        std::ifstream t("crytek_sponza.obj");
        str << t.rdbuf();
        return str.str();
    }();

    std::FILE* const fp = std::fopen("times.csv", "w");
    std::fprintf(fp, "boost_tokenizer,iterators,iterators_right,strtok,josh,josh_stdfind,tristan,boost_spiritx3\n");

    for (int i = 0; i != 5; ++i)
    {
        std::chrono::milliseconds tm;
        {
            timer t;
            std::ofstream out_boost_tokenizer("out_boost_tokenizer.txt");
            DoBoostTokenizer(out_boost_tokenizer, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_iterators("out_iterators.txt");
            DoIterators(out_iterators, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_iterators_right("out_iterators_right.txt");
            DoIteratorsRight(out_iterators_right, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_strtok("out_strtok.txt");
            DoStrtok(out_strtok, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_josh("out_josh.txt");
            DoJoshsWay(out_josh, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_josh_stdfind("out_josh_stdfind.txt");
            DoJoshsWay_STDFind(out_josh_stdfind, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_tristan("out_tristan.txt");
            DoTristansWay(out_tristan, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld,", tm.count());

        {
            timer t;
            std::ofstream out_boost_spiritx3("out_boost_spiritx3.txt");
            DoBoostSpiritX3(out_boost_spiritx3, text);
            tm = t.elapsed();
        }
        std::fprintf(fp, "%lld\n", tm.count());
    }

    std::fclose(fp);
 }
	#include <cstdlib>
	#include <cstring>
	#include <cstdio>
	#include <algorithm>
	#include <chrono>
	#include <iterator>
	#include <string>
	#include <vector>
	#include <fstream>
	#include <sstream>
	#include <boost/spirit/home/x3.hpp>
	#include <boost/tokenizer.hpp>
	#include <boost/utility/string_ref.hpp>

	// Minimal stopwatch: starts timing when constructed and reports the time
	// elapsed since construction or since the most recent reset().
	struct timer
	{
	    // steady_clock is monotonic, so a measured interval cannot be distorted
	    // by wall-clock adjustments. high_resolution_clock is allowed to be an
	    // alias of system_clock, which gives no such guarantee.
	    using clock_type = std::chrono::steady_clock;

	    // Returns the time since the stopwatch was (re)started, converted to
	    // DurationT (milliseconds unless another duration type is requested).
	    template<typename DurationT = std::chrono::milliseconds>
	    auto elapsed() const noexcept
	    {
	        return std::chrono::duration_cast<DurationT>(clock_type::now() - start_);
	    }

	    // Restarts the stopwatch from the current instant.
	    void reset() noexcept { start_ = clock_type::now(); }

	private:
	    // instant at which the current measurement began
	    clock_type::time_point start_ = clock_type::now();
	};

	// 4 statements
	void DoBoostTokenizer(std::ofstream& os, std::string const& str)
	{
	boost::char_separator<char> const sep(" \n\t\r\f");
	boost::tokenizer<boost::char_separator<char>> const tokens(str, sep);
	for (auto const& t : tokens)
	os << t;
	}

	// 4 statements
	// Splits str on whitespace through stream extraction and writes the tokens
	// to os with nothing between them. All tokens are first gathered into a
	// vector and only afterwards written out.
	void DoIterators(std::ofstream& os, std::string const& str)
	{
	    // expose the text as a stream so that extraction performs the splitting
	    std::istringstream input(str);

	    // gather every whitespace-separated word before emitting anything
	    std::vector<std::string> const words(
	        std::istream_iterator<std::string>{input},
	        std::istream_iterator<std::string>{});

	    // write the gathered words to the output stream
	    for (std::string const& word : words)
	        os << word;
	}

	// 4 statements
	// Splits str on whitespace through stream extraction and writes each token
	// to os as soon as it has been read, with nothing between tokens. No
	// intermediate container is built.
	void DoIteratorsRight(std::ofstream& os, std::string const& str)
	{
	    // expose the text as a stream so that extraction performs the splitting
	    std::istringstream input(str);

	    // pull one whitespace-separated word at a time until extraction fails
	    std::string word;
	    while (input >> word)
	        os << word;
	}

	// 4 statements
	// Splits str on the delimiters " \n\t\r\f" with std::strtok and writes each
	// token to os, with nothing between tokens.
	// std::strtok overwrites delimiters in place, so it is run on a private copy
	// of the text. Tokenizing stops at the first NUL character in str, if any.
	void DoStrtok(std::ofstream& os, std::string const& str)
	{
	    char const* const delims = " \n\t\r\f";

	    // std::string owns the scratch copy: it is released on every exit path,
	    // and a failed allocation throws instead of producing a null buffer
	    std::string scratch(str);

	    // the first call hands strtok the buffer; later calls pass nullptr to
	    // continue from where the previous token ended
	    for (char const* tok = std::strtok(&scratch[0], delims); tok; tok = std::strtok(nullptr, delims))
	        os << tok;
	}

	// 6 statements
	// Returns true when tst is one of the five delimiters (space, \n, \t, \r,
	// \f) and false for every other character, including NUL.
	static bool IsDelim(char tst)
	{
	    const char* delims = " \n\t\r\f";
	    do // Delimiter string cannot be empty, so don't check for it
	    {
	        if (tst == *delims)
	            return true;
	        ++delims;
	    } while (*delims);

	    return false;
	}

	// 14 statements (20 total)
	// Hand-rolled tokenizer: splits str on the IsDelim characters and writes
	// each token to os, with nothing between tokens.
	// Tokens are NUL-terminated in place, so the work is done on a private copy
	// of the text. Tokenizing stops at the first NUL character in str, if any.
	void DoJoshsWay(std::ofstream& os, std::string const& str)
	{
	    // std::string owns the scratch copy: it is released on every exit path,
	    // and a failed allocation throws instead of producing a null buffer
	    std::string scratch(str);

	    char* p = &scratch[0];

	    // skip leading delimiters; the character under p is tested, not the
	    // pointer itself
	    while (*p && IsDelim(*p))
	        ++p;

	    while (*p)
	    {
	        // note start of token
	        char const* const pTok = p;

	        do // skip non-delimiters
	        {
	            ++p;
	        } while (*p && !IsDelim(*p));

	        // clobber trailing delimiter with null and step past it; when the
	        // token ran up to the end of the text, p must stay on the
	        // terminator, otherwise the scan below would read beyond the buffer
	        if (*p)
	        {
	            *p = 0;
	            ++p;
	        }
	        os << pTok; // send the token

	        // skip any subsequent trailing delimiters
	        while (*p && IsDelim(*p))
	            ++p;
	    }
	}

	// 3 statements
	// Returns true when tst is one of the five delimiters (space, \n, \t, \r,
	// \f) and false for every other character, including NUL.
	static bool IsDelim_STDFind(char tst)
	{
	    // For those of you who insist that nobody can ever outperform
	    // the standard algorithms... std::find clocks in about 25% slower
	    const char delims[] = " \n\t\r\f";
	    // stop before the literal's terminating NUL so that NUL is not a delimiter
	    const char* const end = delims + sizeof(delims) - 1;
	    return std::find(delims, end, tst) != end;
	}

	// 14 statements (17 total)
	// Hand-rolled tokenizer that classifies characters with IsDelim_STDFind:
	// splits str on the delimiters and writes each token to os, with nothing
	// between tokens.
	// Tokens are NUL-terminated in place, so the work is done on a private copy
	// of the text. Tokenizing stops at the first NUL character in str, if any.
	void DoJoshsWay_STDFind(std::ofstream& os, std::string const& str)
	{
	    // std::string owns the scratch copy: it is released on every exit path,
	    // and a failed allocation throws instead of producing a null buffer
	    std::string scratch(str);

	    char* p = &scratch[0];

	    // skip leading delimiters; the character under p is tested, not the
	    // pointer itself
	    while (*p && IsDelim_STDFind(*p))
	        ++p;

	    while (*p)
	    {
	        // note start of token
	        char const* const pTok = p;

	        do // skip non-delimiters
	        {
	            ++p;
	        } while (*p && !IsDelim_STDFind(*p));

	        // clobber trailing delimiter with null and step past it; when the
	        // token ran up to the end of the text, p must stay on the
	        // terminator, otherwise the scan below would read beyond the buffer
	        if (*p)
	        {
	            *p = 0;
	            ++p;
	        }
	        os << pTok; // send the token

	        // skip any subsequent trailing delimiters
	        while (*p && IsDelim_STDFind(*p))
	            ++p;
	    }
	}

	// 6 statements
	// Splits [first, last) at every element that also occurs in
	// [d_first, d_last) and calls binary_op(piece_begin, piece_end) once per
	// piece. Pieces can be empty (adjacent delimiters, or a delimiter at the
	// very start); no empty piece is reported after a final delimiter.
	// The range is revisited after being searched, so despite the parameter
	// name InputIt has to be at least a forward iterator.
	template<typename InputIt, typename ForwardIt, typename BinOp>
	static void for_each_token(
	    InputIt first, InputIt last,
	    ForwardIt d_first, ForwardIt d_last,
	    BinOp binary_op)
	{
	    while (first != last)
	    {
	        // pos is the next delimiter, or last when none remains
	        InputIt const pos = std::find_first_of(first, last, d_first, d_last);
	        binary_op(first, pos);
	        if (pos == last)
	            break;
	        // resume just past the delimiter that ended this piece
	        first = std::next(pos);
	    }
	}

	// 4 statements (10 total)
	// Splits str on the delimiters " \n\t\r\f" with for_each_token and writes
	// every non-empty token to os, with nothing between tokens.
	void DoTristansWay(std::ofstream& os, std::string const& str)
	{
	    constexpr char delims[] = " \n\t\r\f";
	    for_each_token(
	        str.cbegin(), str.cend(),
	        // std::cend(delims) lies past the literal's terminating NUL, which
	        // would make NUL a sixth delimiter; stop one short so that only the
	        // five whitespace characters split the text
	        std::cbegin(delims), std::cend(delims) - 1,
	        [&os](auto first, auto second)
	        {
	            // adjacent delimiters yield empty pieces; those are not tokens
	            if (first != second)
	                os << std::string(first, second);
	        }
	    );
	}

	// 4 statements
	// Tokenizes str with a Boost.Spirit X3 grammar: runs of non-delimiter
	// characters separated by runs of the delimiters " \n\t\r\f". Each run is
	// written to os from the semantic action, with nothing between tokens.
	// NOTE(review): the grammar begins with a token, so text that starts with a
	// delimiter looks like it would fail to match before anything is written -
	// confirm against the intended input.
	void DoBoostSpiritX3(std::ofstream& os, std::string const& str)
	{
	    namespace x3 = boost::spirit::x3;

	    // matches any single character from the delimiter set
	    auto const delimiter = x3::char_(" \n\t\r\f");

	    // semantic action: the attribute of raw[] is the matched character
	    // range, which is viewed in place through string_ref instead of copied
	    auto const emit = [&os](auto const& ctx)
	    {
	        os << boost::string_ref(&x3::_attr(ctx)[0], x3::_attr(ctx).size());
	    };

	    x3::parse(
	        str.begin(), str.end(),
	        x3::raw[+~delimiter][emit] % +delimiter
	    );
	}

	int main()
	{
	std::string const text = []
	{
	std::ostringstream str;
	std::ifstream t("crytek_sponza.obj");
	str << t.rdbuf();
	return str.str();
	}();

	std::FILE* const fp = std::fopen("times.csv", "w");
	std::fprintf(fp, "boost_tokenizer,iterators,iterators_right,strtok,josh,josh_stdfind,tristan,boost_spiritx3\n");

	for (int i = 0; i != 5; ++i)
	{
	std::chrono::milliseconds tm;
	{
	timer t;
	std::ofstream out_boost_tokenizer("out_boost_tokenizer.txt");
	DoBoostTokenizer(out_boost_tokenizer, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_iterators("out_iterators.txt");
	DoIterators(out_iterators, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_iterators_right("out_iterators_right.txt");
	DoIteratorsRight(out_iterators_right, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_strtok("out_strtok.txt");
	DoStrtok(out_strtok, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_josh("out_josh.txt");
	DoJoshsWay(out_josh, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_josh_stdfind("out_josh_stdfind.txt");
	DoJoshsWay_STDFind(out_josh_stdfind, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_tristan("out_tristan.txt");
	DoTristansWay(out_tristan, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld,", tm.count());

	{
	timer t;
	std::ofstream out_boost_spiritx3("out_boost_spiritx3.txt");
	DoBoostSpiritX3(out_boost_spiritx3, text);
	tm = t.elapsed();
	}
	std::fprintf(fp, "%lld\n", tm.count());
	}

	std::fclose(fp);
	}