marchelbling · April 1, 2022 06:35 · guest271314 · Mar 26, 2022 · marchelbling · Mar 31, 2022
diff --git a/README.md b/README.md
diff --git a/json_stream b/json_stream
 #include <string>
 #include <sstream>
 #include <fstream>
 #include <vector>
 #include <map>
 #include <cmath>
 #include <limits>
 #include "utf8_json"


 // Compile-time traits used by json_stream to pick a serialization strategy.
 //
 // is_container<T>             : true for std::vector and std::map instances.
 // is_associative_container<T> : true for std::map instances keyed by std::string
 //                               (serialized as JSON objects).
 // Both default to false for every other type.
 template<typename T>
 struct is_container : std::false_type {};

 template<typename T>
 struct is_associative_container : std::false_type {};

 // vector overload (any allocator, so non-default allocators are recognised too)
 template<typename T, typename Alloc>
 struct is_container< std::vector<T, Alloc> > : std::true_type {};

 // map overload (any comparator / allocator)
 template<typename K, typename V, typename Compare, typename Alloc>
 struct is_container< std::map<K, V, Compare, Alloc> > : std::true_type {};

 // only string-keyed maps are associative containers
 template<typename V, typename Compare, typename Alloc>
 struct is_associative_container< std::map<std::string, V, Compare, Alloc> > : std::true_type {};



 // Serializes plain values, std::vector and std::map instances as JSON text
 // into a file.
 //
 // The file is opened at construction. When it could not be opened, every
 // write through operator<< is silently skipped.
 class json_stream {

 public:
    // Opens `path` for writing.
    json_stream(const std::string& path) : _stream(path.c_str())
    {}

    // Writes the JSON representation of `data`; returns *this for chaining.
    template<typename T>
    json_stream& operator<<(const T& data) {
        if (_stream.is_open()) {
            _stream << dump(data);
        }
        return *this;
    }

    // forward standard manipulator like std::endl
    // (public so that `js << std::endl` is usable by callers)
    typedef std::ostream& (*ostream_manipulator)(std::ostream&);
    json_stream& operator<<(ostream_manipulator pf) {
        if (_stream.is_open()) {
            _stream << pf;
        }
        return *this;
    }

 private:
    // dispatch to actual dump method:
    // * not iterable type dumped as simple value
    // * iterable type
    //   * string-keyed map dumped as a JSON object
    //   * otherwise dumped as a JSON array
    template<typename T>
    std::string dump(const T& t) const {
        return dump_value_or_container(t, typename is_container<T>::type());
    }

    // dispatch to correct dump method
    template<typename T>
    std::string dump_value_or_container(const T& t, std::false_type) const {
        return dump_value(t);
    }

    template<typename T>
    std::string dump_value_or_container(const T& t, std::true_type) const {
        return dump_simple_or_associative_container(t, typename is_associative_container<T>::type());
    }

    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::false_type) const {
        return dump_simple_container(t);
    }

    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::true_type) const {
        return dump_associative_container(t);
    }

    // implement type specific serialization

    // Generic value: streamed with operator<< after sanitizing.
    template<typename V>
    std::string dump_value(const V& value) const {
        std::ostringstream oss;
        oss << sanitize(value);
        return oss.str();
    }

    // String: escaped and wrapped in double quotes.
    std::string dump_value(const std::string& value) const {
        return "\"" + sanitize(value) + "\"";
    }

    // Element of a map whose key is not a string: dumped as a two-item array.
    template<typename K, typename V>
    std::string dump_value(const std::pair<const K, V>& pair) const {
        std::ostringstream oss;
        oss << "[" << dump(pair.first) << ", " << dump(pair.second) << "]";
        return oss.str();
    }

    // Element of a string-keyed map: dumped as `"key": value`.
    template<typename V>
    std::string dump_pair(const std::pair<const std::string, V>& pair) const {
        std::ostringstream oss;
        oss << dump(pair.first) << ": " << dump(pair.second);
        return oss.str();
    }

    // Dumps any iterable as a JSON array; an empty container yields "[]".
    template<typename C>
    std::string dump_simple_container(const C& container) const
    {
        std::ostringstream oss;
        oss << "[";
        bool first = true;
        for (typename C::const_iterator it = container.begin() ; it != container.end() ; ++ it) {
            if (!first) {
                oss << ", ";
            }
            oss << dump(*it);
            first = false;
        }
        oss << "]";

        return oss.str();
    }

    // Dumps a string-keyed map as a JSON object; an empty map yields "{}".
    template<typename M>
    std::string dump_associative_container(const M& map) const
    {
        std::ostringstream oss;
        oss << "{";
        bool first = true;
        for (typename M::const_iterator it = map.begin() ; it != map.end() ; ++ it) {
            if (!first) {
                oss << ", ";
            }
            oss << dump_pair(*it);
            first = false;
        }
        oss << "}";

        return oss.str();
    }

    // Default sanitizer: value is returned unchanged.
    template<typename T>
    T sanitize(const T& t) const {
        return t;
    }

    // Returns -1, 0 or 1 according to the sign of `val`.
    template <typename T>
    int sgn(const T&  val) const {
        return (T(0) < val) - (val < T(0));
    }

    // Maps non-finite values to finite ones: +/-infinity becomes
    // +/-numeric_limits<double>::max() and NaN becomes 0.
    double sanitize(const double d) const {
        if(std::isfinite(d)) {
            return d;
        }
        if(std::isinf(d)) {
            return sgn(d) * std::numeric_limits<double>::max();
        }
        return 0.;
    }

    // Floats are widened to double and sanitized the same way.
    double sanitize(const float f) const {
        return sanitize(static_cast<double>(f));
    }

    // Decodes `input` as UTF-8 and re-encodes it with JSON escapes applied.
    std::string sanitize(std::string const& input) const {
        return utf8_json::json_encode_codepoints(utf8_json::decode_utf8(input));
    }

    std::ofstream _stream;
 };
diff --git a/json_test.cpp b/json_test.cpp
 #include "json_stream"

 #include <map>
 #include <vector>
 #include <iostream>

 // Smoke test: writes one string-keyed map of floats to /tmp/toto.
 int main() {
    json_stream js(std::string("/tmp/toto"));

    std::map< std::string, std::vector<float> > object;
    // a finite value plus NaN and -infinity to exercise non-finite handling
    std::vector<float> data = { 1.f, std::numeric_limits<float>::quiet_NaN(), -std::numeric_limits<float>::infinity() };
    // The literal contains an embedded NUL: build the string with an explicit
    // length, otherwise it would stop at the NUL and drop the trailing "+∞".
    static const char raw_name[] = u8"foo+é+\n\r\b\t+\v\0+∞";
    std::string name(raw_name, sizeof(raw_name) - 1);
    object[name] = data;

    js << object;
    return 0;
 }
diff --git a/utf8_json b/utf8_json
 #include <string>
 #include <sstream>
 #include <iomanip>


 // UTF-8 decoding and JSON string escaping helpers.
 //
 // Every function is `inline` so that including this file from several
 // translation units does not produce multiple-definition link errors.
 namespace utf8_json {
    // Returns the value of `value` as an unsigned byte (0..255).
    inline unsigned int mask8(char const value) {
        return value & 0xff;
    }

    // True when `byte` has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
    inline bool is_valid_continuation_byte(unsigned int byte) {
        return ((byte & 0xC0) == 0x80);
    }

    // Advances `iterator` to the next byte and returns it.
    // When there is no next byte the iterator is left untouched and 0 is
    // returned (0 is never a valid continuation byte), so the end iterator is
    // never dereferenced nor stepped over.
    inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
        if(iterator != end_iterator && (iterator + 1) != end_iterator) {
            ++ iterator;
            return static_cast<int>(mask8(*iterator));
        }
        return 0; // invalid continuation byte
    }

    // Appends `replacement` to `output` `count` times.
    inline void insert_replacement(std::vector<unsigned int>& output, unsigned int replacement, unsigned int count) {
        for(unsigned int i = 0 ; i < count ; ++ i) {
            output.push_back(replacement);
        }
    }

    // Number of input bytes in [first, current], both ends included.
    inline unsigned int consumed_bytes(std::string::const_iterator first, std::string::const_iterator current) {
        return static_cast<unsigned int>(current - first) + 1;
    }

    // Decodes `input` as UTF-8 and returns its code points.
    //
    // Malformed input (stray continuation bytes, overlong forms, encoded
    // surrogates, values above U+10FFFF, truncated sequences) produces one
    // `replacement` code point per input byte consumed by the broken
    // sequence; the byte that fails the continuation check is consumed too.
    inline std::vector<unsigned int> decode_utf8(const std::string& input, const int replacement=0xfffd) {
        std::vector<unsigned int> codepoints;
        const std::string::const_iterator end = input.end();

        for(std::string::const_iterator iterator = input.begin() ; iterator != end ; ++ iterator) {
            const std::string::const_iterator start = iterator;
            const unsigned int code_unit1 = mask8(*iterator);

            if (code_unit1 < 0x80) {
                codepoints.push_back(code_unit1);
            }
            else if (code_unit1 < 0xC2) { // continuation or overlong 2-byte sequence
                codepoints.push_back(replacement);
            }
            else if (code_unit1 < 0xE0) { // 2-byte sequence
                const unsigned int code_unit2 = get_next_byte(iterator, end);

                if (!is_valid_continuation_byte(code_unit2)) {
                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                }
                else {
                    codepoints.push_back((code_unit1 << 6) + code_unit2 - 0x3080);
                }
            }
            else if (code_unit1 < 0xF0) { // 3-byte sequence
                const unsigned int code_unit2 = get_next_byte(iterator, end);

                if (!is_valid_continuation_byte(code_unit2) ||
                    (code_unit1 == 0xE0 && code_unit2 < 0xA0) ||  /* overlong */
                    (code_unit1 == 0xED && code_unit2 >= 0xA0)) { /* surrogate U+D800..U+DFFF */
                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                }
                else {
                    const unsigned int code_unit3 = get_next_byte(iterator, end);

                    if (!is_valid_continuation_byte(code_unit3)) {
                        insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                    }
                    else {
                        codepoints.push_back((code_unit1 << 12) + (code_unit2 << 6) + code_unit3 - 0xE2080);
                    }
                }
            }
            else if (code_unit1 < 0xF5) { // 4-byte sequence
                const unsigned int code_unit2 = get_next_byte(iterator, end);

                if(!is_valid_continuation_byte(code_unit2) ||
                   (code_unit1 == 0xF0 && code_unit2 < 0x90) || /* overlong */
                   (code_unit1 == 0xF4 && code_unit2 >= 0x90)) {  /* > U+10FFFF */
                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                }
                else {
                    const unsigned int code_unit3 = get_next_byte(iterator, end);

                    if(!is_valid_continuation_byte(code_unit3)) {
                        insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                    }
                    else {
                        const unsigned int code_unit4 = get_next_byte(iterator, end);

                        if(!is_valid_continuation_byte(code_unit4)) {
                            insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
                        }
                        else {
                            codepoints.push_back((code_unit1 << 18) + (code_unit2 << 12) + (code_unit3 << 6) + code_unit4 - 0x3C82080);
                        }
                    }
                }
            }
            else {
                /* > U+10FFFF */
                insert_replacement(codepoints, replacement, 1);
            }
        }
        return codepoints;
    }


    // Formats `codepoint` as a JSON \uXXXX escape (lower-case hex, zero padded
    // to at least 4 digits).
    inline std::string json_encode_control_char(unsigned int codepoint) {
        std::ostringstream oss;
        oss.fill('0');
        oss << "\\u" << std::setw(4) << std::hex << codepoint;
        return oss.str();
    }


    // Encodes one code point as UTF-8.
    // Code points above U+10FFFF and surrogates (U+D800..U+DFFF) cannot be
    // represented in UTF-8 and are encoded as U+FFFD instead.
    inline std::string utf8_encode(unsigned int codepoint) {
        std::string output;

        // NOTE(review): code points U+0591..U+05F3 are dropped (encoded as an
        // empty string). The reason is not visible here; confirm it is intended.
        if(codepoint > 0x590 && codepoint < 0x5F4) {
            return output;
        }

        // out of range or surrogate
        if(codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
            return utf8_encode(0xfffd);
        }

        if (codepoint < 0x80) {
            output.push_back(static_cast<char>(codepoint));
        }
        else if (codepoint <= 0x7FF) {
            output.push_back(static_cast<char>((codepoint >> 6) + 0xC0));
            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
        }
        else if (codepoint <= 0xFFFF) {
            output.push_back(static_cast<char>((codepoint >> 12) + 0xE0));
            output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
        }
        else {
            output.push_back(static_cast<char>((codepoint >> 18) + 0xF0));
            output.push_back(static_cast<char>(((codepoint >> 12) & 0x3F) + 0x80));
            output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
        }
        return output;
    }


    // Builds the body of a JSON string (without the surrounding quotes) from
    // code points: short escapes for \b \t \n \f \r " / and \, \uXXXX escapes
    // for the remaining code points below 32, 127 and 128..159, UTF-8 for
    // everything else.
    inline std::string json_encode_codepoints(std::vector<unsigned int> const& codepoints) {
        std::string json_string;

        for(std::vector<unsigned int>::const_iterator codepoint = codepoints.begin() ; codepoint != codepoints.end() ; ++ codepoint) {
            switch(*codepoint) {
                case 8:  json_string += "\\b";  break; // \b
                case 9:  json_string += "\\t";  break; // \t
                case 10: json_string += "\\n";  break; // \n
                case 12: json_string += "\\f";  break; // \f
                case 13: json_string += "\\r";  break; // \r
                case 34: json_string += "\\\""; break; // "
                case 47: json_string += "\\/";  break; // /
                case 92: json_string += "\\\\"; break; // backslash
                default:
                    if(*codepoint < 32 || *codepoint == 127 || (*codepoint >= 128 && *codepoint <= 159)) {
                        json_string += json_encode_control_char(*codepoint);
                    }
                    else {
                        json_string += utf8_encode(*codepoint);
                    }
                    break;
            }
        }
        return json_string;
    }
 }
	#include <string>
	#include <sstream>
	#include <fstream>
	#include <vector>
	#include <map>
	#include <cmath>
	#include <limits>
	#include "utf8_json"


	// Compile-time traits used by json_stream to pick a serialization strategy.
	//
	// is_container<T>             : true for std::vector and std::map instances.
	// is_associative_container<T> : true for std::map instances keyed by std::string
	//                               (serialized as JSON objects).
	// Both default to false for every other type.
	template<typename T>
	struct is_container : std::false_type {};

	template<typename T>
	struct is_associative_container : std::false_type {};

	// vector overload (any allocator, so non-default allocators are recognised too)
	template<typename T, typename Alloc>
	struct is_container< std::vector<T, Alloc> > : std::true_type {};

	// map overload (any comparator / allocator)
	template<typename K, typename V, typename Compare, typename Alloc>
	struct is_container< std::map<K, V, Compare, Alloc> > : std::true_type {};

	// only string-keyed maps are associative containers
	template<typename V, typename Compare, typename Alloc>
	struct is_associative_container< std::map<std::string, V, Compare, Alloc> > : std::true_type {};



	// Serializes plain values, std::vector and std::map instances as JSON text
	// into a file.
	//
	// The file is opened at construction. When it could not be opened, every
	// write through operator<< is silently skipped.
	class json_stream {

	public:
	    // Opens `path` for writing.
	    json_stream(const std::string& path) : _stream(path.c_str())
	    {}

	    // Writes the JSON representation of `data`; returns *this for chaining.
	    template<typename T>
	    json_stream& operator<<(const T& data) {
	        if (_stream.is_open()) {
	            _stream << dump(data);
	        }
	        return *this;
	    }

	    // forward standard manipulator like std::endl
	    // (public so that `js << std::endl` is usable by callers)
	    typedef std::ostream& (*ostream_manipulator)(std::ostream&);
	    json_stream& operator<<(ostream_manipulator pf) {
	        if (_stream.is_open()) {
	            _stream << pf;
	        }
	        return *this;
	    }

	private:
	    // dispatch to actual dump method:
	    // * not iterable type dumped as simple value
	    // * iterable type
	    //   * string-keyed map dumped as a JSON object
	    //   * otherwise dumped as a JSON array
	    template<typename T>
	    std::string dump(const T& t) const {
	        return dump_value_or_container(t, typename is_container<T>::type());
	    }

	    // dispatch to correct dump method
	    template<typename T>
	    std::string dump_value_or_container(const T& t, std::false_type) const {
	        return dump_value(t);
	    }

	    template<typename T>
	    std::string dump_value_or_container(const T& t, std::true_type) const {
	        return dump_simple_or_associative_container(t, typename is_associative_container<T>::type());
	    }

	    template<typename T>
	    std::string dump_simple_or_associative_container(const T& t, std::false_type) const {
	        return dump_simple_container(t);
	    }

	    template<typename T>
	    std::string dump_simple_or_associative_container(const T& t, std::true_type) const {
	        return dump_associative_container(t);
	    }

	    // implement type specific serialization

	    // Generic value: streamed with operator<< after sanitizing.
	    template<typename V>
	    std::string dump_value(const V& value) const {
	        std::ostringstream oss;
	        oss << sanitize(value);
	        return oss.str();
	    }

	    // String: escaped and wrapped in double quotes.
	    std::string dump_value(const std::string& value) const {
	        return "\"" + sanitize(value) + "\"";
	    }

	    // Element of a map whose key is not a string: dumped as a two-item array.
	    template<typename K, typename V>
	    std::string dump_value(const std::pair<const K, V>& pair) const {
	        std::ostringstream oss;
	        oss << "[" << dump(pair.first) << ", " << dump(pair.second) << "]";
	        return oss.str();
	    }

	    // Element of a string-keyed map: dumped as `"key": value`.
	    template<typename V>
	    std::string dump_pair(const std::pair<const std::string, V>& pair) const {
	        std::ostringstream oss;
	        oss << dump(pair.first) << ": " << dump(pair.second);
	        return oss.str();
	    }

	    // Dumps any iterable as a JSON array; an empty container yields "[]".
	    template<typename C>
	    std::string dump_simple_container(const C& container) const
	    {
	        std::ostringstream oss;
	        oss << "[";
	        bool first = true;
	        for (typename C::const_iterator it = container.begin() ; it != container.end() ; ++ it) {
	            if (!first) {
	                oss << ", ";
	            }
	            oss << dump(*it);
	            first = false;
	        }
	        oss << "]";

	        return oss.str();
	    }

	    // Dumps a string-keyed map as a JSON object; an empty map yields "{}".
	    template<typename M>
	    std::string dump_associative_container(const M& map) const
	    {
	        std::ostringstream oss;
	        oss << "{";
	        bool first = true;
	        for (typename M::const_iterator it = map.begin() ; it != map.end() ; ++ it) {
	            if (!first) {
	                oss << ", ";
	            }
	            oss << dump_pair(*it);
	            first = false;
	        }
	        oss << "}";

	        return oss.str();
	    }

	    // Default sanitizer: value is returned unchanged.
	    template<typename T>
	    T sanitize(const T& t) const {
	        return t;
	    }

	    // Returns -1, 0 or 1 according to the sign of `val`.
	    template <typename T>
	    int sgn(const T& val) const {
	        return (T(0) < val) - (val < T(0));
	    }

	    // Maps non-finite values to finite ones: +/-infinity becomes
	    // +/-numeric_limits<double>::max() and NaN becomes 0.
	    double sanitize(const double d) const {
	        if(std::isfinite(d)) {
	            return d;
	        }
	        if(std::isinf(d)) {
	            return sgn(d) * std::numeric_limits<double>::max();
	        }
	        return 0.;
	    }

	    // Floats are widened to double and sanitized the same way.
	    double sanitize(const float f) const {
	        return sanitize(static_cast<double>(f));
	    }

	    // Decodes `input` as UTF-8 and re-encodes it with JSON escapes applied.
	    std::string sanitize(std::string const& input) const {
	        return utf8_json::json_encode_codepoints(utf8_json::decode_utf8(input));
	    }

	    std::ofstream _stream;
	};
	#include "json_stream"

	#include <map>
	#include <vector>
	#include <iostream>

	// Smoke test: writes one string-keyed map of floats to /tmp/toto.
	int main() {
	    json_stream js(std::string("/tmp/toto"));

	    std::map< std::string, std::vector<float> > object;
	    // a finite value plus NaN and -infinity to exercise non-finite handling
	    std::vector<float> data = { 1.f, std::numeric_limits<float>::quiet_NaN(), -std::numeric_limits<float>::infinity() };
	    // The literal contains an embedded NUL: build the string with an explicit
	    // length, otherwise it would stop at the NUL and drop the trailing "+∞".
	    static const char raw_name[] = u8"foo+é+\n\r\b\t+\v\0+∞";
	    std::string name(raw_name, sizeof(raw_name) - 1);
	    object[name] = data;

	    js << object;
	    return 0;
	}
	#include <string>
	#include <sstream>
	#include <iomanip>


	// UTF-8 decoding and JSON string escaping helpers.
	//
	// Every function is `inline` so that including this file from several
	// translation units does not produce multiple-definition link errors.
	namespace utf8_json {
	    // Returns the value of `value` as an unsigned byte (0..255).
	    inline unsigned int mask8(char const value) {
	        return value & 0xff;
	    }

	    // True when `byte` has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
	    inline bool is_valid_continuation_byte(unsigned int byte) {
	        return ((byte & 0xC0) == 0x80);
	    }

	    // Advances `iterator` to the next byte and returns it.
	    // When there is no next byte the iterator is left untouched and 0 is
	    // returned (0 is never a valid continuation byte), so the end iterator is
	    // never dereferenced nor stepped over.
	    inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
	        if(iterator != end_iterator && (iterator + 1) != end_iterator) {
	            ++ iterator;
	            return static_cast<int>(mask8(*iterator));
	        }
	        return 0; // invalid continuation byte
	    }

	    // Appends `replacement` to `output` `count` times.
	    inline void insert_replacement(std::vector<unsigned int>& output, unsigned int replacement, unsigned int count) {
	        for(unsigned int i = 0 ; i < count ; ++ i) {
	            output.push_back(replacement);
	        }
	    }

	    // Number of input bytes in [first, current], both ends included.
	    inline unsigned int consumed_bytes(std::string::const_iterator first, std::string::const_iterator current) {
	        return static_cast<unsigned int>(current - first) + 1;
	    }

	    // Decodes `input` as UTF-8 and returns its code points.
	    //
	    // Malformed input (stray continuation bytes, overlong forms, encoded
	    // surrogates, values above U+10FFFF, truncated sequences) produces one
	    // `replacement` code point per input byte consumed by the broken
	    // sequence; the byte that fails the continuation check is consumed too.
	    inline std::vector<unsigned int> decode_utf8(const std::string& input, const int replacement=0xfffd) {
	        std::vector<unsigned int> codepoints;
	        const std::string::const_iterator end = input.end();

	        for(std::string::const_iterator iterator = input.begin() ; iterator != end ; ++ iterator) {
	            const std::string::const_iterator start = iterator;
	            const unsigned int code_unit1 = mask8(*iterator);

	            if (code_unit1 < 0x80) {
	                codepoints.push_back(code_unit1);
	            }
	            else if (code_unit1 < 0xC2) { // continuation or overlong 2-byte sequence
	                codepoints.push_back(replacement);
	            }
	            else if (code_unit1 < 0xE0) { // 2-byte sequence
	                const unsigned int code_unit2 = get_next_byte(iterator, end);

	                if (!is_valid_continuation_byte(code_unit2)) {
	                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                }
	                else {
	                    codepoints.push_back((code_unit1 << 6) + code_unit2 - 0x3080);
	                }
	            }
	            else if (code_unit1 < 0xF0) { // 3-byte sequence
	                const unsigned int code_unit2 = get_next_byte(iterator, end);

	                if (!is_valid_continuation_byte(code_unit2) ||
	                    (code_unit1 == 0xE0 && code_unit2 < 0xA0) ||  /* overlong */
	                    (code_unit1 == 0xED && code_unit2 >= 0xA0)) { /* surrogate U+D800..U+DFFF */
	                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                }
	                else {
	                    const unsigned int code_unit3 = get_next_byte(iterator, end);

	                    if (!is_valid_continuation_byte(code_unit3)) {
	                        insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                    }
	                    else {
	                        codepoints.push_back((code_unit1 << 12) + (code_unit2 << 6) + code_unit3 - 0xE2080);
	                    }
	                }
	            }
	            else if (code_unit1 < 0xF5) { // 4-byte sequence
	                const unsigned int code_unit2 = get_next_byte(iterator, end);

	                if(!is_valid_continuation_byte(code_unit2) ||
	                   (code_unit1 == 0xF0 && code_unit2 < 0x90) || /* overlong */
	                   (code_unit1 == 0xF4 && code_unit2 >= 0x90)) {  /* > U+10FFFF */
	                    insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                }
	                else {
	                    const unsigned int code_unit3 = get_next_byte(iterator, end);

	                    if(!is_valid_continuation_byte(code_unit3)) {
	                        insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                    }
	                    else {
	                        const unsigned int code_unit4 = get_next_byte(iterator, end);

	                        if(!is_valid_continuation_byte(code_unit4)) {
	                            insert_replacement(codepoints, replacement, consumed_bytes(start, iterator));
	                        }
	                        else {
	                            codepoints.push_back((code_unit1 << 18) + (code_unit2 << 12) + (code_unit3 << 6) + code_unit4 - 0x3C82080);
	                        }
	                    }
	                }
	            }
	            else {
	                /* > U+10FFFF */
	                insert_replacement(codepoints, replacement, 1);
	            }
	        }
	        return codepoints;
	    }


	    // Formats `codepoint` as a JSON \uXXXX escape (lower-case hex, zero padded
	    // to at least 4 digits).
	    inline std::string json_encode_control_char(unsigned int codepoint) {
	        std::ostringstream oss;
	        oss.fill('0');
	        oss << "\\u" << std::setw(4) << std::hex << codepoint;
	        return oss.str();
	    }


	    // Encodes one code point as UTF-8.
	    // Code points above U+10FFFF and surrogates (U+D800..U+DFFF) cannot be
	    // represented in UTF-8 and are encoded as U+FFFD instead.
	    inline std::string utf8_encode(unsigned int codepoint) {
	        std::string output;

	        // NOTE(review): code points U+0591..U+05F3 are dropped (encoded as an
	        // empty string). The reason is not visible here; confirm it is intended.
	        if(codepoint > 0x590 && codepoint < 0x5F4) {
	            return output;
	        }

	        // out of range or surrogate
	        if(codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
	            return utf8_encode(0xfffd);
	        }

	        if (codepoint < 0x80) {
	            output.push_back(static_cast<char>(codepoint));
	        }
	        else if (codepoint <= 0x7FF) {
	            output.push_back(static_cast<char>((codepoint >> 6) + 0xC0));
	            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
	        }
	        else if (codepoint <= 0xFFFF) {
	            output.push_back(static_cast<char>((codepoint >> 12) + 0xE0));
	            output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
	            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
	        }
	        else {
	            output.push_back(static_cast<char>((codepoint >> 18) + 0xF0));
	            output.push_back(static_cast<char>(((codepoint >> 12) & 0x3F) + 0x80));
	            output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
	            output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
	        }
	        return output;
	    }


	    // Builds the body of a JSON string (without the surrounding quotes) from
	    // code points: short escapes for \b \t \n \f \r " / and \, \uXXXX escapes
	    // for the remaining code points below 32, 127 and 128..159, UTF-8 for
	    // everything else.
	    inline std::string json_encode_codepoints(std::vector<unsigned int> const& codepoints) {
	        std::string json_string;

	        for(std::vector<unsigned int>::const_iterator codepoint = codepoints.begin() ; codepoint != codepoints.end() ; ++ codepoint) {
	            switch(*codepoint) {
	                case 8:  json_string += "\\b";  break; // \b
	                case 9:  json_string += "\\t";  break; // \t
	                case 10: json_string += "\\n";  break; // \n
	                case 12: json_string += "\\f";  break; // \f
	                case 13: json_string += "\\r";  break; // \r
	                case 34: json_string += "\\\""; break; // "
	                case 47: json_string += "\\/";  break; // /
	                case 92: json_string += "\\\\"; break; // backslash
	                default:
	                    if(*codepoint < 32 || *codepoint == 127 || (*codepoint >= 128 && *codepoint <= 159)) {
	                        json_string += json_encode_control_char(*codepoint);
	                    }
	                    else {
	                        json_string += utf8_encode(*codepoint);
	                    }
	                    break;
	            }
	        }
	        return json_string;
	    }
	}