Created
February 24, 2026 00:11
-
-
Save lemire/05cdd4925572a03199ff30d786921c8c to your computer and use it in GitHub Desktop.
simdjson example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| This program serves as a comprehensive test suite for the consume_array and | |
| consume_object functions, which are designed to parse specific JSON structures | |
| using the simdjson library. The primary goal is to validate the parsing of | |
| arrays and objects that contain a string, a double, and an optional integer | |
| value. The test generates a large JSON array consisting of 200 elements: the | |
| first 100 are sub-arrays, and the next 100 are objects. This allows for thorough | |
| testing of both parsing paths under various conditions. | |
| The consume_array function takes a simdjson ondemand::array and extracts three | |
| values: a string_view from the first element, a double from the second, and an | |
| int64_t from the third if present. If the third element is missing, the integer | |
| is defaulted to 0. The function uses error handling to ensure robust parsing and | |
| returns an expected tuple containing the parsed values or an error code. This | |
| function demonstrates how to handle optional elements in JSON arrays using | |
| iterators and type-safe extraction. | |
| Similarly, the consume_object function processes a simdjson ondemand::object, | |
| retrieving the "str" field as a string_view, the "num" field as a double, and | |
| optionally the "integer" field as an int64_t. If the "integer" field is absent, | |
| it defaults to 0. The function checks for field existence using error codes, | |
| providing a safe way to parse JSON objects with optional fields. Both functions | |
| are noexcept and return expected types for modern C++ error handling. | |
| In the main function, a JSON string is dynamically constructed using std::format | |
| to create the test data. For arrays, elements are formatted as ["string{i}", | |
| i.0, optional i*100], and for objects as {"str":"string{i}", "num": i.0, | |
| optional "integer": i*100}. Every 10th element omits the optional integer to | |
| test missing field handling. The JSON is parsed using simdjson's ondemand | |
| parser, and each element is processed based on its type (array or object). | |
| Expected values are precomputed and compared against parsed results to ensure | |
| accuracy. The program outputs parsing success for each element and a final | |
| summary, making it easy to verify the correctness of the parsing logic. | |
| */ | |
| #include "simdjson.h" | |
| #include <expected> | |
| #include <format> | |
| #include <iomanip> | |
| #include <iostream> | |
| #include <tuple> | |
| #include <vector> | |
// Extracts (string, double, integer) from a JSON array shaped like
// ["text", 1.0] or ["text", 1.0, 100].
//
// The first element must be a string and the second a number; both are
// required. The third element is optional: when absent the integer is 0,
// when present it must be a 64-bit signed integer. Elements after the third
// are not inspected.
//
// Returns the parsed tuple, or the simdjson error code of the first failure.
// An array with fewer than two elements yields INDEX_OUT_OF_BOUNDS.
//
// The returned string_view refers to storage managed by the simdjson parser,
// so it must be consumed before the parser is reused.
std::expected<std::tuple<std::string_view, double, int64_t>,
              simdjson::error_code>
consume_array(simdjson::ondemand::array arr) noexcept {
  std::string_view str;
  double num{};
  int64_t integer{};

  // `it` stays wrapped in simdjson_result on purpose: all accesses below go
  // through error codes, never through the throwing implicit conversions,
  // which would call std::terminate inside this noexcept function.
  auto it = arr.begin();

  // Required element 0: the string. The iterator must be compared against
  // end() before it is dereferenced, otherwise a too-short array is read
  // past its closing bracket.
  if (!(it != arr.end())) {
    return std::unexpected(simdjson::error_code::INDEX_OUT_OF_BOUNDS);
  }
  auto first = *it;
  if (auto error = first.get_string().get(str)) {
    return std::unexpected(error);
  }
  ++it;

  // Required element 1: the double.
  if (!(it != arr.end())) {
    return std::unexpected(simdjson::error_code::INDEX_OUT_OF_BOUNDS);
  }
  auto second = *it;
  if (auto error = second.get_double().get(num)) {
    return std::unexpected(error);
  }
  ++it;

  // Optional element 2: the integer; keeps its default of 0 when missing.
  if (it != arr.end()) {
    auto third = *it;
    if (auto error = third.get_int64().get(integer)) {
      return std::unexpected(error);
    }
  }
  return std::tuple{str, num, integer};
}
// Reads the fields "str" (string), "num" (double) and, if present,
// "integer" (64-bit signed integer) from a JSON object.
//
// "str" and "num" are mandatory; any failure to read them is returned as the
// corresponding simdjson error code. A missing "integer" field is not an
// error: the integer component of the result is then 0. Any other failure on
// "integer" (for instance a wrong type) is returned as an error.
std::expected<std::tuple<std::string_view, double, int64_t>,
              simdjson::error_code>
consume_object(simdjson::ondemand::object obj) noexcept {
  std::string_view text;
  if (const auto err = obj["str"].get_string().get(text); err) {
    return std::unexpected(err);
  }

  double number;
  if (const auto err = obj["num"].get_double().get(number); err) {
    return std::unexpected(err);
  }

  // The optional field: absence short-circuits to the default value.
  auto maybe_integer = obj["integer"];
  if (maybe_integer.error() == simdjson::error_code::NO_SUCH_FIELD) {
    return std::tuple{text, number, int64_t{}};
  }

  int64_t whole{};
  if (const auto err = maybe_integer.get_int64().get(whole); err) {
    return std::unexpected(err);
  }
  return std::tuple{text, number, whole};
}
// Builds the JSON test document: one top-level array of `num_elements`
// entries. Entries with index below 100 are sub-arrays
// ["string<i>", <i>.0, <i*100>]; the remaining entries are objects
// {"str":"string<i>", "num": <i>.0, "integer": <i*100>}. For every index that
// is a multiple of 10 the trailing integer is left out.
simdjson::padded_string generate_json(const int num_elements = 200) {
  // Indices strictly below this value are emitted as arrays.
  constexpr int first_object_index = 100;

  simdjson::padded_string_builder out;
  out.append("[");
  for (int idx = 0; idx < num_elements; ++idx) {
    const bool as_array = idx < first_object_index;
    const bool with_integer = (idx % 10) != 0;
    const auto as_double = static_cast<double>(idx);
    const long long scaled = idx * 100LL;

    // Assemble the whole entry (including its leading separator) and hand it
    // to the builder in one go.
    std::string entry;
    if (idx != 0) {
      entry += ",";
    }
    if (as_array) {
      entry += std::format(R"(["string{}", {:.1f})", idx, as_double);
      if (with_integer) {
        entry += std::format(R"(, {})", scaled);
      }
      entry += "]";
    } else {
      entry += std::format(R"({{"str":"string{}", "num": {:.1f})", idx,
                           as_double);
      if (with_integer) {
        entry += std::format(R"(, "integer": {})", scaled);
      }
      entry += "}";
    }
    out.append(entry);
  }
  out.append("]");
  return out.convert();
}
// Test driver: generates the JSON document, parses every top-level element
// with consume_array / consume_object depending on its JSON type, and checks
// the result against precomputed expectations.
//
// Returns EXIT_SUCCESS only if every element parsed, matched its expected
// value, and the number of parsed elements equals the number generated.
// Every failure is reported on stderr and yields EXIT_FAILURE; simdjson
// results are read through error codes so no exception can escape.
int main() {
  constexpr int num_elements = 200;
  const simdjson::padded_string json = generate_json(num_elements);

  simdjson::ondemand::parser parser;
  simdjson::ondemand::document doc;
  if (auto error = parser.iterate(json).get(doc)) {
    std::cerr << "Error parsing JSON document: " << error << '\n';
    return EXIT_FAILURE;
  }
  simdjson::ondemand::array arr;
  if (auto error = doc.get_array().get(arr)) {
    std::cerr << "Error getting top-level array: " << error << '\n';
    return EXIT_FAILURE;
  }

  // Expected (string, double, integer) per index; the integer is 0 for every
  // index that is a multiple of 10 because the generator omits it there.
  std::vector<std::tuple<std::string, double, int64_t>> expected;
  expected.reserve(static_cast<size_t>(num_elements));
  for (int i = 0; i < num_elements; ++i) {
    const int64_t integer = (i % 10 != 0) ? i * 100LL : 0;
    expected.emplace_back("string" + std::to_string(i), i * 1.0, integer);
  }

  size_t index = 0;
  for (auto val : arr) {
    // Guard the lookup into `expected` below.
    if (index >= expected.size()) {
      std::cerr << "Unexpected extra element at index " << index << '\n';
      return EXIT_FAILURE;
    }

    simdjson::ondemand::json_type type;
    if (auto error = val.type().get(type)) {
      std::cerr << "Error determining type at index " << index << ": " << error
                << '\n';
      return EXIT_FAILURE;
    }

    std::expected<std::tuple<std::string_view, double, int64_t>,
                  simdjson::error_code>
        result;
    if (type == simdjson::ondemand::json_type::array) {
      simdjson::ondemand::array sub_arr;
      if (auto error = val.get_array().get(sub_arr)) {
        std::cerr << "Error getting array at index " << index << ": " << error
                  << '\n';
        return EXIT_FAILURE;
      }
      result = consume_array(sub_arr);
    } else if (type == simdjson::ondemand::json_type::object) {
      simdjson::ondemand::object sub_obj;
      if (auto error = val.get_object().get(sub_obj)) {
        std::cerr << "Error getting object at index " << index << ": " << error
                  << '\n';
        return EXIT_FAILURE;
      }
      result = consume_object(sub_obj);
    } else {
      std::cerr << "Unexpected type at index " << index << '\n';
      return EXIT_FAILURE;
    }

    if (!result) {
      std::cerr << "Error parsing at index " << index << ": " << result.error()
                << '\n';
      return EXIT_FAILURE;
    }

    const auto &[str, num, integer] = *result;
    // Bind by reference: copying would duplicate the std::string each time.
    const auto &[exp_str, exp_num, exp_int] = expected[index];
    // Exact floating-point comparison is intended: the generated values are
    // small whole numbers written with one decimal digit.
    if (str != exp_str || num != exp_num || integer != exp_int) {
      std::cerr << "Mismatch at index " << index << ": expected (" << exp_str
                << ", " << exp_num << ", " << exp_int << ") but got (" << str
                << ", " << num << ", " << integer << ")" << '\n';
      return EXIT_FAILURE;
    }
    std::cout << "Element " << index << " parsed successfully." << '\n';
    ++index;
  }

  // A loop that ends early must not be reported as a full success.
  if (index != expected.size()) {
    std::cerr << "Expected " << expected.size() << " elements but parsed "
              << index << '\n';
    return EXIT_FAILURE;
  }

  std::cout << "All elements parsed successfully." << '\n';
  return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment