Created
July 5, 2022 11:16
-
-
Save jtanx/2b56e3acc02fc233b8f6d9aee36a57d2 to your computer and use it in GitHub Desktop.
Working example of writing a Parquet file in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build script for the `parq` example: a single executable that writes a
# Parquet file through the Apache Arrow / Parquet C++ libraries.
cmake_minimum_required(VERSION 3.10)
project(parq)

# The example is built as C++17, with no fallback to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Locate the Arrow installation; configuration fails if it is not found.
find_package(Arrow REQUIRED)

add_executable(parq park.cc)

# NOTE(review): this links the static Arrow target together with a library
# named `parquet`. Confirm that both names resolve for the installed Arrow
# version and that mixing the two linkage styles is intended.
target_link_libraries(parq
    PRIVATE
        arrow_static
        parquet
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>

#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>

#include <arrow/io/file.h>
#include <parquet/stream_writer.h>
int main() | |
{ | |
std::shared_ptr<arrow::io::FileOutputStream> outfile; | |
PARQUET_ASSIGN_OR_THROW( | |
outfile, | |
arrow::io::FileOutputStream::Open("test.parquet")); | |
parquet::WriterProperties::Builder builder; | |
builder.compression(parquet::Compression::ZSTD); | |
std::shared_ptr<parquet::schema::GroupNode> schema; | |
parquet::schema::NodeVector fields; | |
fields.push_back(parquet::schema::PrimitiveNode::Make( | |
"field1", parquet::Repetition::REQUIRED, parquet::Type::BYTE_ARRAY, parquet::ConvertedType::UTF8 | |
)); | |
fields.push_back(parquet::schema::PrimitiveNode::Make( | |
"field2", parquet::Repetition::REQUIRED, parquet::Type::FLOAT | |
)); | |
fields.push_back(parquet::schema::PrimitiveNode::Make( | |
"field3", parquet::Repetition::REQUIRED, parquet::Type::INT32, parquet::ConvertedType::INT_32 | |
)); | |
schema = std::static_pointer_cast<parquet::schema::GroupNode>( | |
parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields)); | |
parquet::StreamWriter os{ | |
parquet::ParquetFileWriter::Open(outfile, schema, builder.build())}; | |
for (int i = 0; i < 1000; ++i) | |
{ | |
os << "TESTCOL" << 3.14f << int32_t(100) << parquet::EndRow; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment