This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import pyarrow as pa | |
import ibis | |
from pyspark.sql import SparkSession | |
# create example data in a pandas DataFrame | |
df = pd.DataFrame(data={'fruit': ['apple', 'apple', 'apple', 'orange', 'orange', 'orange'], | |
'variety': ['gala', 'honeycrisp', 'fuji', 'navel', 'valencia', 'cara cara'], | |
'weight': [134.2 , 158.6, None, 142.1, 96.7, None]}) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/type.h> | |
#include <arrow/result.h> | |
#include <arrow/io/api.h> | |
#include <arrow/compute/api.h> | |
#include <arrow/acero/exec_plan.h> | |
#include <arrow/acero/options.h> | |
#include <parquet/arrow/reader.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/type.h> | |
#include <arrow/result.h> | |
#include <arrow/io/api.h> | |
#include <arrow/compute/api.h> | |
#include <arrow/acero/exec_plan.h> | |
#include <arrow/acero/options.h> | |
#include <parquet/arrow/reader.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/type.h> | |
#include <arrow/result.h> | |
#include <arrow/io/api.h> | |
#include <arrow/compute/api.h> | |
#include <arrow/acero/exec_plan.h> | |
#include <arrow/acero/options.h> | |
#include <parquet/arrow/reader.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyarrow as pa | |
import pyarrow.parquet as pq | |
import random | |
import string | |
# write parquet files | |
original = [] | |
for i in range(3): | |
data = [[random.uniform(0, 1) for _ in range(1000000)]] | |
original.extend(data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/io/api.h> | |
#include <arrow/util/float16.h> | |
#include <parquet/arrow/writer.h> | |
arrow::Status WriteTableToParquetFile() { | |
std::shared_ptr<arrow::Array> array; | |
arrow::HalfFloatBuilder builder; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <random> | |
#include <arrow/api.h> | |
#include <arrow/io/api.h> | |
#include <parquet/arrow/writer.h> | |
float GetRandomFloat() | |
{ | |
static std::default_random_engine e; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <random> | |
#include <vector> | |
#include <string> | |
#include <arrow/api.h> | |
#include <arrow/io/api.h> | |
#include <parquet/arrow/writer.h> | |
std::vector<std::string> GenerateUniqueStrings() { | |
// generates 26^4 = 456,976 unique 4-letter combinations |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/compute/api.h> | |
int main(int, char**) { | |
// lookup set | |
std::shared_ptr<arrow::Array> array; | |
arrow::Int32Builder builder; | |
if (!builder.Append(5).ok()) return 1; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile | |
import pathlib | |
import numpy as np | |
import pyarrow as pa | |
import pyarrow.compute as pc | |
import pyarrow.parquet as pq | |
import pyarrow.dataset as ds | |
# create a small dataset for example purposes |