Last active
May 19, 2022 20:26
-
-
Save kylebarron/73d4a126499ef113add6ea1dca04e79f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is automatically @generated by Cargo. | |
# It is not intended for manual editing. | |
version = 3 | |
[[package]] | |
name = "adler" | |
version = "1.0.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" | |
[[package]] | |
name = "alloc-no-stdlib" | |
version = "2.0.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" | |
[[package]] | |
name = "alloc-stdlib" | |
version = "0.2.1" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" | |
dependencies = [ | |
"alloc-no-stdlib", | |
] | |
[[package]] | |
name = "async-stream" | |
version = "0.3.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" | |
dependencies = [ | |
"async-stream-impl", | |
"futures-core", | |
] | |
[[package]] | |
name = "async-stream-impl" | |
version = "0.3.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "async-trait" | |
version = "0.1.53" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "bitpacking" | |
version = "0.8.4" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" | |
dependencies = [ | |
"crunchy", | |
] | |
[[package]] | |
name = "brotli" | |
version = "3.3.4" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" | |
dependencies = [ | |
"alloc-no-stdlib", | |
"alloc-stdlib", | |
"brotli-decompressor", | |
] | |
[[package]] | |
name = "brotli-decompressor" | |
version = "2.3.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" | |
dependencies = [ | |
"alloc-no-stdlib", | |
"alloc-stdlib", | |
] | |
[[package]] | |
name = "cc" | |
version = "1.0.73" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" | |
dependencies = [ | |
"jobserver", | |
] | |
[[package]] | |
name = "cfg-if" | |
version = "1.0.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | |
[[package]] | |
name = "crc32fast" | |
version = "1.3.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" | |
dependencies = [ | |
"cfg-if", | |
] | |
[[package]] | |
name = "crunchy" | |
version = "0.2.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" | |
[[package]] | |
name = "fallible-streaming-iterator" | |
version = "0.1.9" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" | |
[[package]] | |
name = "flate2" | |
version = "1.0.23" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" | |
dependencies = [ | |
"cfg-if", | |
"crc32fast", | |
"libc", | |
"miniz_oxide", | |
] | |
[[package]] | |
name = "futures" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" | |
dependencies = [ | |
"futures-channel", | |
"futures-core", | |
"futures-executor", | |
"futures-io", | |
"futures-sink", | |
"futures-task", | |
"futures-util", | |
] | |
[[package]] | |
name = "futures-channel" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" | |
dependencies = [ | |
"futures-core", | |
"futures-sink", | |
] | |
[[package]] | |
name = "futures-core" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" | |
[[package]] | |
name = "futures-executor" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" | |
dependencies = [ | |
"futures-core", | |
"futures-task", | |
"futures-util", | |
] | |
[[package]] | |
name = "futures-io" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" | |
[[package]] | |
name = "futures-macro" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "futures-sink" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" | |
[[package]] | |
name = "futures-task" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" | |
[[package]] | |
name = "futures-util" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" | |
dependencies = [ | |
"futures-channel", | |
"futures-core", | |
"futures-io", | |
"futures-macro", | |
"futures-sink", | |
"futures-task", | |
"memchr", | |
"pin-project-lite", | |
"pin-utils", | |
"slab", | |
] | |
[[package]] | |
name = "integer-encoding" | |
version = "3.0.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "0e85a1509a128c855368e135cffcde7eac17d8e1083f41e2b98c58bc1a5074be" | |
dependencies = [ | |
"async-trait", | |
"futures-util", | |
] | |
[[package]] | |
name = "jobserver" | |
version = "0.1.24" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" | |
dependencies = [ | |
"libc", | |
] | |
[[package]] | |
name = "libc" | |
version = "0.2.126" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" | |
[[package]] | |
name = "lz4" | |
version = "1.23.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" | |
dependencies = [ | |
"libc", | |
"lz4-sys", | |
] | |
[[package]] | |
name = "lz4-sys" | |
version = "1.9.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" | |
dependencies = [ | |
"cc", | |
"libc", | |
] | |
[[package]] | |
name = "memchr" | |
version = "2.5.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" | |
[[package]] | |
name = "miniz_oxide" | |
version = "0.5.1" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" | |
dependencies = [ | |
"adler", | |
] | |
[[package]] | |
name = "parquet-format-async-temp" | |
version = "0.3.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf" | |
dependencies = [ | |
"async-trait", | |
"futures", | |
"integer-encoding", | |
] | |
[[package]] | |
name = "parquet-metadata-demo" | |
version = "0.1.0" | |
dependencies = [ | |
"parquet2", | |
] | |
[[package]] | |
name = "parquet2" | |
version = "0.12.0" | |
source = "git+https://github.com/jorgecarleitao/parquet2?branch=improve_meta_read#9427962ca7af01f99ccf5b960dc4bb3484ec9c3d" | |
dependencies = [ | |
"async-stream", | |
"bitpacking", | |
"brotli", | |
"flate2", | |
"futures", | |
"lz4", | |
"parquet-format-async-temp", | |
"snap", | |
"streaming-decompression", | |
"xxhash-rust", | |
"zstd", | |
] | |
[[package]] | |
name = "pin-project-lite" | |
version = "0.2.9" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" | |
[[package]] | |
name = "pin-utils" | |
version = "0.1.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" | |
[[package]] | |
name = "proc-macro2" | |
version = "1.0.39" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" | |
dependencies = [ | |
"unicode-ident", | |
] | |
[[package]] | |
name = "quote" | |
version = "1.0.18" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" | |
dependencies = [ | |
"proc-macro2", | |
] | |
[[package]] | |
name = "slab" | |
version = "0.4.6" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" | |
[[package]] | |
name = "snap" | |
version = "1.0.5" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" | |
[[package]] | |
name = "streaming-decompression" | |
version = "0.1.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9bc687acd5dc742c4a7094f2927a8614a68e4743ef682e7a2f9f0f711656cc92" | |
dependencies = [ | |
"fallible-streaming-iterator", | |
] | |
[[package]] | |
name = "syn" | |
version = "1.0.95" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"unicode-ident", | |
] | |
[[package]] | |
name = "unicode-ident" | |
version = "1.0.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" | |
[[package]] | |
name = "xxhash-rust" | |
version = "0.8.5" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "074914ea4eec286eb8d1fd745768504f420a1f7b7919185682a4a267bed7d2e7" | |
[[package]] | |
name = "zstd" | |
version = "0.11.2+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" | |
dependencies = [ | |
"zstd-safe", | |
] | |
[[package]] | |
name = "zstd-safe" | |
version = "5.0.2+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" | |
dependencies = [ | |
"libc", | |
"zstd-sys", | |
] | |
[[package]] | |
name = "zstd-sys" | |
version = "2.0.1+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" | |
dependencies = [ | |
"cc", | |
"libc", | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "parquet-metadata-demo" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
parquet2 = {git = "https://github.com/jorgecarleitao/parquet2", branch = "improve_meta_read"} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import BytesIO | |
import pyarrow as pa | |
import pyarrow.parquet as pq | |
def create_example_file_meta_data():
    """Write a tiny four-column table to memory and keep the metadata it produced.

    Returns:
        A ``(schema, metadata)`` tuple: the Arrow schema of the example table
        and the first entry found in the ``metadata_collector`` list after
        ``pq.write_table`` has run.
    """
    # (column name, values, Arrow type) -- order here is the column order.
    column_specs = [
        ("str", ["a", "b", "c", "d"], pa.string()),
        ("uint8", [1, 2, 3, 4], pa.uint8()),
        ("int32", [0, -2147483638, 2147483637, 1], pa.int32()),
        ("bool", [True, True, False, False], pa.bool_()),
    ]
    table = pa.table(
        {name: pa.array(values, type=dtype) for name, values, dtype in column_specs}
    )

    # The written bytes go to a throwaway in-memory buffer; only the collected
    # metadata is of interest to the caller.
    collected = []
    pq.write_table(table, BytesIO(), metadata_collector=collected)
    return table.schema, collected[0]
def main(num_copies=30_000, path='_metadata'):
    """Write a Parquet metadata file that repeats one example metadata entry.

    Args:
        num_copies: Number of times the example metadata object is repeated in
            the collector list handed to ``pq.write_metadata``. Defaults to
            30_000, the size used by the original benchmark.
        path: Destination passed to ``pq.write_metadata``. Defaults to
            ``'_metadata'`` (relative to the current working directory).
    """
    schema, meta = create_example_file_meta_data()
    print('created collector')
    # The list holds ``num_copies`` references to the same metadata object.
    metadata_collector = [meta] * num_copies
    print('writing meta')
    pq.write_metadata(schema, path, metadata_collector=metadata_collector)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{fs::File, time::Instant, io::BufReader}; | |
use parquet2::read::read_metadata; | |
fn main() { | |
let mut file = BufReader::new(File::open("_metadata").unwrap()); | |
let now = Instant::now(); | |
let _ = read_metadata(&mut file).unwrap(); | |
println!("Time to parse metadata: {}", now.elapsed().as_secs_f32()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment