Skip to content

Instantly share code, notes, and snippets.

@Kodiologist
Last active July 24, 2023 16:52
Show Gist options
  • Save Kodiologist/c78c8d3bf3ae232425eb1e8d2902e569 to your computer and use it in GitHub Desktop.
Save Kodiologist/c78c8d3bf3ae232425eb1e8d2902e569 to your computer and use it in GitHub Desktop.
Example of writing and reading a Parquet file with XIS temperature predictions
from datetime import date
import pyarrow as pa
import pyarrow.parquet
# Build a small example table with one row per region, write it to a
# Parquet file in the current directory, then read the file back and
# print the result.

# The lower 48 states, plus DC.
n_regions = 48 + 1

# Column name -> list of per-region values. Every list has `n_regions`
# entries, and the insertion order here is the column order of the table.
# NOTE(review): the `_cK` suffix presumably means centikelvins (hundredths
# of a kelvin) -- confirm against the data's documentation.
columns = {
    # Every row carries the same date.
    'date': [date(2004, 7, 15)] * n_regions,
    # Approximate centroids of the regions (longitude, then latitude).
    'lon': [-82.4582, -109.6296, -94.3071, -76.7647, -80.8914, -69.2428, -77.0163, -71.5538, -99.7947, -120.4494, -106.111, -100.2269, -99.3213, -119.4527, -86.8321, -83.4384, -92.444, -77.8015, -92.459, -111.6673, -97.4937, -86.3525, -107.5496, -86.2834, -98.3788, -114.607, -116.6251, -89.1989, -72.6675, -72.7269, -74.6642, -100.4658, -93.4967, -71.5819, -111.6567, -75.4997, -85.3034, -82.7961, -89.9957, -120.5568, -89.6696, -105.5465, -79.3739, -78.825, -80.6253, -91.9736, -75.5119, -85.3843, -71.7952],
    'lat': [28.6196, 47.0513, 46.2804, 39.0379, 33.9072, 45.3598, 38.9047, 41.6757, 41.539, 47.3827, 34.4105, 44.4446, 31.4765, 37.1948, 32.7786, 32.6397, 34.8953, 40.8793, 38.3577, 39.3071, 35.5913, 35.8614, 42.9963, 39.8947, 38.4957, 44.3518, 39.3317, 40.0426, 44.0694, 41.6207, 40.18, 47.4495, 42.0757, 43.6795, 34.2778, 38.9851, 37.5371, 40.288, 44.6248, 43.9356, 32.7373, 38.9992, 35.5574, 37.5232, 38.642, 31.0535, 42.9384, 44.3193, 42.2552],
    # Minimum, mean, and maximum temperature for each region.
    'temp_min_cK': [29629, 28927, 28514, 29110, 29489, 28803, 29385, 28991, 29010, 28806, 29113, 29313, 29309, 29156, 29525, 29426, 29607, 28816, 29007, 28880, 29661, 29089, 28844, 28842, 29354, 28265, 28816, 28951, 28844, 29052, 29073, 29119, 28946, 28830, 29614, 29121, 28991, 28775, 28438, 28413, 29648, 28377, 29197, 29037, 28755, 29801, 28883, 28501, 29159],
    'temp_mean_cK': [30067, 29685, 29329, 29589, 30095, 28940, 29832, 29361, 29788, 29550, 29857, 29943, 30172, 29949, 30009, 30018, 30111, 29137, 29735, 29456, 30344, 29653, 29416, 29452, 30236, 28982, 29703, 29572, 28998, 29486, 29535, 29665, 29639, 29085, 30327, 29621, 29662, 29439, 29412, 29517, 30118, 28981, 29797, 29625, 29359, 30317, 29116, 29164, 29438],
    'temp_max_cK': [30587, 30426, 30031, 30097, 30762, 29066, 30211, 29914, 30742, 30306, 30564, 30616, 31014, 30725, 30500, 30693, 30783, 29477, 30488, 30059, 31001, 30227, 29866, 30046, 30987, 29681, 30475, 30086, 29332, 30024, 30064, 30122, 30208, 29451, 31118, 30122, 30254, 30050, 30192, 30564, 30842, 30033, 30450, 30124, 30073, 30992, 29609, 29799, 29929],
}

d = pa.Table.from_pydict(columns)

# Write the table out, then read it back from disk and show it.
pa.parquet.write_table(d, 'example.parquet')
round_tripped = pa.parquet.read_table('example.parquet')
print(round_tripped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment