Skip to content

Instantly share code, notes, and snippets.

@riga
Last active January 11, 2023 08:22
Show Gist options
  • Save riga/6e04cae06d65cb78f17ae625d83de0b0 to your computer and use it in GitHub Desktop.
Save riga/6e04cae06d65cb78f17ae625d83de0b0 to your computer and use it in GitHub Desktop.
# coding: utf-8
import dask_awkward as dak
import awkward as ak
# Parquet file read by load(); available for download at
# https://cernbox.cern.ch/remote.php/dav/public-files/rvipIIozKQAypwu/calib.parquet
input_path = "calib.parquet"
# Alternative input: the same file, saved with extensionarray=False; available at
# https://cernbox.cern.ch/remote.php/dav/public-files/xQTpSqrvZQSSTZp/calib_no_extensionarray.parquet
# Uncomment the next line to read that file instead:
# input_path = "calib_no_extensionarray.parquet"
def load(**kwargs):
    """Read ``input_path`` through dask_awkward, compute it, and return the result.

    Every keyword argument is forwarded unchanged to ``dak.from_parquet``
    (the script below uses ``columns=[...]`` to select branches).

    The number of partitions and the divisions of the lazy collection are
    printed first; the collection is then computed, and the computed array is
    printed and returned.
    """
    lazy_array = dak.from_parquet(input_path, **kwargs)
    print(f"npartitions: {lazy_array.npartitions}")
    print(f"divisions : {lazy_array.divisions}")

    # trigger the actual read by materializing the lazy collection
    computed = lazy_array.compute()
    print(f"ak array : {computed}")

    return computed
# report the library versions this reproducer runs against
print(f"dask_awkward {dak.__version__}")
print(f"awkward {ak.__version__}")

# keyword arguments for each load() call, executed in this order;
# a separator line is printed before every call
selections = (
    # load all columns -> works nicely
    {},
    # load some 1st-level columns -> works nicely
    {"columns": ["mc_weight", "deterministic_seed"]},
    # load all (!) 2nd-level columns under "Jet" -> works nicely
    {"columns": ["Jet.*"]},
    # load a subset of 2nd-level columns under "Jet" -> segfaults at compute()
    {"columns": ["Jet.pt"]},
)

for selection in selections:
    print(80 * "-")
    load(**selection)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment