Skip to content

Instantly share code, notes, and snippets.

@h-mayorquin
Last active January 21, 2022 14:46
Show Gist options
  • Save h-mayorquin/090e154a7637d88d85b724098b3b940d to your computer and use it in GitHub Desktop.
Save h-mayorquin/090e154a7637d88d85b724098b3b940d to your computer and use it in GitHub Desktop.
Calculates the compression ratio (stored size / uncompressed size) of the ElectricalSeries datasets in a given dandiset
import h5py
from dandi.dandiapi import DandiAPIClient
from pynwb import NWBHDF5IO
import pynwb
import numpy as np
# Identifier of the dandiset to analyse (the script was previously also
# pointed at "000166"; only the last assignment ever took effect).
dandi_dataset_id = "000213"

# Upper bound on how many assets (sessions) are inspected, smallest first.
number_of_sessions = 8


def compression_ratio(stored_bytes, n_elements, itemsize):
    """Return stored bytes divided by uncompressed bytes.

    A value below 1 means the data takes less space on disk than its raw
    in-memory representation would.

    Args:
        stored_bytes: Bytes the dataset occupies in the file.
        n_elements: Number of elements in the dataset (may be 0 or None).
        itemsize: Size in bytes of a single element.

    Returns:
        The ratio as a float, or None when the dataset has no elements and
        the ratio is therefore undefined.
    """
    if not n_elements or not itemsize:
        # Guard against dividing by zero for empty datasets.
        return None
    return stored_bytes / (n_elements * itemsize)


def electrical_series_compression_ratios(nwbfile):
    """Collect the compression ratio of every HDF5 dataset field that
    belongs to an ElectricalSeries object of *nwbfile*.

    Must be called while the file backing *nwbfile* is still open, because
    the dataset handles are queried for their storage size.
    """
    ratios = []
    for obj in nwbfile.objects.values():
        if not isinstance(obj, pynwb.ecephys.ElectricalSeries):
            continue
        for field in obj.fields.values():
            if not isinstance(field, h5py.Dataset):
                continue
            ratio = compression_ratio(
                field.id.get_storage_size(), field.size, field.dtype.itemsize
            )
            if ratio is not None:
                ratios.append(ratio)
    return ratios


def summarize_compression(ratios_by_session):
    """Average the compression ratios per session and across sessions.

    Args:
        ratios_by_session: Mapping of session index to a list of ratios.

    Returns:
        A tuple ``(averages, overall)``. ``averages`` maps each session
        index to the mean of its ratios, or NaN when the session has none.
        ``overall`` is the mean of the non-NaN session averages, or NaN
        when there are none, so one session without ElectricalSeries data
        does not poison the overall figure.
    """
    averages = {
        index: float(np.mean(ratios)) if ratios else float("nan")
        for index, ratios in ratios_by_session.items()
    }
    valid = [average for average in averages.values() if not np.isnan(average)]
    overall = float(np.mean(valid)) if valid else float("nan")
    return averages, overall


def main():
    """Print asset sizes and ElectricalSeries compression ratios for the
    smallest ``number_of_sessions`` assets of ``dandi_dataset_id``."""
    asset_size_dic = {}
    compression_ratio_dic = {}

    with DandiAPIClient() as client:
        dandi_set = client.get_dandiset(dandi_dataset_id, "draft")
        # Order assets by size for faster speed
        asset_list = sorted(dandi_set.get_assets(), key=lambda asset: asset.size)

        # Slicing (rather than indexing up to number_of_sessions) keeps the
        # loop safe when the dandiset holds fewer assets than requested.
        for index, asset in enumerate(asset_list[:number_of_sessions]):
            print(index)
            asset_size_dic[index] = asset.size / 10 ** 9  # bytes -> GB
            print(asset.path)

            s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
            # The file is opened remotely through the 'ros3' HDF5 driver.
            with NWBHDF5IO(s3_path, mode='r', load_namespaces=True, driver='ros3') as io:
                nwbfile = io.read()
                compression_ratio_dic[index] = electrical_series_compression_ratios(nwbfile)

    compression_averages_dic, overall_average = summarize_compression(compression_ratio_dic)

    print("")
    print("Asset size")
    print(asset_size_dic)
    print("Compression averages")
    print(compression_averages_dic)
    print("Average of averages")
    print(overall_average)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment