Last active
January 21, 2022 14:46
-
-
Save h-mayorquin/090e154a7637d88d85b724098b3b940d to your computer and use it in GitHub Desktop.
Calculates the on-disk compression of ElectricalSeries data for the smallest assets of a given DANDI dandiset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Estimate how well ElectricalSeries data is compressed in a DANDI dandiset.
#
# The script streams the smallest assets of a dandiset through the HDF5
# ``ros3`` driver and, for every HDF5-backed field of each ElectricalSeries,
# computes (bytes stored in the file) / (element count * item size).
# NOTE: with this definition a value below 1 means the stored data is smaller
# than its uncompressed size, i.e. it is the inverse of the conventional
# "compression ratio".
import h5py
from dandi.dandiapi import DandiAPIClient
from pynwb import NWBHDF5IO
import pynwb
import numpy as np

# Dandiset to inspect ("000166" was a previously used alternative).
dandi_dataset_id = "000213"
# Upper bound on how many assets (smallest first) are inspected.
number_of_sessions = 8

# Both dictionaries are keyed by the asset's position in the size-sorted list.
asset_size_dic = {}
compression_ratio_dic = {}

with DandiAPIClient() as client:
    dandi_set = client.get_dandiset(dandi_dataset_id, "draft")

    # Order assets by size so the remote reads stay as fast as possible.
    asset_list = sorted(dandi_set.get_assets(), key=lambda asset: asset.size)

    # Slicing (instead of indexing up to number_of_sessions) avoids an
    # IndexError on dandisets that hold fewer assets than requested.
    for index, asset in enumerate(asset_list[:number_of_sessions]):
        print(index)
        asset_size_dic[index] = asset.size / 10 ** 9  # bytes -> GB
        print(asset.path)

        s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
        with NWBHDF5IO(s3_path, mode='r', load_namespaces=True, driver='ros3') as io:
            nwbfile = io.read()

            compression_ratio_list = []
            for obj in nwbfile.objects.values():
                # Only ElectricalSeries are of interest; testing the object
                # once avoids repeating the check for each of its fields.
                if not isinstance(obj, pynwb.ecephys.ElectricalSeries):
                    continue
                for field in obj.fields.values():
                    if not isinstance(field, h5py.Dataset):
                        continue
                    # Empty (size 0) or null-dataspace (size None) datasets
                    # have no defined ratio and would break the division.
                    if not field.size:
                        continue
                    uncompressed_bytes = field.size * field.dtype.itemsize
                    stored_bytes = field.id.get_storage_size()
                    compression_ratio_list.append(stored_bytes / uncompressed_bytes)

        compression_ratio_dic[index] = compression_ratio_list

# An asset without any measurable ElectricalSeries dataset is reported as NaN
# explicitly rather than through np.mean([]), which emits a RuntimeWarning.
compression_averages_dic = {
    index: np.mean(ratios) if ratios else np.nan
    for index, ratios in compression_ratio_dic.items()
}

# Leave NaN entries out so that one asset without data does not turn the
# overall figure into NaN.
valid_averages = [
    average for average in compression_averages_dic.values() if not np.isnan(average)
]
overall_average = np.mean(valid_averages) if valid_averages else np.nan

print("")
print("Asset size")
print(asset_size_dic)
print("Compression averages")
print(compression_averages_dic)
print("Average of averages")
print(overall_average)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment