Last active
February 24, 2023 23:31
-
-
Save rly/5c21903d892674fde8bd11a3e7800373 to your computer and use it in GitHub Desktop.
Tailored Python script to replace particular values in an NWB file to conform with DANDI upload requirements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import glob
import os

import h5py
import numpy as np
import pynwb
# HDF5 variable-length string dtype, used below when creating or overwriting
# string datasets (subject age, experimenter).
STR_DTYPE = h5py.special_dtype(vlen=str)
def _read_scalar_str(dataset):
    """Return the value of a scalar HDF5 string dataset as ``str``."""
    value = dataset[()]
    # h5py >= 3 returns variable-length strings as bytes, while older
    # versions already return str; accept both.
    return value.decode("utf-8") if isinstance(value, bytes) else value


def _verify_adjustments(filepath):
    """Re-open *filepath* with PyNWB, print the adjusted values and check them.

    The file is opened read-only because verification never writes.

    Raises:
        AssertionError: If any value differs from what ``adjust`` should have
            left in the file.
    """
    with pynwb.NWBHDF5IO(filepath, "r", load_namespaces=True) as io:
        nwbfile = io.read()

        print("Sex:", nwbfile.subject.sex)
        assert nwbfile.subject.sex == "M"

        print("Species:", nwbfile.subject.species)
        assert nwbfile.subject.species == "Rattus norvegicus"

        print("Age:", nwbfile.subject.age)
        assert nwbfile.subject.age == "P4M/P8M"

        print("Experimenter:", nwbfile.experimenter[:])
        # array_equal yields a single bool whatever the number of entries.
        assert np.array_equal(
            nwbfile.experimenter, np.array(["Joshi, Abhilasha"])
        )

        print(
            "'camera_sample_frame_counts' processing module found:",
            "camera_sample_frame_counts" in nwbfile.processing,
        )
        assert "camera_sample_frame_counts" not in nwbfile.processing

        print(
            "'video_files' processing module found:",
            "video_files" in nwbfile.processing,
        )
        assert "video_files" not in nwbfile.processing

        print(
            "'analog' processing module found:",
            "analog" in nwbfile.processing,
        )
        # The analog module may only remain if it actually holds data.
        assert (
            "analog" not in nwbfile.processing
            or len(nwbfile.processing["analog"]["analog"]["analog"].data) > 0
        )


def adjust(filepath, print_changes):
    """Patch one NWB file in place to conform with DANDI upload requirements.

    The file is edited directly through h5py rather than rewritten, so
    deleting groups does not shrink it on disk. Every step checks the current
    state first, so running the function again on an already adjusted file
    makes no further changes.

    Changes applied:
      * subject sex ``"Male"`` -> ``"M"``
      * subject species ``"Rat"`` -> ``"Rattus norvegicus"``
      * subject age added as ``"P4M/P8M"`` when missing
      * experimenter ``["Abhilasha Joshi"]`` -> ``["Joshi, Abhilasha"]``
      * ``camera_sample_frame_counts`` and ``video_files`` processing modules
        removed when present
      * ``analog`` processing module removed when its time series is empty

    Args:
        filepath: Path to the NWB (HDF5) file to modify.
        print_changes: If true, re-read the file with PyNWB afterwards, print
            the adjusted values and assert that they are as expected.

    Raises:
        AssertionError: If sex, species or experimenter hold a value other
            than the known original or the already adjusted one, or if the
            optional verification fails.
        KeyError: If the sex, species or experimenter dataset is missing.
    """
    print("-------------------------------------------------------------------")
    print("Adjusting NWB file:", filepath)

    with h5py.File(filepath, "a") as f:
        # replace subject sex value "Male" with "M"
        sex_dataset = f["/general/subject/sex"]
        sex_value = _read_scalar_str(sex_dataset)
        assert sex_value in ("Male", "M"), f"Unexpected subject sex: {sex_value!r}"
        if sex_value == "Male":
            new_sex_value = "M"
            print(f"Adjusting subject sex from '{sex_value}' to '{new_sex_value}'.")
            sex_dataset[()] = new_sex_value

        # replace subject species value "Rat" with "Rattus norvegicus"
        species_dataset = f["/general/subject/species"]
        species_value = _read_scalar_str(species_dataset)
        assert species_value in (
            "Rat",
            "Rattus norvegicus",
        ), f"Unexpected subject species: {species_value!r}"
        if species_value == "Rat":
            new_species_value = "Rattus norvegicus"
            print(
                f"Adjusting subject species from '{species_value}' to '{new_species_value}'."
            )
            species_dataset[()] = new_species_value

        # add subject age dataset with value "P4M/P8M"
        if "age" not in f["/general/subject"]:
            new_age_value = "P4M/P8M"
            print(f"Adding missing subject age, set to '{new_age_value}'.")
            f["/general/subject"].create_dataset(
                name="age", data=new_age_value, dtype=STR_DTYPE
            )

        # replace experimenter list value ["Abhilasha Joshi"] with ["Joshi, Abhilasha"]
        experimenter_value = f["/general/experimenter"][:].astype(str)
        # np.array_equal gives one bool even when the stored list has a
        # different length; a bare ``==`` on arrays would raise in that case.
        needs_rename = np.array_equal(
            experimenter_value, np.array(["Abhilasha Joshi"])
        )
        already_renamed = np.array_equal(
            experimenter_value, np.array(["Joshi, Abhilasha"])
        )
        assert (
            needs_rename or already_renamed
        ), f"Unexpected experimenter: {experimenter_value}"
        if needs_rename:
            new_experimenter_value = np.array(["Joshi, Abhilasha"], dtype=STR_DTYPE)
            print(
                f"Adjusting experimenter from {experimenter_value} to {new_experimenter_value}."
            )
            f["/general/experimenter"][:] = new_experimenter_value

        # NOTE: adding a placeholder "keywords" dataset is disabled.

        # remove the "camera_sample_frame_counts" processing module
        # NOTE: this does not shrink the file
        if "/processing/camera_sample_frame_counts" in f:
            print("Removing camera_sample_frame_counts processing module.")
            del f["/processing/camera_sample_frame_counts"]

        # remove the "video_files" processing module
        # NOTE: decision on 2023-02-21 to remove video files from the NWB file
        # instead of rewriting their external file paths to relative paths
        # NOTE: this does not shrink the file
        # Guarded so that a second run on the same file does not raise KeyError.
        if "/processing/video_files" in f:
            print("Removing video_files processing module.")
            del f["/processing/video_files"]

        # if the analog time series has no data, remove the "analog" processing module
        # NOTE: this does not shrink the file
        # The presence check keeps this step from failing when the module is
        # absent or was already removed by an earlier run.
        analog_data_path = "/processing/analog/analog/analog/data"
        if analog_data_path in f and len(f[analog_data_path]) == 0:
            print("Analog time series has no data. Removing analog processing module.")
            del f["/processing/analog"]

    # verify changes (after the h5py handle has been closed)
    if print_changes:
        _verify_adjustments(filepath)
def main():
    """Parse the command line and adjust the requested NWB file(s).

    Positional argument ``path`` is interpreted as follows:
      * a path ending in ``.nwb`` is treated as a single file;
      * an existing directory is searched (non-recursively) for ``*.nwb``
        files, with or without a trailing path separator;
      * anything else is used as a filename prefix, i.e. ``path + "*.nwb"``.

    The ``--print`` flag makes each file be re-read and its adjusted values
    printed and checked.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path", help="path to the NWB file or directory of NWB files to adjust"
    )
    parser.add_argument(
        "--print",
        action="store_true",
        help="whether to print the adjusted values from the NWB file",
    )
    args = parser.parse_args()

    path = args.path
    if path.endswith(".nwb"):
        filepaths = [path]
    elif os.path.isdir(path):
        # Join with a separator so "dir" and "dir/" both list the directory's
        # contents; plain concatenation would turn "dir" into "dir*.nwb".
        # glob.escape keeps special characters in the directory name literal.
        pattern = os.path.join(glob.escape(path), "*.nwb")
        filepaths = sorted(glob.glob(pattern))
    else:
        # Not a directory: keep the prefix-style matching.
        filepaths = sorted(glob.glob(path + "*.nwb"))

    print("Adjusting these NWB files:", filepaths, sep="\n")
    for filepath in filepaths:
        adjust(filepath=filepath, print_changes=args.print)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code does the following without rewriting the whole file:
- change the external file path of all videos from a specific absolute path to a specific relative path

To run:
Install nwbinspector, or load an environment with the latest version of nwbinspector installed.
python adjust_nwb.py [path to the NWB file]
nwbinspector --config dandi [path to the NWB file]
If the critical errors go away, then delete the copy of the NWB file. Then:
python adjust_nwb.py [path to the directory containing all your NWB files that you want to adjust]
nwbinspector --config dandi [path to the same directory]