rly · February 24, 2023 23:31 · rly · Feb 18, 2023
diff --git a/adjust_nwb.py b/adjust_nwb.py
 import glob
 import h5py
 import numpy as np
 import argparse
 import pynwb


 # Variable-length string dtype, passed as `dtype` when this script creates or
 # rewrites HDF5 string datasets (subject age, experimenter).
 STR_DTYPE = h5py.special_dtype(vlen=str)


 def _to_text(value):
    """Return *value* (``bytes`` or ``str``) as ``str``, decoding bytes as UTF-8."""
    # Depending on the h5py version, HDF5 strings are read back as bytes or as
    # str, so accept both instead of calling .decode() unconditionally.
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)


 def _replace_scalar_text(f, path, old, new, label):
    """Rewrite the scalar string dataset at *path* from *old* to *new*.

    Nothing is written when the dataset already holds *new*.

    Raises:
        AssertionError: If the stored value is neither *old* nor *new*.
    """
    current = _to_text(f[path][()])
    assert current in (old, new), f"Unexpected {label} value: {current!r}"
    if current == old:
        print(f"Adjusting {label} from '{current}' to '{new}'.")
        f[path][()] = new


 def _remove_processing_module(f, name):
    """Unlink the group ``/processing/<name>`` if it exists."""
    if name in f["/processing"]:
        print(f"Removing {name} processing module.")
        del f["/processing/" + name]


 def adjust(filepath, print_changes):
    """Apply in-place metadata and processing-module fixes to one NWB file.

    The file is edited directly with h5py:

    * subject sex "Male" becomes "M"
    * subject species "Rat" becomes "Rattus norvegicus"
    * a subject age of "P4M/P8M" is added when no age dataset exists
    * experimenter ["Abhilasha Joshi"] becomes ["Joshi, Abhilasha"]
    * the "camera_sample_frame_counts" and "video_files" processing modules
      are removed when present
    * the "analog" processing module is removed when its time series is empty

    Every step checks the current state first, so the function can be run
    again on a file that has already been adjusted.

    Args:
        filepath: Path to the NWB (HDF5) file to modify.
        print_changes: If true, reopen the file with pynwb afterwards, print
            the adjusted values and assert that they are as expected.

    Raises:
        AssertionError: If a value to be replaced is neither the known old
            value nor the new one, or if the verification pass fails.
        KeyError: If a required dataset or group (for example
            ``/general/subject/sex`` or ``/processing``) is missing.
    """
    print("-------------------------------------------------------------------")
    print("Adjusting NWB file:", filepath)
    with h5py.File(filepath, "a") as f:
        # replace subject sex value "Male" with "M"
        _replace_scalar_text(f, "/general/subject/sex", "Male", "M", "subject sex")

        # replace subject species value "Rat" with "Rattus norvegicus"
        _replace_scalar_text(
            f,
            "/general/subject/species",
            "Rat",
            "Rattus norvegicus",
            "subject species",
        )

        # add subject age dataset with value "P4M/P8M"
        subject = f["/general/subject"]
        if "age" not in subject:
            new_age_value = "P4M/P8M"
            print(f"Adding missing subject age, set to '{new_age_value}'.")
            subject.create_dataset(name="age", data=new_age_value, dtype=STR_DTYPE)

        # replace experimenter list value ["Abhilasha Joshi"] with ["Joshi, Abhilasha"]
        # Compare plain Python lists: using `in` / `==` on NumPy arrays raises
        # "truth value of an array ... is ambiguous" as soon as the dataset
        # holds more than one experimenter.
        old_experimenters = ["Abhilasha Joshi"]
        new_experimenters = ["Joshi, Abhilasha"]
        experimenter_value = [_to_text(v) for v in f["/general/experimenter"][:]]
        assert experimenter_value in (old_experimenters, new_experimenters), (
            f"Unexpected experimenter value: {experimenter_value}"
        )
        if experimenter_value == old_experimenters:
            print(
                f"Adjusting experimenter from {experimenter_value} to {new_experimenters}."
            )
            f["/general/experimenter"][:] = np.array(
                new_experimenters, dtype=STR_DTYPE
            )

        # remove the "camera_sample_frame_counts" processing module
        # NOTE: this does not shrink the file
        _remove_processing_module(f, "camera_sample_frame_counts")

        # remove the "video_files" processing module
        # NOTE: decision on 2023-02-21 to remove video files from the NWB file
        # instead of rewriting their external file paths.
        # NOTE: this does not shrink the file
        # Checked independently of camera_sample_frame_counts so that either
        # module may be absent without skipping or breaking this step.
        _remove_processing_module(f, "video_files")

        # if the analog time series has no data, remove the "analog" processing module
        # NOTE: this does not shrink the file
        # The membership test keeps a second run (module already removed) from
        # failing with KeyError.
        analog_data_path = "/processing/analog/analog/analog/data"
        if analog_data_path in f and len(f[analog_data_path]) == 0:
            print("Analog time series has no data. Removing analog processing module.")
            del f["/processing/analog"]

    if not print_changes:
        return

    # verify changes; the file is only read here, so open it read-only
    with pynwb.NWBHDF5IO(filepath, "r", load_namespaces=True) as io:
        nwbfile = io.read()
        print("Sex:", nwbfile.subject.sex)
        assert nwbfile.subject.sex == "M"
        print("Species:", nwbfile.subject.species)
        assert nwbfile.subject.species == "Rattus norvegicus"
        print("Age:", nwbfile.subject.age)
        assert nwbfile.subject.age == "P4M/P8M"
        print("Experimenter:", nwbfile.experimenter[:])
        # list() gives an unambiguous comparison for any sequence of names
        assert list(nwbfile.experimenter) == ["Joshi, Abhilasha"]
        print(
            "'camera_sample_frame_counts' processing module found:",
            "camera_sample_frame_counts" in nwbfile.processing,
        )
        assert "camera_sample_frame_counts" not in nwbfile.processing
        print(
            "'video_files' processing module found:",
            "video_files" in nwbfile.processing,
        )
        assert "video_files" not in nwbfile.processing

        print(
            "'analog' processing module found:",
            "analog" in nwbfile.processing,
        )
        # the analog module may remain only if its time series has data
        assert (
            "analog" not in nwbfile.processing
            or len(nwbfile.processing["analog"]["analog"]["analog"].data) > 0
        )


 def main():
    """Parse the command line and adjust every selected NWB file.

    The positional ``path`` argument is interpreted as follows:

    * a path ending in ".nwb" is used as a single file;
    * an existing directory selects every "*.nwb" file directly inside it,
      with or without a trailing path separator;
    * anything else is treated as a filename prefix and expanded with
      ``<path>*.nwb``.

    With ``--print`` each file is re-read after adjustment and the adjusted
    values are printed and checked.
    """
    import os  # local import: only needed here to recognise directory paths

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path", help="path to the NWB file or directory of NWB files to adjust"
    )
    parser.add_argument(
        "--print",
        action="store_true",
        help="whether to print the adjusted values from the NWB file",
    )
    args = parser.parse_args()
    path = args.path

    if path.endswith(".nwb"):
        filepaths = [path]
    elif os.path.isdir(path):
        # Join instead of concatenating: "data" + "*.nwb" would match siblings
        # such as "data_x.nwb" rather than the files inside "data".
        filepaths = sorted(glob.glob(os.path.join(path, "*.nwb")))
    else:
        # Not a directory: keep treating the argument as a filename prefix.
        filepaths = sorted(glob.glob(path + "*.nwb"))

    print("Adjusting these NWB files:", filepaths, sep="\n")
    for filepath in filepaths:
        adjust(filepath=filepath, print_changes=args.print)


 # Run the command-line entry point only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
	import glob
	import h5py
	import numpy as np
	import argparse
	import pynwb


	# Variable-length string dtype, passed as `dtype` when this script creates or
	# rewrites HDF5 string datasets (subject age, experimenter).
	STR_DTYPE = h5py.special_dtype(vlen=str)


	def _to_text(value):
	    """Return *value* (``bytes`` or ``str``) as ``str``, decoding bytes as UTF-8."""
	    # Depending on the h5py version, HDF5 strings are read back as bytes or as
	    # str, so accept both instead of calling .decode() unconditionally.
	    if isinstance(value, bytes):
	        return value.decode("utf-8")
	    return str(value)


	def _replace_scalar_text(f, path, old, new, label):
	    """Rewrite the scalar string dataset at *path* from *old* to *new*.

	    Nothing is written when the dataset already holds *new*.

	    Raises:
	        AssertionError: If the stored value is neither *old* nor *new*.
	    """
	    current = _to_text(f[path][()])
	    assert current in (old, new), f"Unexpected {label} value: {current!r}"
	    if current == old:
	        print(f"Adjusting {label} from '{current}' to '{new}'.")
	        f[path][()] = new


	def _remove_processing_module(f, name):
	    """Unlink the group ``/processing/<name>`` if it exists."""
	    if name in f["/processing"]:
	        print(f"Removing {name} processing module.")
	        del f["/processing/" + name]


	def adjust(filepath, print_changes):
	    """Apply in-place metadata and processing-module fixes to one NWB file.

	    The file is edited directly with h5py:

	    * subject sex "Male" becomes "M"
	    * subject species "Rat" becomes "Rattus norvegicus"
	    * a subject age of "P4M/P8M" is added when no age dataset exists
	    * experimenter ["Abhilasha Joshi"] becomes ["Joshi, Abhilasha"]
	    * the "camera_sample_frame_counts" and "video_files" processing modules
	      are removed when present
	    * the "analog" processing module is removed when its time series is empty

	    Every step checks the current state first, so the function can be run
	    again on a file that has already been adjusted.

	    Args:
	        filepath: Path to the NWB (HDF5) file to modify.
	        print_changes: If true, reopen the file with pynwb afterwards, print
	            the adjusted values and assert that they are as expected.

	    Raises:
	        AssertionError: If a value to be replaced is neither the known old
	            value nor the new one, or if the verification pass fails.
	        KeyError: If a required dataset or group (for example
	            ``/general/subject/sex`` or ``/processing``) is missing.
	    """
	    print("-------------------------------------------------------------------")
	    print("Adjusting NWB file:", filepath)
	    with h5py.File(filepath, "a") as f:
	        # replace subject sex value "Male" with "M"
	        _replace_scalar_text(f, "/general/subject/sex", "Male", "M", "subject sex")

	        # replace subject species value "Rat" with "Rattus norvegicus"
	        _replace_scalar_text(
	            f,
	            "/general/subject/species",
	            "Rat",
	            "Rattus norvegicus",
	            "subject species",
	        )

	        # add subject age dataset with value "P4M/P8M"
	        subject = f["/general/subject"]
	        if "age" not in subject:
	            new_age_value = "P4M/P8M"
	            print(f"Adding missing subject age, set to '{new_age_value}'.")
	            subject.create_dataset(name="age", data=new_age_value, dtype=STR_DTYPE)

	        # replace experimenter list value ["Abhilasha Joshi"] with ["Joshi, Abhilasha"]
	        # Compare plain Python lists: using `in` / `==` on NumPy arrays raises
	        # "truth value of an array ... is ambiguous" as soon as the dataset
	        # holds more than one experimenter.
	        old_experimenters = ["Abhilasha Joshi"]
	        new_experimenters = ["Joshi, Abhilasha"]
	        experimenter_value = [_to_text(v) for v in f["/general/experimenter"][:]]
	        assert experimenter_value in (old_experimenters, new_experimenters), (
	            f"Unexpected experimenter value: {experimenter_value}"
	        )
	        if experimenter_value == old_experimenters:
	            print(
	                f"Adjusting experimenter from {experimenter_value} to {new_experimenters}."
	            )
	            f["/general/experimenter"][:] = np.array(
	                new_experimenters, dtype=STR_DTYPE
	            )

	        # remove the "camera_sample_frame_counts" processing module
	        # NOTE: this does not shrink the file
	        _remove_processing_module(f, "camera_sample_frame_counts")

	        # remove the "video_files" processing module
	        # NOTE: decision on 2023-02-21 to remove video files from the NWB file
	        # instead of rewriting their external file paths.
	        # NOTE: this does not shrink the file
	        # Checked independently of camera_sample_frame_counts so that either
	        # module may be absent without skipping or breaking this step.
	        _remove_processing_module(f, "video_files")

	        # if the analog time series has no data, remove the "analog" processing module
	        # NOTE: this does not shrink the file
	        # The membership test keeps a second run (module already removed) from
	        # failing with KeyError.
	        analog_data_path = "/processing/analog/analog/analog/data"
	        if analog_data_path in f and len(f[analog_data_path]) == 0:
	            print("Analog time series has no data. Removing analog processing module.")
	            del f["/processing/analog"]

	    if not print_changes:
	        return

	    # verify changes; the file is only read here, so open it read-only
	    with pynwb.NWBHDF5IO(filepath, "r", load_namespaces=True) as io:
	        nwbfile = io.read()
	        print("Sex:", nwbfile.subject.sex)
	        assert nwbfile.subject.sex == "M"
	        print("Species:", nwbfile.subject.species)
	        assert nwbfile.subject.species == "Rattus norvegicus"
	        print("Age:", nwbfile.subject.age)
	        assert nwbfile.subject.age == "P4M/P8M"
	        print("Experimenter:", nwbfile.experimenter[:])
	        # list() gives an unambiguous comparison for any sequence of names
	        assert list(nwbfile.experimenter) == ["Joshi, Abhilasha"]
	        print(
	            "'camera_sample_frame_counts' processing module found:",
	            "camera_sample_frame_counts" in nwbfile.processing,
	        )
	        assert "camera_sample_frame_counts" not in nwbfile.processing
	        print(
	            "'video_files' processing module found:",
	            "video_files" in nwbfile.processing,
	        )
	        assert "video_files" not in nwbfile.processing

	        print(
	            "'analog' processing module found:",
	            "analog" in nwbfile.processing,
	        )
	        # the analog module may remain only if its time series has data
	        assert (
	            "analog" not in nwbfile.processing
	            or len(nwbfile.processing["analog"]["analog"]["analog"].data) > 0
	        )


	def main():
	    """Parse the command line and adjust every selected NWB file.

	    The positional ``path`` argument is interpreted as follows:

	    * a path ending in ".nwb" is used as a single file;
	    * an existing directory selects every "*.nwb" file directly inside it,
	      with or without a trailing path separator;
	    * anything else is treated as a filename prefix and expanded with
	      ``<path>*.nwb``.

	    With ``--print`` each file is re-read after adjustment and the adjusted
	    values are printed and checked.
	    """
	    import os  # local import: only needed here to recognise directory paths

	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "path", help="path to the NWB file or directory of NWB files to adjust"
	    )
	    parser.add_argument(
	        "--print",
	        action="store_true",
	        help="whether to print the adjusted values from the NWB file",
	    )
	    args = parser.parse_args()
	    path = args.path

	    if path.endswith(".nwb"):
	        filepaths = [path]
	    elif os.path.isdir(path):
	        # Join instead of concatenating: "data" + "*.nwb" would match siblings
	        # such as "data_x.nwb" rather than the files inside "data".
	        filepaths = sorted(glob.glob(os.path.join(path, "*.nwb")))
	    else:
	        # Not a directory: keep treating the argument as a filename prefix.
	        filepaths = sorted(glob.glob(path + "*.nwb"))

	    print("Adjusting these NWB files:", filepaths, sep="\n")
	    for filepath in filepaths:
	        adjust(filepath=filepath, print_changes=args.print)


	# Run the command-line entry point only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()