-
Notifications
You must be signed in to change notification settings - Fork 25
Open
Description
What happened?
I was working on catalystneuro/neuroconv#1003 and ran into yet another issue with chunking and compression on compound data types (e.g., PlaneSegmentation.pixel_mask).
This time I noticed that when writing such data types with a Zarr backend, any custom data I/O options were ignored/discarded and replaced with the Zarr default compressor, Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0).
See the minimal example below:
Steps to Reproduce
import os
import shutil
from pathlib import Path
import numcodecs
from hdmf_zarr import ZarrDataIO
from hdmf_zarr.nwb import NWBZarrIO
from pynwb.ophys import PlaneSegmentation
from pynwb.testing.mock.file import mock_NWBFile
from pynwb.testing.mock.ophys import mock_ImagingPlane
def main():
    """Reproduce the compound-dtype compressor issue with a Zarr backend.

    Builds a mock NWB file whose ``PlaneSegmentation`` has a ``pixel_mask``
    column wrapped in ``ZarrDataIO`` with a ``GZip(level=1)`` compressor,
    writes it with ``NWBZarrIO``, reads it back, and asserts that the
    compressor found on the stored data equals the requested one.

    Raises:
        AssertionError: If the compressor read back is not GZip level 1.
    """
    zarr_file_path = Path("test_compound_dtype_bug.nwb.zarr")

    # Start from a clean slate: a Zarr store is normally a directory, but a
    # stray regular file at the same path is removed as well.
    if zarr_file_path.is_dir():
        shutil.rmtree(zarr_file_path)
    elif zarr_file_path.exists():
        os.remove(zarr_file_path)

    # Minimal NWB file with an imaging plane to attach the segmentation to.
    nwbfile = mock_NWBFile()
    imaging_plane = mock_ImagingPlane(nwbfile=nwbfile)
    plane_segmentation = PlaneSegmentation(
        description="Test plane segmentation for compound dtype bug demonstration",
        imaging_plane=imaging_plane,
        name="TestPlaneSegmentation",
    )

    # Pixel mask data (compound dtype): every ROI gets the same three
    # (x, y, weight) entries, and the index holds the cumulative end offset
    # of each ROI within the flattened mask list.
    n_rois = 10
    n_pixels_per_roi = 3
    roi_mask = [(x, x, 1.0) for x in range(n_pixels_per_roi)]
    pixel_mask = roi_mask * n_rois
    pixel_mask_index = [
        n_pixels_per_roi * roi_count for roi_count in range(1, n_rois + 1)
    ]

    # Request a non-default compressor for the compound-dtype column.
    data = ZarrDataIO(data=pixel_mask, compressor=numcodecs.GZip(level=1))

    # One (empty) row per ROI before the indexed column is attached.
    for _ in pixel_mask_index:
        plane_segmentation.add_row()
    plane_segmentation.add_column(
        name="pixel_mask",
        description="Pixel mask for each ROI",
        data=data,
        index=pixel_mask_index,
    )

    # Add to processing module
    nwbfile.create_processing_module("ophys", "ophys processing module")
    nwbfile.processing["ophys"].add(plane_segmentation)

    store_path = str(zarr_file_path)
    with NWBZarrIO(store_path, mode="w") as io:
        io.write(nwbfile)

    # Read the file back and inspect the compressor on the pixel mask data.
    with NWBZarrIO(store_path, mode="r") as io:
        read_nwbfile = io.read()
        segmentation = read_nwbfile.processing["ophys"]["TestPlaneSegmentation"]
        expected_compressor = numcodecs.GZip(level=1)
        # Observed instead: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
        actual_compressor = segmentation.pixel_mask.data.compressor
        assert actual_compressor == expected_compressor, "Compressor does not match expected GZip level 1"
# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Traceback
python minimal_compound_dtype_bug_example.py
/Users/pauladkisson/Documents/CatalystNeuro/Neuroconv/neuroconv/minimal_compound_dtype_bug_example.py:52: UserWarning: Column 'pixel_mask' is predefined in PlaneSegmentation with index=True which does not match the entered index argument. The predefined index spec will be ignored. Please ensure the new column complies with the spec. This will raise an error in a future version of HDMF.
plane_segmentation.add_column(name="pixel_mask", description="Pixel mask for each ROI", data=data, index=pixel_mask_index)
Traceback (most recent call last):
File "/Users/pauladkisson/Documents/CatalystNeuro/Neuroconv/neuroconv/minimal_compound_dtype_bug_example.py", line 72, in <module>
main()
File "/Users/pauladkisson/Documents/CatalystNeuro/Neuroconv/neuroconv/minimal_compound_dtype_bug_example.py", line 69, in main
assert actual_compressor == expected_compressor, "Compressor does not match expected GZip level 1"
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: Compressor does not match expected GZip level 1
Operating System
macOS
Python Version
3.12
Package Versions
No response
Metadata
Metadata
Assignees
Labels
No labels