From 1b2c55fda8adb0ccbdb29239d96f3637909cf460 Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Mon, 14 Jul 2025 17:53:57 +0100 Subject: [PATCH 1/6] more validation for Start/Stride/Count --- Zarr.m | 14 ++++++++++++++ zarrread.m | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Zarr.m b/Zarr.m index 66d6046..11fd169 100644 --- a/Zarr.m +++ b/Zarr.m @@ -103,6 +103,13 @@ function pyReloadInProcess() paramName) end + if any(params>dims) + error("MATLAB:Zarr:PartialReadOutOfBounds",... + "Elements in %s must not exceed "+... + "the corresponding Zarr array dimensions (%s).",... + paramName, join(string(dims), "x")) + end + newParams = params; end @@ -315,6 +322,13 @@ function makeZarrGroups(existingParentPath, newGroupsPath) count = Zarr.processPartialReadParams(count, info.shape,... maxCount, "Count"); + if any(count>maxCount) + error("MATLAB:Zarr:PartialReadOutOfBounds",... + "Requested Count in combination with other "+... + "partial read parameters exceeds "+... + "Zarr array dimensions.") + end + % Convert partial read parameters to tensorstore-style % indexing start = start - 1; % tensorstore is 0-based diff --git a/zarrread.m b/zarrread.m index fbfd458..3d7b068 100644 --- a/zarrread.m +++ b/zarrread.m @@ -27,9 +27,9 @@ arguments filepath {mustBeTextScalar, mustBeNonzeroLengthText} - options.Start (1,:) {mustBeInteger, mustBePositive} = []; - options.Count (1,:) {mustBeInteger, mustBePositive} = []; - options.Stride (1,:) {mustBeInteger, mustBePositive} = []; + options.Start (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = []; + options.Count (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = []; + options.Stride (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = []; end zarrObj = Zarr(filepath); From 5dcc95d756315cea03bb40d36795c69b972c135b Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Mon, 14 Jul 2025 18:10:56 +0100 Subject: [PATCH 2/6] updating tZarrRemote.m from main branch --- test/tZarrRead.m | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/test/tZarrRead.m b/test/tZarrRead.m index 262c77b..6a2b25b 100644 --- a/test/tZarrRead.m +++ b/test/tZarrRead.m @@ -136,24 +136,35 @@ function invalidFilePath(testcase) function invalidPartialReadParams(testcase) % Verify zarrread errors when invalid partial read - % Start/Stride/Count are used - + % Start/Stride/Count are used. zpath = testcase.ArrPathReadSmall; % a 2D array, 3x4 + % Wrong number of dimensions in comparison to the array errID = 'MATLAB:Zarr:badPartialReadDimensions'; - wrongNumberOfDimensions = [1,1,1]; - testcase.verifyError(... - @()zarrread(zpath,Start=wrongNumberOfDimensions),... - errID); - testcase.verifyError(... - @()zarrread(zpath,Stride=wrongNumberOfDimensions),... - errID); - testcase.verifyError(... - @()zarrread(zpath,Count=wrongNumberOfDimensions),... - errID); - - %TODO: negative values, wrong datatypes, out of bounds - + wrongDims = [1,1,1]; + testcase.verifyError(@()zarrread(zpath,Start=wrongDims),errID); + testcase.verifyError(@()zarrread(zpath,Stride=wrongDims),errID); + testcase.verifyError(@()zarrread(zpath,Count=wrongDims),errID); + + % Invalid type + errID = 'MATLAB:validators:mustBeNumericOrLogical'; + testcase.verifyError(@()zarrread(zpath,"Start",""),errID); + testcase.verifyError(@()zarrread(zpath,"Stride",""),errID); + testcase.verifyError(@()zarrread(zpath,"Count",""),errID); + + % Negative values + inpVal = [-1 1]; + errID = 'MATLAB:validators:mustBePositive'; + testcase.verifyError(@()zarrread(zpath,"Start",inpVal),errID); + testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID); + testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID); + + % Input out of bounds + inpVal = [100 200]; + errID = 'MATLAB:Python:PyException'; + testcase.verifyError(@()zarrread(zpath,"Start",inpVal),errID); + %testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID); + testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID); end end end \ No newline at end of file From c956357754b3fdb03076fc48c4abab9285f2a3a5 Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Mon, 14 Jul 2025 19:58:20 +0100 Subject: [PATCH 3/6] Guard against running out of memory --- Zarr.m | 21 ++++++++++++++++++--- ZarrDatatype.m | 14 ++++++++++++++ test/tZarrRead.m | 22 ++++++++++++++++++---- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Zarr.m b/Zarr.m index 11fd169..20de806 100644 --- a/Zarr.m +++ b/Zarr.m @@ -336,13 +336,28 @@ function makeZarrGroups(existingParentPath, newGroupsPath) % (it does NOT include element at the end index) endInds = start + stride.*count; + % Store the datatype + obj.Datatype = ZarrDatatype.fromZarrType(info.dtype); + + % Check if reading requested data might exceed available memory + try + zeros(count, obj.Datatype.MATLABType); + catch ME + if strcmp(ME.identifier, 'MATLAB:array:SizeLimitExceeded') + %rethrow(ME) + error("MATLAB:Zarr:OutOfMemory",... + "Reading requested data (%s %s array) "+... + "might exceed available memory.",... + join(string(count), "x"), obj.Datatype.MATLABType) + end + end + % Read the data ndArrayData = Zarr.ZarrPy.readZarr(obj.KVStoreSchema,... start, endInds, stride); - % Store the datatype - obj.Datatype = ZarrDatatype.fromTensorstoreType(ndArrayData.dtype.name); - + % assert(ndArrayData.dtype.name == obj.Datatype.TensorstoreType) + % Convert the numpy array to MATLAB array data = cast(ndArrayData, obj.Datatype.MATLABType); end diff --git a/ZarrDatatype.m b/ZarrDatatype.m index b908eaa..899ba5b 100644 --- a/ZarrDatatype.m +++ b/ZarrDatatype.m @@ -74,6 +74,15 @@ obj = ZarrDatatype(ind); end + function obj = fromZarrType(zarrType) + % Create a datatype object based on Zarr datatype name + arguments + zarrType (1,1) string {ZarrDatatype.mustBeZarrType} + end + + ind = find(zarrType == ZarrDatatype.ZarrTypes); + obj = ZarrDatatype(ind); + end function mustBeMATLABType(type) % Validator for MATLAB types @@ -84,6 +93,11 @@ function mustBeTensorstoreType(type) % Validator for Tensorstore types mustBeMember(type, ZarrDatatype.TensorstoreTypes) end + + function mustBeZarrType(type) + % Validator for Zarr types + mustBeMember(type, ZarrDatatype.ZarrTypes) + end end end \ No newline at end of file diff --git a/test/tZarrRead.m b/test/tZarrRead.m index 6a2b25b..0511adb 100644 --- a/test/tZarrRead.m +++ b/test/tZarrRead.m @@ -109,6 +109,15 @@ function nonExistentArray(testcase) testcase.verifyError(@()zarrread('nonexistent/'),errID); end + function tooBigArray(testcase) + % Verify zarrread error when a user tries to read data that is + % too large + + errID = 'MATLAB:Zarr:OutOfMemory'; + bigData = 'https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com/elevation/'; + testcase.verifyError(@()zarrread(bigData,Count=[100000,100000]),errID); + end + function invalidFilePath(testcase) % Verify zarrread error when an invalid file path is used. @@ -147,7 +156,7 @@ function invalidPartialReadParams(testcase) testcase.verifyError(@()zarrread(zpath,Count=wrongDims),errID); % Invalid type - errID = 'MATLAB:validators:mustBeNumericOrLogical'; + errID = 'MATLAB:validators:mustBeNumeric'; testcase.verifyError(@()zarrread(zpath,"Start",""),errID); testcase.verifyError(@()zarrread(zpath,"Stride",""),errID); testcase.verifyError(@()zarrread(zpath,"Count",""),errID); @@ -159,12 +168,17 @@ function invalidPartialReadParams(testcase) testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID); testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID); - % Input out of bounds + % Parameters out of bounds inpVal = [100 200]; - errID = 'MATLAB:Python:PyException'; + errID = 'MATLAB:Zarr:PartialReadOutOfBounds'; testcase.verifyError(@()zarrread(zpath,"Start",inpVal),errID); - %testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID); + testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID); testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID); + + % Combination of parameters out of bounds + testcase.verifyError(... + @()zarrread(zpath,Start=[3 4],Count=[2 2]),errID) + end end end \ No newline at end of file From c6ff6d72e66b5aa2e6394db411ff09731a4f859d Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:30:07 +0100 Subject: [PATCH 4/6] more recent MATLAB version --- .github/workflows/test_setup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_setup.yml b/.github/workflows/test_setup.yml index 874db2c..eb34af5 100644 --- a/.github/workflows/test_setup.yml +++ b/.github/workflows/test_setup.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-14] - matlab-version: ['R2024a'] + matlab-version: ['R2024b'] steps: - name: Checkout code From 9ef05b5c457e0181e708ca1803e17dfa5acad5aa Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Tue, 15 Jul 2025 17:48:10 +0100 Subject: [PATCH 5/6] updating error messages, clean-up, restricting remote test to >=R2024b --- .github/workflows/test_setup.yml | 2 +- Zarr.m | 14 +++++--------- test/tZarrRead.m | 2 ++ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test_setup.yml b/.github/workflows/test_setup.yml index eb34af5..1140d09 100644 --- a/.github/workflows/test_setup.yml +++ b/.github/workflows/test_setup.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-14] - matlab-version: ['R2024b'] + matlab-version: ['R2024a', 'R2024b'] steps: - name: Checkout code diff --git a/Zarr.m b/Zarr.m index 20de806..db396b7 100644 --- a/Zarr.m +++ b/Zarr.m @@ -106,8 +106,8 @@ function pyReloadInProcess() if any(params>dims) error("MATLAB:Zarr:PartialReadOutOfBounds",... "Elements in %s must not exceed "+... - "the corresponding Zarr array dimensions (%s).",... - paramName, join(string(dims), "x")) + "the corresponding Zarr array dimensions.",... + paramName) end newParams = params; @@ -325,8 +325,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath) if any(count>maxCount) error("MATLAB:Zarr:PartialReadOutOfBounds",... "Requested Count in combination with other "+... - "partial read parameters exceeds "+... - "Zarr array dimensions.") + "parameters exceeds Zarr array dimensions.") end % Convert partial read parameters to tensorstore-style @@ -344,19 +343,16 @@ function makeZarrGroups(existingParentPath, newGroupsPath) zeros(count, obj.Datatype.MATLABType); catch ME if strcmp(ME.identifier, 'MATLAB:array:SizeLimitExceeded') - %rethrow(ME) error("MATLAB:Zarr:OutOfMemory",... - "Reading requested data (%s %s array) "+... + "Reading requested data (%s %s matrix) "+... "might exceed available memory.",... - join(string(count), "x"), obj.Datatype.MATLABType) + join(string(count), "-by-"), obj.Datatype.MATLABType) end end % Read the data ndArrayData = Zarr.ZarrPy.readZarr(obj.KVStoreSchema,... start, endInds, stride); - - % assert(ndArrayData.dtype.name == obj.Datatype.TensorstoreType) % Convert the numpy array to MATLAB array data = cast(ndArrayData, obj.Datatype.MATLABType); diff --git a/test/tZarrRead.m b/test/tZarrRead.m index 892a9ea..b5ef471 100644 --- a/test/tZarrRead.m +++ b/test/tZarrRead.m @@ -113,6 +113,8 @@ function tooBigArray(testcase) % Verify zarrread error when a user tries to read data that is % too large + testcase.assumeFalse(isMATLABReleaseOlderThan('R2024b'),... + "Remote workflows are fully supported only from R2024b") errID = 'MATLAB:Zarr:OutOfMemory'; bigData = 'https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com/elevation/'; testcase.verifyError(@()zarrread(bigData,Count=[100000,100000]),errID); From c7a55bf1a35f228208bf680136312d4d5dc1b54b Mon Sep 17 00:00:00 2001 From: Kris Fedorenko <1648280+krisfed@users.noreply.github.com> Date: Tue, 15 Jul 2025 19:08:40 +0100 Subject: [PATCH 6/6] using local file for out of memory test --- .github/workflows/test_setup.yml | 2 +- test/tZarrRead.m | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_setup.yml b/.github/workflows/test_setup.yml index 1140d09..874db2c 100644 --- a/.github/workflows/test_setup.yml +++ b/.github/workflows/test_setup.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-14] - matlab-version: ['R2024a', 'R2024b'] + matlab-version: ['R2024a'] steps: - name: Checkout code diff --git a/test/tZarrRead.m b/test/tZarrRead.m index b5ef471..7201be5 100644 --- a/test/tZarrRead.m +++ b/test/tZarrRead.m @@ -113,11 +113,10 @@ function tooBigArray(testcase) % Verify zarrread error when a user tries to read data that is % too large - testcase.assumeFalse(isMATLABReleaseOlderThan('R2024b'),... - "Remote workflows are fully supported only from R2024b") + bigDataPath = "bigData/myzarr"; + zarrcreate(bigDataPath, [100000,100000], Datatype='single'); errID = 'MATLAB:Zarr:OutOfMemory'; - bigData = 'https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com/elevation/'; - testcase.verifyError(@()zarrread(bigData,Count=[100000,100000]),errID); + testcase.verifyError(@()zarrread(bigDataPath),errID); end function invalidFilePath(testcase)