Skip to content

Commit 50c4950

Browse files
authored
More validation for partial read parameters
Merge pull request #116 from mathworks/partial-read-enh
2 parents 8898cee + 27aea9a commit 50c4950

File tree

4 files changed

+65
-10
lines changed

4 files changed

+65
-10
lines changed

Zarr.m

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ function pyReloadInProcess()
103103
paramName)
104104
end
105105

106+
if any(params>dims)
107+
error("MATLAB:Zarr:PartialReadOutOfBounds",...
108+
"Elements in %s must not exceed "+...
109+
"the corresponding Zarr array dimensions.",...
110+
paramName)
111+
end
112+
106113
newParams = params;
107114
end
108115

@@ -315,20 +322,39 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
315322
count = Zarr.processPartialReadParams(count, info.shape,...
316323
maxCount, "Count");
317324

325+
if any(count>maxCount)
326+
error("MATLAB:Zarr:PartialReadOutOfBounds",...
327+
"Requested Count in combination with other "+...
328+
"parameters exceeds Zarr array dimensions.")
329+
end
330+
318331
% Convert partial read parameters to tensorstore-style
319332
% indexing
320333
start = start - 1; % tensorstore is 0-based
321334
% Tensorstore uses end index instead of count
322335
% (it does NOT include element at the end index)
323336
endInds = start + stride.*count;
324337

338+
% Store the datatype
339+
obj.Datatype = ZarrDatatype.fromZarrType(info.dtype);
340+
341+
% Check whether the requested data would exceed MATLAB's maximum array size
342+
try
343+
zeros(count, obj.Datatype.MATLABType);
344+
catch ME
345+
if strcmp(ME.identifier, 'MATLAB:array:SizeLimitExceeded')
346+
error("MATLAB:Zarr:OutOfMemory",...
347+
"Reading requested data (%s %s array) "+...
348+
"would exceed maximum array size preference. "+...
349+
"Select a smaller subset of data to read.",...
350+
join(string(count), "-by-"), obj.Datatype.MATLABType)
351+
end
352+
end
353+
325354
% Read the data
326355
ndArrayData = Zarr.ZarrPy.readZarr(obj.KVStoreSchema,...
327356
start, endInds, stride);
328357

329-
% Store the datatype
330-
obj.Datatype = ZarrDatatype.fromTensorstoreType(ndArrayData.dtype.name);
331-
332358
% Convert the numpy array to MATLAB array
333359
data = cast(ndArrayData, obj.Datatype.MATLABType);
334360
end

ZarrDatatype.m

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,15 @@
7474
obj = ZarrDatatype(ind);
7575
end
7676

77+
function obj = fromZarrType(zarrType)
78+
% Create a datatype object based on Zarr datatype name
%
% Input:
%   zarrType - scalar string; must be one of the entries of
%              ZarrDatatype.ZarrTypes (enforced by the arguments
%              block through ZarrDatatype.mustBeZarrType).
% Output:
%   obj      - ZarrDatatype constructed from the index of zarrType
%              within ZarrDatatype.ZarrTypes.
79+
arguments
80+
zarrType (1,1) string {ZarrDatatype.mustBeZarrType}
81+
end
82+
83+
% Locate the position of the requested name in the list of Zarr type names
ind = find(zarrType == ZarrDatatype.ZarrTypes);
84+
% Build the datatype object from that position
obj = ZarrDatatype(ind);
85+
end
7786

7887
function mustBeMATLABType(type)
7988
% Validator for MATLAB types
@@ -84,6 +93,11 @@ function mustBeTensorstoreType(type)
8493
% Validator for Tensorstore types
8594
mustBeMember(type, ZarrDatatype.TensorstoreTypes)
8695
end
96+
97+
function mustBeZarrType(type)
98+
% Validator for Zarr types
% Delegates to mustBeMember, which errors unless type is a member of
% ZarrDatatype.ZarrTypes. Returns nothing when validation passes.
99+
mustBeMember(type, ZarrDatatype.ZarrTypes)
100+
end
87101
end
88102

89103
end

test/tZarrRead.m

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ function nonExistentArray(testcase)
109109
testcase.verifyError(@()zarrread('nonexistent/'),errID);
110110
end
111111

112+
function tooBigArray(testcase)
113+
% Verify zarrread error when a user tries to read data that is
114+
% too large
%
% Creates a 100000-by-100000 'single' Zarr array and reads it without
% any partial-read parameters, expecting the
% 'MATLAB:Zarr:OutOfMemory' error.
% NOTE(review): 100000*100000 single elements is roughly 40 GB; this
% presumably relies on that size exceeding the host's maximum array
% size preference - confirm the test still fails as expected on
% machines with large amounts of memory.
115+
116+
% Location of the oversized test array
bigDataPath = "bigData/myzarr";
117+
zarrcreate(bigDataPath, [100000,100000], Datatype='single');
118+
errID = 'MATLAB:Zarr:OutOfMemory';
119+
% Reading the whole array must fail with the out-of-memory identifier
testcase.verifyError(@()zarrread(bigDataPath),errID);
120+
end
121+
112122
function invalidFilePath(testcase)
113123
% Verify zarrread error when an invalid file path is used.
114124

@@ -147,7 +157,7 @@ function invalidPartialReadParams(testcase)
147157
testcase.verifyError(@()zarrread(zpath,Count=wrongDims),errID);
148158

149159
% Invalid type
150-
errID = 'MATLAB:validators:mustBeNumericOrLogical';
160+
errID = 'MATLAB:validators:mustBeNumeric';
151161
testcase.verifyError(@()zarrread(zpath,"Start",""),errID);
152162
testcase.verifyError(@()zarrread(zpath,"Stride",""),errID);
153163
testcase.verifyError(@()zarrread(zpath,"Count",""),errID);
@@ -159,12 +169,17 @@ function invalidPartialReadParams(testcase)
159169
testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID);
160170
testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID);
161171

162-
% Input out of bounds
172+
173+
% Parameters out of bounds
163174
inpVal = [100 200];
164-
errID = 'MATLAB:Python:PyException';
175+
errID = 'MATLAB:Zarr:PartialReadOutOfBounds';
165176
testcase.verifyError(@()zarrread(zpath,"Start",inpVal),errID);
166-
%testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID);
177+
testcase.verifyError(@()zarrread(zpath,"Stride",inpVal),errID);
167178
testcase.verifyError(@()zarrread(zpath,"Count",inpVal),errID);
179+
180+
% Combination of parameters out of bounds
181+
testcase.verifyError(...
182+
@()zarrread(zpath,Start=[3 4],Count=[2 2]),errID)
168183
end
169184
end
170185
end

zarrread.m

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727

2828
arguments
2929
filepath {mustBeTextScalar, mustBeNonzeroLengthText}
30-
options.Start (1,:) {mustBeInteger, mustBePositive} = [];
31-
options.Count (1,:) {mustBeInteger, mustBePositive} = [];
32-
options.Stride (1,:) {mustBeInteger, mustBePositive} = [];
30+
options.Start (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = [];
31+
options.Count (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = [];
32+
options.Stride (1,:) {mustBeNumeric, mustBeInteger, mustBePositive} = [];
3333
end
3434

3535
zarrObj = Zarr(filepath);

0 commit comments

Comments
 (0)