diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java index 373e4232cb..4094631936 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java @@ -54,6 +54,7 @@ private class DelayedVarMaker { private RandomAccessDirectoryItem var; private ZArray zarray; private Map initializedChunks; // track any uninitialized chunks for var + private Map chunkStarts; // byte offset of each chunk within the store, keyed by chunk index private List attrs; // list of variable attributes private long dataOffset; // byte position where data starts @@ -65,6 +66,7 @@ void setVar(RandomAccessDirectoryItem var) { this.var = var; this.attrs = null; this.initializedChunks = new HashMap<>(); + this.chunkStarts = new HashMap<>(); this.dataOffset = -1; if (var != null) { try { @@ -104,6 +106,11 @@ void processItem(RandomAccessDirectoryItem item) { this.var = null; // skip rest of var is unrecognized files found } this.initializedChunks.put(index, item.length()); + // Record the actual byte offset of this chunk within the store, keyed by its numeric chunk index. + // This avoids any dependency on the order in which the store lists files (which is lexicographic + // and would otherwise place e.g. 
chunk 0.10 before chunk 0.2, which is the root cause of + // https://github.com/Unidata/netcdf-java/issues/1542) + this.chunkStarts.put(index, item.startIndex()); // if data offset is uninitialized, set here if (this.dataOffset < 0) { this.dataOffset = item.startIndex(); @@ -115,7 +122,7 @@ void makeVar() { return; // do nothing if no variable is in progress } try { - makeVariable(var, dataOffset, zarray, initializedChunks, attrs); + makeVariable(var, dataOffset, zarray, initializedChunks, chunkStarts, attrs); } catch (ZarrFormatException ex) { logger.error(ex.getMessage()); } @@ -200,7 +207,8 @@ private void makeGroup(RandomAccessDirectoryItem item, List attrs) { } private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArray zarray, - Map initializedChunks, List attrs) throws ZarrFormatException { + Map initializedChunks, Map chunkStarts, List attrs) + throws ZarrFormatException { // make new Variable Variable.Builder var = Variable.builder(); String location = ZarrUtils.trimLocation(item.getLocation()); @@ -303,7 +311,7 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra // create VInfo VInfo vinfo = new VInfo(chunks, zarray.getFillValue(), zarray.getCompressor(), zarray.getByteOrder(), - zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks, + zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks, chunkStarts, zarray.getElementSize(), zarray.isUnicodeString()); var.setSPobject(vinfo); @@ -389,7 +397,7 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray) int[] shape = zarray.getShape(); int[] chunkSize = zarray.getChunks(); for (int i = 0; i < nDims; i++) { - nChunks[i] = (int) Math.ceil(shape[i] / chunkSize[i]); + nChunks[i] = (int) Math.ceil((double) shape[i] / chunkSize[i]); } return ZarrUtils.subscriptsToIndex(subs, nChunks); } else { @@ -422,12 +430,13 @@ class VInfo { private final List filters; 
private final long offset; private final Map initializedChunks; + private final Map chunkStarts; private final int elementSize; private final boolean unicodeString; VInfo(int[] chunks, Object fillValue, Filter compressor, ByteOrder byteOrder, ZArray.Order order, String separator, - List filters, long offset, Map initializedChunks, int elementSize, - boolean unicodeString) { + List filters, long offset, Map initializedChunks, Map chunkStarts, + int elementSize, boolean unicodeString) { this.chunks = chunks; this.fillValue = fillValue; this.byteOrder = byteOrder; @@ -437,6 +446,7 @@ class VInfo { this.filters = filters; this.offset = offset; this.initializedChunks = initializedChunks; + this.chunkStarts = chunkStarts; this.elementSize = elementSize; this.unicodeString = unicodeString; } @@ -477,6 +487,10 @@ public Map getInitializedChunks() { return this.initializedChunks; } + public Map getChunkStarts() { + return this.chunkStarts; + } + int getElementSize() { return this.elementSize; } diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java index 1ba8952e77..4289f84470 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java @@ -38,6 +38,7 @@ public class ZarrLayoutBB implements LayoutBB { private int totalNChunks; // total number of chunks private boolean F_order = false; // F order storage? 
private Map initializedChunks; // set of chunks that exist as files and their compressed size + private Map chunkStarts; // byte offset of each existing chunk within the store, keyed by chunk index private Filter compressor; private List filters; @@ -55,12 +56,13 @@ public ZarrLayoutBB(Variable v2, Section wantSection, RandomAccessFile raf) { this.chunkSize = vinfo.getChunks(); int ndims = this.chunkSize.length; this.initializedChunks = vinfo.getInitializedChunks(); + this.chunkStarts = vinfo.getChunkStarts(); this.nChunks = new int[ndims]; this.totalNChunks = 1; for (int i = 0; i < ndims; i++) { Dimension dim = v2.getDimension(i); // round up nchunks if not evenly divisible by chunk size - this.nChunks[i] = (int) Math.ceil(dim.getLength() / this.chunkSize[i]); + this.nChunks[i] = (int) Math.ceil((double) dim.getLength() / this.chunkSize[i]); this.totalNChunks *= nChunks[i]; } @@ -120,7 +122,7 @@ private class DataChunkIterator implements LayoutBBTiled.DataChunkIterator { DataChunkIterator() { this.currChunk = new int[chunkSize.length]; this.chunkNum = 0; - this.currOffset = varOffset; // start at start of variable data + this.currOffset = chunkStarts.getOrDefault(this.chunkNum, varOffset); } public boolean hasNext() { @@ -128,7 +130,8 @@ public boolean hasNext() { } public LayoutBBTiled.DataChunk next() { - DataChunk chunk = new ZarrLayoutBB.DataChunk(this.currChunk, this.chunkNum, this.currOffset); + long offset = chunkStarts.getOrDefault(this.chunkNum, this.currOffset); + DataChunk chunk = new ZarrLayoutBB.DataChunk(this.currChunk, this.chunkNum, offset); incrementChunk(); return chunk; } @@ -142,7 +145,6 @@ private void incrementChunk() { i--; } this.currChunk[i]++; - this.currOffset += initializedChunks.getOrDefault(this.chunkNum, (long) 0); this.chunkNum = ZarrUtils.subscriptsToIndex(this.currChunk, nChunks); } else { // scalar array diff --git a/cdm/zarr/src/test/data/scripts/zarr_o10_multichunk.py b/cdm/zarr/src/test/data/scripts/zarr_o10_multichunk.py new 
file mode 100644 index 0000000000..03613181a6 --- /dev/null +++ b/cdm/zarr/src/test/data/scripts/zarr_o10_multichunk.py @@ -0,0 +1,25 @@ +import numpy as np +import zarr + +store = zarr.storage.LocalStore('../test_o10_multichunk.zarr') + +# create array +data = np.arange(10000).reshape((100,100)) + +root_group = zarr.group(store, overwrite=True, zarr_format=2) + +# create array with at least 10 chunks in each dimension +# 10 chunks in first dimension, 20 chunks in second +# so chunks will be [0-9].[0-19] +multichunk = root_group.create_array('ten_by_five', shape=data.shape, chunks=(10,5), dtype='