Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
26 changes: 20 additions & 6 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ private class DelayedVarMaker {
private RandomAccessDirectoryItem var; // item of the variable being built; set to null to skip the variable
private ZArray zarray; // NOTE(review): presumably the parsed .zarray metadata for var -- confirm
private Map<Integer, Long> initializedChunks; // length of each chunk found for var, keyed by chunk index
private Map<Integer, Long> chunkStarts; // byte offset of each chunk within the store, keyed by chunk index
private List<Attribute> attrs; // list of variable attributes
private long dataOffset; // byte position where data starts; -1 until the first chunk is processed

Expand All @@ -65,6 +66,7 @@ void setVar(RandomAccessDirectoryItem var) {
this.var = var;
this.attrs = null;
this.initializedChunks = new HashMap<>();
this.chunkStarts = new HashMap<>();
this.dataOffset = -1;
if (var != null) {
try {
Expand Down Expand Up @@ -104,6 +106,11 @@ void processItem(RandomAccessDirectoryItem item) {
this.var = null; // skip rest of var if unrecognized files are found
}
this.initializedChunks.put(index, item.length());
// Record the actual byte offset of this chunk within the store, keyed by its numeric chunk index.
// This removes any dependency on the order in which the store lists files. That order is
// lexicographic, so it would otherwise place e.g. chunk 0.10 before chunk 0.2, which is the
// root cause of https://github.com/Unidata/netcdf-java/issues/1542.
this.chunkStarts.put(index, item.startIndex());
// if data offset is uninitialized, set here
if (this.dataOffset < 0) {
this.dataOffset = item.startIndex();
Expand All @@ -115,7 +122,7 @@ void makeVar() {
return; // do nothing if no variable is in progress
}
try {
makeVariable(var, dataOffset, zarray, initializedChunks, attrs);
makeVariable(var, dataOffset, zarray, initializedChunks, chunkStarts, attrs);
} catch (ZarrFormatException ex) {
logger.error(ex.getMessage());
}
Expand Down Expand Up @@ -200,7 +207,8 @@ private void makeGroup(RandomAccessDirectoryItem item, List<Attribute> attrs) {
}

private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArray zarray,
Map<Integer, Long> initializedChunks, List<Attribute> attrs) throws ZarrFormatException {
Map<Integer, Long> initializedChunks, Map<Integer, Long> chunkStarts, List<Attribute> attrs)
throws ZarrFormatException {
// make new Variable
Variable.Builder<?> var = Variable.builder();
String location = ZarrUtils.trimLocation(item.getLocation());
Expand Down Expand Up @@ -303,7 +311,7 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra

// create VInfo
VInfo vinfo = new VInfo(chunks, zarray.getFillValue(), zarray.getCompressor(), zarray.getByteOrder(),
zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks,
zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks, chunkStarts,
zarray.getElementSize(), zarray.isUnicodeString());
var.setSPobject(vinfo);

Expand Down Expand Up @@ -389,7 +397,7 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray)
int[] shape = zarray.getShape();
int[] chunkSize = zarray.getChunks();
for (int i = 0; i < nDims; i++) {
nChunks[i] = (int) Math.ceil(shape[i] / chunkSize[i]);
nChunks[i] = (int) Math.ceil((double) shape[i] / chunkSize[i]);
}
return ZarrUtils.subscriptsToIndex(subs, nChunks);
} else {
Expand Down Expand Up @@ -422,12 +430,13 @@ class VInfo {
private final List<Filter> filters;
private final long offset;
private final Map<Integer, Long> initializedChunks;
private final Map<Integer, Long> chunkStarts;
private final int elementSize;
private final boolean unicodeString;

VInfo(int[] chunks, Object fillValue, Filter compressor, ByteOrder byteOrder, ZArray.Order order, String separator,
List<Filter> filters, long offset, Map<Integer, Long> initializedChunks, int elementSize,
boolean unicodeString) {
List<Filter> filters, long offset, Map<Integer, Long> initializedChunks, Map<Integer, Long> chunkStarts,
int elementSize, boolean unicodeString) {
this.chunks = chunks;
this.fillValue = fillValue;
this.byteOrder = byteOrder;
Expand All @@ -437,6 +446,7 @@ class VInfo {
this.filters = filters;
this.offset = offset;
this.initializedChunks = initializedChunks;
this.chunkStarts = chunkStarts;
this.elementSize = elementSize;
this.unicodeString = unicodeString;
}
Expand Down Expand Up @@ -477,6 +487,10 @@ public Map<Integer, Long> getInitializedChunks() {
return this.initializedChunks;
}

/**
 * Returns the chunk start offsets held by this object.
 *
 * @return the map of byte offsets keyed by chunk index; this is the stored instance itself, not a copy
 */
public Map<Integer, Long> getChunkStarts() {
  final Map<Integer, Long> starts = this.chunkStarts;
  return starts;
}

/**
 * Returns the element size this object was constructed with.
 *
 * @return the stored element size (NOTE(review): presumably bytes per array element -- confirm against ZArray)
 */
int getElementSize() {
  final int size = this.elementSize;
  return size;
}
Expand Down
10 changes: 6 additions & 4 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class ZarrLayoutBB implements LayoutBB {
private int totalNChunks; // total number of chunks
private boolean F_order = false; // F order storage?
private Map<Integer, Long> initializedChunks; // set of chunks that exist as files and their compressed size
private Map<Integer, Long> chunkStarts; // byte offset of each existing chunk within the store, keyed by chunk index
private Filter compressor;
private List<Filter> filters;

Expand All @@ -55,12 +56,13 @@ public ZarrLayoutBB(Variable v2, Section wantSection, RandomAccessFile raf) {
this.chunkSize = vinfo.getChunks();
int ndims = this.chunkSize.length;
this.initializedChunks = vinfo.getInitializedChunks();
this.chunkStarts = vinfo.getChunkStarts();
this.nChunks = new int[ndims];
this.totalNChunks = 1;
for (int i = 0; i < ndims; i++) {
Dimension dim = v2.getDimension(i);
// round up nchunks if not evenly divisible by chunk size
this.nChunks[i] = (int) Math.ceil(dim.getLength() / this.chunkSize[i]);
this.nChunks[i] = (int) Math.ceil((double) dim.getLength() / this.chunkSize[i]);
this.totalNChunks *= nChunks[i];
}

Expand Down Expand Up @@ -120,15 +122,16 @@ private class DataChunkIterator implements LayoutBBTiled.DataChunkIterator {
DataChunkIterator() {
this.currChunk = new int[chunkSize.length];
this.chunkNum = 0;
this.currOffset = varOffset; // start at start of variable data
this.currOffset = chunkStarts.getOrDefault(this.chunkNum, varOffset);
}

/**
 * Reports whether the iterator has more chunks to hand out.
 *
 * @return true while the current chunk number is still below the total number of chunks
 */
public boolean hasNext() {
  final boolean chunksRemain = this.chunkNum < totalNChunks;
  return chunksRemain;
}

public LayoutBBTiled.DataChunk next() {
DataChunk chunk = new ZarrLayoutBB.DataChunk(this.currChunk, this.chunkNum, this.currOffset);
long offset = chunkStarts.getOrDefault(this.chunkNum, this.currOffset);
DataChunk chunk = new ZarrLayoutBB.DataChunk(this.currChunk, this.chunkNum, offset);
incrementChunk();
return chunk;
}
Expand All @@ -142,7 +145,6 @@ private void incrementChunk() {
i--;
}
this.currChunk[i]++;
this.currOffset += initializedChunks.getOrDefault(this.chunkNum, (long) 0);
this.chunkNum = ZarrUtils.subscriptsToIndex(this.currChunk, nChunks);
} else {
// scalar array
Expand Down
25 changes: 25 additions & 0 deletions cdm/zarr/src/test/data/scripts/zarr_o10_multichunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
import zarr

# Generates the test_o10_multichunk.zarr test store: a Zarr v2 group holding two
# 100x100 arrays whose chunk grid has a two-digit chunk index in one dimension.
store = zarr.storage.LocalStore('../test_o10_multichunk.zarr')

# source data: the values 0..9999 laid out as a 100x100 grid
data = np.arange(10000).reshape((100,100))

root_group = zarr.group(store, overwrite=True, zarr_format=2)

# 100x100 array split into (10, 5) chunks:
# 10 chunks in the first dimension, 20 chunks in the second,
# so chunk keys will be [0-9].[0-19]. Only the second dimension reaches
# two-digit chunk indices (10-19), which sort lexicographically before 2-9.
# uncompressed variant
multichunk = root_group.create_array('ten_by_five', shape=data.shape, chunks=(10,5), dtype='<u8', overwrite=True, compressors=None)
multichunk[:] = data

# same layout, but `compressors` is left unset so zarr applies its default
# compression (presumably Blosc, per the array name -- confirm against the written .zarray)
multichunk_blosc = root_group.create_array('ten_by_five_blosc', shape=data.shape, chunks=(10,5), dtype='<u8', overwrite=True)
multichunk_blosc[:] = data

# echo both arrays and their contents for a quick visual check
print(multichunk)
print(multichunk[:])

print(multichunk_blosc)
print(multichunk_blosc[:])
1 change: 1 addition & 0 deletions cdm/zarr/src/test/data/test_o10_multichunk.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
3 changes: 3 additions & 0 deletions cdm/zarr/src/test/data/test_o10_multichunk.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"shape": [
100,
100
],
"chunks": [
10,
5
],
"dtype": "<u8",
"fill_value": 0,
"order": "C",
"filters": null,
"dimension_separator": ".",
"compressor": null,
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"shape": [
100,
100
],
"chunks": [
10,
5
],
"dtype": "<u8",
"fill_value": 0,
"order": "C",
"filters": null,
"dimension_separator": ".",
"compressor": {
"id": "blosc",
"cname": "lz4",
"clevel": 5,
"shuffle": 1,
"blocksize": 0
},
"zarr_format": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading