From a0455a422b0ec4ff89f770424ea782ebdd57321d Mon Sep 17 00:00:00 2001 From: Sean Arms <67096+lesserwhirls@users.noreply.github.com> Date: Wed, 17 Jun 2026 11:05:04 -0600 Subject: [PATCH] Handle U and S dtypes in Zarr Zarr arrays with U and S dtypes need special handling, as the numeric value in their dtype definition does not simply represent the byte size of each element. Also, update the script that generates test_dtypes.zarr to include a U2 and S2 array, and migrate to the Zarr-Python 3 API (still writing the Zarr V2 format). Fixes unidata/netcdf-java#1534. --- .../main/java/ucar/nc2/iosp/zarr/ZArray.java | 60 +++++++++++++++- .../java/ucar/nc2/iosp/zarr/ZarrHeader.java | 20 +++++- .../java/ucar/nc2/iosp/zarr/ZarrIosp.java | 66 +++++++++++++++++- .../java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java | 7 +- .../data/scripts/make_zarr_dtype_test_data.py | 55 ++++++++------- .../src/test/data/test_dtypes.zarr/.zattrs | 1 + .../src/test/data/test_dtypes.zarr/.zgroup | 2 +- .../byte_ordered_group/.zattrs | 1 + .../byte_ordered_group/.zgroup | 2 +- .../byte_ordered_group/big_endian/.zattrs | 1 + .../byte_ordered_group/big_endian/.zgroup | 2 +- .../big_endian/double_data/.zarray | 29 ++++---- .../big_endian/double_data/.zattrs | 1 + .../big_endian/float_data/.zarray | 29 ++++---- .../big_endian/float_data/.zattrs | 1 + .../big_endian/int_data/.zarray | 29 ++++---- .../big_endian/int_data/.zattrs | 1 + .../big_endian/long_data/.zarray | 29 ++++---- .../big_endian/long_data/.zattrs | 1 + .../big_endian/short_data/.zarray | 29 ++++---- .../big_endian/short_data/.zattrs | 1 + .../big_endian/uint_data/.zarray | 29 ++++---- .../big_endian/uint_data/.zattrs | 1 + .../big_endian/ulong_data/.zarray | 29 ++++---- .../big_endian/ulong_data/.zattrs | 1 + .../big_endian/ushort_data/.zarray | 29 ++++---- .../big_endian/ushort_data/.zattrs | 1 + .../byte_ordered_group/little_endian/.zattrs | 1 + .../byte_ordered_group/little_endian/.zgroup | 2 +- .../little_endian/double_data/.zarray | 29 ++++---- 
.../little_endian/double_data/.zattrs | 1 + .../little_endian/float_data/.zarray | 29 ++++---- .../little_endian/float_data/.zattrs | 1 + .../little_endian/int_data/.zarray | 29 ++++---- .../little_endian/int_data/.zattrs | 1 + .../little_endian/long_data/.zarray | 29 ++++---- .../little_endian/long_data/.zattrs | 1 + .../little_endian/short_data/.zarray | 29 ++++---- .../little_endian/short_data/.zattrs | 1 + .../little_endian/uint_data/.zarray | 29 ++++---- .../little_endian/uint_data/.zattrs | 1 + .../little_endian/ulong_data/.zarray | 29 ++++---- .../little_endian/ulong_data/.zattrs | 1 + .../little_endian/ushort_data/.zarray | 29 ++++---- .../little_endian/ushort_data/.zattrs | 1 + .../test_dtypes.zarr/string_types/.zattrs | 1 + .../test_dtypes.zarr/string_types/.zgroup | 2 +- .../string_types/char_data/.zarray | 29 ++++---- .../string_types/char_data/.zattrs | 1 + .../string_types/str_data/.zarray | 29 ++++---- .../string_types/str_data/.zattrs | 1 + .../string_types/str_data_2/.zarray | 17 +++++ .../string_types/str_data_2/.zattrs | 1 + .../string_types/str_data_2/0.0 | 1 + .../string_types/str_data_2/0.1 | 1 + .../string_types/str_data_2/1.0 | 1 + .../string_types/str_data_2/1.1 | 1 + .../string_types/unicode_data/.zarray | 29 ++++---- .../string_types/unicode_data/.zattrs | 1 + .../string_types/unicode_data_2/.zarray | 17 +++++ .../string_types/unicode_data_2/.zattrs | 1 + .../string_types/unicode_data_2/0.0 | Bin 0 -> 240 bytes .../string_types/unicode_data_2/0.1 | Bin 0 -> 240 bytes .../string_types/unicode_data_2/1.0 | Bin 0 -> 240 bytes .../string_types/unicode_data_2/1.1 | Bin 0 -> 240 bytes .../test_dtypes.zarr/unordered_group/.zattrs | 1 + .../test_dtypes.zarr/unordered_group/.zgroup | 2 +- .../unordered_group/boolean_data/.zarray | 29 ++++---- .../unordered_group/boolean_data/.zattrs | 1 + .../unordered_group/byte_data/.zarray | 29 ++++---- .../unordered_group/byte_data/.zattrs | 1 + .../unordered_group/ubyte_data/.zarray | 29 ++++---- 
.../unordered_group/ubyte_data/.zattrs | 1 + .../ucar/nc2/iosp/zarr/TestZarrDataTypes.java | 10 ++- 74 files changed, 584 insertions(+), 352 deletions(-) create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/double_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/double_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/float_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/int_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/long_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/short_data/.zattrs create mode 100644 
cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/uint_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/ulong_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/ushort_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/char_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/.zarray create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/0.0 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/0.1 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/1.0 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/str_data_2/1.1 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/.zarray create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/0.0 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/0.1 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.0 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.1 create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zattrs create mode 100644 
cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zattrs create mode 100644 cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zattrs diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZArray.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZArray.java index a34c465a89..9ee667dac6 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZArray.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZArray.java @@ -1,3 +1,8 @@ +/* + * Copyright (c) 2021-2026 University Corporation for Atmospheric Research/Unidata + * See LICENSE for license information. + */ + package ucar.nc2.iosp.zarr; import com.fasterxml.jackson.core.JsonParser; @@ -16,6 +21,7 @@ import java.io.IOException; import java.nio.ByteOrder; import java.util.*; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -36,6 +42,10 @@ public enum Order { // maps zarr datatypes to CDM datatypes private static Map dTypeMap; + // regex for matching numpy byteorder marks + // see https://numpy.org/doc/stable/reference/generated/numpy.dtype.byteorder.html#numpy-dtype-byteorder + private static final Pattern BYTE_ORDER_PATTERN = Pattern.compile("[><|=]"); + static { dTypeMap = new HashMap<>(); dTypeMap.put("b1", DataType.BOOLEAN); @@ -71,6 +81,8 @@ public enum Order { private final Order order; private final List filters; private final String separator; + private final int elementSize; // size of a single element on disk, in bytes + private final boolean unicodeString; // true for numpy U dtype fixed-length strings public ZArray(int[] shape, int[] chunks, Object fill_value, String dtype, Filter compressor, String order, List filters, String separator) throws ZarrFormatException { @@ -80,6 +92,8 @@ public ZArray(int[] shape, int[] chunks, Object fill_value, String dtype, Filter this.dtype = dtype; this.datatype = parseDataType(this.dtype); this.byteOrder = parseByteOrder(this.dtype); + 
this.elementSize = parseElementSize(this.dtype); + this.unicodeString = stripByteOrder(this.dtype).charAt(0) == 'U'; this.compressor = compressor; this.filters = filters; this.order = parseOrder(order); @@ -126,10 +140,35 @@ public ByteOrder getByteOrder() { return this.byteOrder; } + /** + * The size, in bytes, of a single element of this array as stored on disk. + */ + public int getElementSize() { + return this.elementSize; + } + + /** + * True if this array holds numpy U dtype. + */ + boolean isUnicodeString() { + return this.unicodeString; + } + + private static String stripByteOrder(String dtype) { + return BYTE_ORDER_PATTERN.matcher(dtype).replaceAll(""); + } + private static DataType parseDataType(String dtype) throws ZarrFormatException { - dtype = dtype.replace(">", ""); - dtype = dtype.replace("<", ""); - dtype = dtype.replace("|", ""); + dtype = stripByteOrder(dtype); + final char typeChar = dtype.charAt(0); + // S (fixed-length byte strings) and U (fixed-length unicode strings) do not follow the + // usual [type char][type size in bytes] pattern: the trailing integer is a fixed character + // count, not a byte size. See https://github.com/Unidata/netcdf-java/issues/1534 + if (typeChar == 'S' || typeChar == 'U') { + final int nChars = parseLength(dtype); + // a single byte char maps to CDM CHAR, otherwise it is a fixed-length String + return (typeChar == 'S' && nChars == 1) ? DataType.CHAR : DataType.STRING; + } DataType dataType = dTypeMap.get(dtype); if (dataType == null) { throw new ZarrFormatException(ZarrKeys.DTYPE, dtype); @@ -137,6 +176,21 @@ private static DataType parseDataType(String dtype) throws ZarrFormatException { return dataType; } + private static int parseElementSize(String dtype) throws ZarrFormatException { + dtype = stripByteOrder(dtype); + final char typeChar = dtype.charAt(0); + final int length = parseLength(dtype); + return (typeChar == 'U') ? 
4 * length : length; + } + + private static int parseLength(String dtype) throws ZarrFormatException { + try { + return Integer.parseInt(dtype.substring(1)); + } catch (NumberFormatException | IndexOutOfBoundsException ex) { + throw new ZarrFormatException(ZarrKeys.DTYPE, dtype); + } + } + private static ByteOrder parseByteOrder(String dtype) throws ZarrFormatException { if (dtype.startsWith(">")) { return ByteOrder.BIG_ENDIAN; diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java index 6399a55f3e..373e4232cb 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2025 University Corporation for Atmospheric Research/Unidata + * Copyright (c) 2021-2026 University Corporation for Atmospheric Research/Unidata * See LICENSE for license information. */ @@ -303,7 +303,8 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra // create VInfo VInfo vinfo = new VInfo(chunks, zarray.getFillValue(), zarray.getCompressor(), zarray.getByteOrder(), - zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks); + zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks, + zarray.getElementSize(), zarray.isUnicodeString()); var.setSPobject(vinfo); // Include some info from .zarray file in attributes for display when showing variable detail. 
@@ -421,9 +422,12 @@ class VInfo { private final List filters; private final long offset; private final Map initializedChunks; + private final int elementSize; + private final boolean unicodeString; VInfo(int[] chunks, Object fillValue, Filter compressor, ByteOrder byteOrder, ZArray.Order order, String separator, - List filters, long offset, Map initializedChunks) { + List filters, long offset, Map initializedChunks, int elementSize, + boolean unicodeString) { this.chunks = chunks; this.fillValue = fillValue; this.byteOrder = byteOrder; @@ -433,6 +437,8 @@ class VInfo { this.filters = filters; this.offset = offset; this.initializedChunks = initializedChunks; + this.elementSize = elementSize; + this.unicodeString = unicodeString; } public int[] getChunks() { @@ -471,6 +477,14 @@ public Map getInitializedChunks() { return this.initializedChunks; } + int getElementSize() { + return this.elementSize; + } + + boolean isUnicodeString() { + return this.unicodeString; + } + } } diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrIosp.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrIosp.java index d221bcdf9e..4c187d480c 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrIosp.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrIosp.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 University Corporation for Atmospheric Research/Unidata + * Copyright (c) 2021-2026 University Corporation for Atmospheric Research/Unidata * See LICENSE for license information. 
*/ @@ -20,6 +20,10 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; /** * IOSP for reading/writing Zarr/NCZarr formats @@ -83,8 +87,14 @@ public Array readData(Variable v2, Section section) { Object fillValue = getFillValue(vinfo, dataType); // create layout object - Layout layout = new ZarrLayoutBB(v2, section, this.raf); - Object data = IospHelper.readDataFill((LayoutBB) layout, dataType, fillValue); + LayoutBB layout = new ZarrLayoutBB(v2, section, this.raf); + final Object data; + if (dataType == DataType.STRING) { + // fixed-length string types (S/U) need custom decoding (not handled by the generic IospHelper string reader). + data = readStringData(layout, vinfo, fillValue); + } else { + data = IospHelper.readDataFill(layout, dataType, fillValue); + } Array array = Array.factory(dataType, section.getShape(), data); if (vinfo.getOrder() == ZArray.Order.F) { @@ -99,6 +109,56 @@ public Array readData(Variable v2, Section section) { return array; } + /** + * Read fixed-length string data ('S' or 'U' dtypes) from the layout. + * + *

+ * See https://github.com/Unidata/netcdf-java/issues/1534 + */ + private static String[] readStringData(LayoutBB layout, ZarrHeader.VInfo vinfo, Object fillValue) { + final int nelems = (int) layout.getTotalNelems(); + final int recSize = layout.getElemSize(); + final String[] pa = new String[nelems]; + if (fillValue instanceof String) { + java.util.Arrays.fill(pa, (String) fillValue); + } + + final Charset charset; + if (vinfo.isUnicodeString()) { + charset = + vinfo.getByteOrder() == ByteOrder.BIG_ENDIAN ? Charset.forName("UTF-32BE") : Charset.forName("UTF-32LE"); + } else { + charset = StandardCharsets.UTF_8; + } + + while (layout.hasNext()) { + LayoutBB.Chunk chunk = layout.next(); + ByteBuffer bb = chunk.getByteBuffer(); + // if chunk is empty, use fill value + if (!bb.hasRemaining()) { + continue; + } + bb.position(chunk.getSrcElem() * recSize); + int pos = (int) chunk.getDestElem(); + final byte[] raw = new byte[recSize]; + for (int i = 0; i < chunk.getNelems(); i++) { + bb.get(raw); + pa[pos++] = decodeFixedLengthString(raw, charset); + } + } + return pa; + } + + private static String decodeFixedLengthString(byte[] raw, Charset charset) { + String s = new String(raw, charset); + // NumPy fixed-length strings are null-padded, so strip trailing NUL characters + int end = s.length(); + while (end > 0 && s.charAt(end - 1) == '\0') { + end--; + } + return s.substring(0, end); + } + private Object getFillValue(ZarrHeader.VInfo vinfo, DataType dataType) { // Watch for floating point fill values encoded as Strings diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java index 95817bfcd4..1ba8952e77 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrLayoutBB.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2025 University Corporation for Atmospheric Research/Unidata + * Copyright (c) 2021-2026 University Corporation 
for Atmospheric Research/Unidata * See LICENSE for license information. */ @@ -80,7 +80,10 @@ public ZarrLayoutBB(Variable v2, Section wantSection, RandomAccessFile raf) { this.want = wantSection; } - this.elemSize = v2.getDataType().getSize(); + // Use the on-disk element byte width from the .zarray metadata. For most types this matches + // DataType.getSize(), but for fixed-length string types (S/U) it captures the true element + // width (N bytes for S, 4*N bytes for U). + this.elemSize = vinfo.getElementSize(); // create delegate and chunk iterator ZarrLayoutBB.DataChunkIterator iter = new ZarrLayoutBB.DataChunkIterator(); diff --git a/cdm/zarr/src/test/data/scripts/make_zarr_dtype_test_data.py b/cdm/zarr/src/test/data/scripts/make_zarr_dtype_test_data.py index 72b17d4401..625fc4a27d 100644 --- a/cdm/zarr/src/test/data/scripts/make_zarr_dtype_test_data.py +++ b/cdm/zarr/src/test/data/scripts/make_zarr_dtype_test_data.py @@ -57,13 +57,15 @@ import zarr -store = zarr.DirectoryStore('../test_dtypes.zarr') +# Zarr-Python 3 API. The store is written using the Zarr format 2 +# specification by passing zarr_format=2 to the top level API. 
+store = zarr.storage.LocalStore('../test_dtypes.zarr') # In[ ]: -root_grp = zarr.group(store, overwrite=True) +root_grp = zarr.group(store, overwrite=True, zarr_format=2) # create a group for byte-order independent data types unordered_group = root_grp.create_group('unordered_group', overwrite=True) @@ -82,11 +84,11 @@ # add data to unordered group -b = unordered_group.create_dataset('boolean_data', shape=(4,5), chunks=(2,5), dtype='|b1', overwrite=True, compressor=None) +b = unordered_group.create_array('boolean_data', shape=(4,5), chunks=(2,5), dtype='|b1', overwrite=True, compressors=None) b[:] = boolean_data -byte = unordered_group.create_dataset('byte_data', shape=(10,8), chunks=(5,4), dtype='|i1', overwrite=True, compressor=None) +byte = unordered_group.create_array('byte_data', shape=(10,8), chunks=(5,4), dtype='|i1', overwrite=True, compressors=None) byte[:] = bdata -ubyte = unordered_group.create_dataset('ubyte_data', shape=(10,8), chunks=(5,4), dtype='|u1', overwrite=True, compressor=None) +ubyte = unordered_group.create_array('ubyte_data', shape=(10,8), chunks=(5,4), dtype='|u1', overwrite=True, compressors=None) ubyte[:] = bdata @@ -94,21 +96,21 @@ # add data to big endian group -shorts = big_endian.create_dataset('short_data', shape=(4,5), chunks=(2,5), dtype='>i2', overwrite=True, compressor=None) +shorts = big_endian.create_array('short_data', shape=(4,5), chunks=(2,5), dtype='>i2', overwrite=True, compressors=None) shorts[:] = be_short_data -ushorts = big_endian.create_dataset('ushort_data', shape=(4,5), chunks=(2,5), dtype='>u2', overwrite=True, compressor=None) +ushorts = big_endian.create_array('ushort_data', shape=(4,5), chunks=(2,5), dtype='>u2', overwrite=True, compressors=None) ushorts[:] = be_short_data -ints = big_endian.create_dataset('int_data', shape=(4,5), chunks=(2,5), dtype='>i4', overwrite=True, compressor=None) +ints = big_endian.create_array('int_data', shape=(4,5), chunks=(2,5), dtype='>i4', overwrite=True, compressors=None) 
ints[:] = be_int_data -uints = big_endian.create_dataset('uint_data', shape=(4,5), chunks=(2,5), dtype='>u4', overwrite=True, compressor=None) +uints = big_endian.create_array('uint_data', shape=(4,5), chunks=(2,5), dtype='>u4', overwrite=True, compressors=None) uints[:] = be_int_data -longs = big_endian.create_dataset('long_data', shape=(5,4), chunks=(5,2), dtype='>i8', overwrite=True, compressor=None) +longs = big_endian.create_array('long_data', shape=(5,4), chunks=(5,2), dtype='>i8', overwrite=True, compressors=None) longs[:] = be_long_data -ulongs = big_endian.create_dataset('ulong_data', shape=(5,4), chunks=(5,2), dtype='>u8', overwrite=True, compressor=None) +ulongs = big_endian.create_array('ulong_data', shape=(5,4), chunks=(5,2), dtype='>u8', overwrite=True, compressors=None) ulongs[:] = be_long_data -floats = big_endian.create_dataset('float_data', shape=(4,5), chunks=(2,5), dtype='>f4', overwrite=True, compressor=None) +floats = big_endian.create_array('float_data', shape=(4,5), chunks=(2,5), dtype='>f4', overwrite=True, compressors=None) floats[:] = be_float_data -doubles = big_endian.create_dataset('double_data', shape=(5,4), chunks=(5,2), dtype='>f8', overwrite=True, compressor=None) +doubles = big_endian.create_array('double_data', shape=(5,4), chunks=(5,2), dtype='>f8', overwrite=True, compressors=None) doubles[:] = be_double_data @@ -116,21 +118,21 @@ # add data to little endian group -shorts = little_endian.create_dataset('short_data', shape=(4,5), chunks=(2,5), dtype='f8", - "fill_value": 0.0, - "filters": null, - "order": "C", - "shape": [ - 5, - 4 - ], - "zarr_format": 2 + "shape": [ + 5, + 4 + ], + "chunks": [ + 5, + 2 + ], + "dtype": ">f8", + "fill_value": 0.0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/double_data/.zattrs 
b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/double_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/double_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zarray index fbe83fa695..7843668ff1 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - "compressor": null, - "dtype": ">f4", - "fill_value": 0.0, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": ">f4", + "fill_value": 0.0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/float_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zarray index 2cc68e89df..20d1fc6e28 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - 
"compressor": null, - "dtype": ">i4", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": ">i4", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/int_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zarray index 32dee36e4c..d0c50cab63 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 5, - 2 - ], - "compressor": null, - "dtype": ">i8", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 5, - 4 - ], - "zarr_format": 2 + "shape": [ + 5, + 4 + ], + "chunks": [ + 5, + 2 + ], + "dtype": ">i8", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/long_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git 
a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zarray index 84e26663dc..48cfd437ea 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - "compressor": null, - "dtype": ">i2", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": ">i2", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/short_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zarray index b8650896f5..abccbc4ca7 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - "compressor": null, - "dtype": ">u4", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": ">u4", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": 
null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/uint_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zarray index 3ec08263b6..3aa58ddf20 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 5, - 2 - ], - "compressor": null, - "dtype": ">u8", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 5, - 4 - ], - "zarr_format": 2 + "shape": [ + 5, + 4 + ], + "chunks": [ + 5, + 2 + ], + "dtype": ">u8", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ulong_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zarray index cb68237fa7..505bf1b2df 100644 --- 
a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - "compressor": null, - "dtype": ">u2", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": ">u2", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/big_endian/ushort_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zgroup b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zgroup index 3b7daf227c..cab13da6ee 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zgroup +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/.zgroup @@ -1,3 +1,3 @@ { - "zarr_format": 2 + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/double_data/.zarray 
b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/double_data/.zarray index a1ef87eb80..e1f9fc30c7 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/double_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/byte_ordered_group/little_endian/double_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 5, - 2 - ], - "compressor": null, - "dtype": "iB;=~a`0|4bN7TW*- literal 0 HcmV?d00001 diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/0.1 b/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/0.1 new file mode 100644 index 0000000000000000000000000000000000000000..509aea5ce4537fab89bc01ee224dfd49a1ca7530 GIT binary patch literal 240 RcmYdfU|>iB;=~a`0|4bN7TW*- literal 0 HcmV?d00001 diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.0 b/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..509aea5ce4537fab89bc01ee224dfd49a1ca7530 GIT binary patch literal 240 RcmYdfU|>iB;=~a`0|4bN7TW*- literal 0 HcmV?d00001 diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.1 b/cdm/zarr/src/test/data/test_dtypes.zarr/string_types/unicode_data_2/1.1 new file mode 100644 index 0000000000000000000000000000000000000000..509aea5ce4537fab89bc01ee224dfd49a1ca7530 GIT binary patch literal 240 RcmYdfU|>iB;=~a`0|4bN7TW*- literal 0 HcmV?d00001 diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zgroup b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zgroup index 3b7daf227c..cab13da6ee 100644 --- 
a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zgroup +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/.zgroup @@ -1,3 +1,3 @@ { - "zarr_format": 2 + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zarray index b710ff4e17..08afaf3005 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 2, - 5 - ], - "compressor": null, - "dtype": "|b1", - "fill_value": false, - "filters": null, - "order": "C", - "shape": [ - 4, - 5 - ], - "zarr_format": 2 + "shape": [ + 4, + 5 + ], + "chunks": [ + 2, + 5 + ], + "dtype": "|b1", + "fill_value": false, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/boolean_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zarray index 980622e172..4ec2d24d10 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 5, - 4 - ], - "compressor": null, - "dtype": "|i1", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 10, - 8 - ], - "zarr_format": 2 + "shape": [ + 10, + 8 + ], + "chunks": [ + 5, + 4 + ], + "dtype": 
"|i1", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/byte_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zarray b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zarray index e175889738..36341568d5 100644 --- a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zarray +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zarray @@ -1,16 +1,17 @@ { - "chunks": [ - 5, - 4 - ], - "compressor": null, - "dtype": "|u1", - "fill_value": 0, - "filters": null, - "order": "C", - "shape": [ - 10, - 8 - ], - "zarr_format": 2 + "shape": [ + 10, + 8 + ], + "chunks": [ + 5, + 4 + ], + "dtype": "|u1", + "fill_value": 0, + "order": "C", + "filters": null, + "dimension_separator": ".", + "compressor": null, + "zarr_format": 2 } \ No newline at end of file diff --git a/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zattrs b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zattrs new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/cdm/zarr/src/test/data/test_dtypes.zarr/unordered_group/ubyte_data/.zattrs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/cdm/zarr/src/test/java/ucar/nc2/iosp/zarr/TestZarrDataTypes.java b/cdm/zarr/src/test/java/ucar/nc2/iosp/zarr/TestZarrDataTypes.java index 93b1899151..c69cb8d20a 100644 --- a/cdm/zarr/src/test/java/ucar/nc2/iosp/zarr/TestZarrDataTypes.java +++ b/cdm/zarr/src/test/java/ucar/nc2/iosp/zarr/TestZarrDataTypes.java @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2021 University Corporation for Atmospheric Research/Unidata + * Copyright (c) 2021-2026 University Corporation for Atmospheric Research/Unidata * See LICENSE for license information. */ @@ -53,7 +53,9 @@ public class TestZarrDataTypes { // string variable names private static final String CHAR = "/string_types/char_data"; private static final String STRING = "/string_types/str_data"; + private static final String STRING2 = "/string_types/str_data_2"; private static final String UNICODE = "/string_types/unicode_data"; + private static final String UNICODE2 = "/string_types/unicode_data_2"; private static NetcdfFile ncfile; @@ -205,7 +207,11 @@ public void testReadStringTypes() throws IOException, InvalidRangeException { assertThat(var.read("2,4").getChar(0)).isEqualTo('a'); var = ncfile.findVariable(STRING); assertThat(var.read("0,7").toString().trim()).isEqualTo("abcd"); + var = ncfile.findVariable(STRING2); + assertThat(var.read("0,7").toString().trim()).isEqualTo("ab"); var = ncfile.findVariable(UNICODE); - assertThat(var.read("5,3").toString().trim()).isEqualTo("d"); + assertThat(var.read("5,3").toString().trim()).isEqualTo("abcd"); + var = ncfile.findVariable(UNICODE2); + assertThat(var.read("5,3").toString().trim()).isEqualTo("ab"); } }