From c4f96f24aaa14aac8881ae62b7acafffba6b1fb8 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 21 May 2024 17:12:39 -0400 Subject: [PATCH 001/124] feat: wip toward codecs --- .../saalfeldlab/n5/DatasetAttributes.java | 41 +++- .../org/janelia/saalfeldlab/n5/N5Writer.java | 30 ++- .../saalfeldlab/n5/codec/AsTypeCodec.java | 178 ++++++++++++++++++ .../janelia/saalfeldlab/n5/codec/Codec.java | 35 ++++ .../saalfeldlab/n5/codec/ComposedCodec.java | 41 ++++ .../FixedLengthConvertedInputStream.java | 71 +++++++ .../FixedLengthConvertedOutputStream.java | 64 +++++++ .../n5/codec/FixedScaleOffsetCodec.java | 55 ++++++ .../saalfeldlab/n5/codec/IdentityCodec.java | 23 +++ .../saalfeldlab/n5/codec/AsTypeTests.java | 80 ++++++++ .../codec/FixedConvertedInputStreamTest.java | 88 +++++++++ .../codec/FixedConvertedOutputStreamTest.java | 111 +++++++++++ .../n5/codec/FixedScaleOffsetTests.java | 62 ++++++ 13 files changed, 874 insertions(+), 5 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedOutputStream.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index f4aea9fe..5fa19fc5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -29,6 +29,8 @@ import java.util.Arrays; import java.util.HashMap; +import org.janelia.saalfeldlab.n5.codec.Codec; + /** * Mandatory dataset attributes: * @@ -38,6 +40,11 @@ *
  • {@link DataType} : dataType
  • *
  • {@link Compression} : compression
  • * + * + * Optional dataset attributes: + *
      + *
    1. {@link Codec}[] : codecs
    2. + *
    * * @author Stephan Saalfeld * @@ -50,6 +57,7 @@ public class DatasetAttributes implements Serializable { public static final String BLOCK_SIZE_KEY = "blockSize"; public static final String DATA_TYPE_KEY = "dataType"; public static final String COMPRESSION_KEY = "compression"; + public static final String CODEC_KEY = "codecs"; /* version 0 */ protected static final String compressionTypeKey = "compressionType"; @@ -58,17 +66,29 @@ public class DatasetAttributes implements Serializable { private final int[] blockSize; private final DataType dataType; private final Compression compression; + private final Codec[] codecs; public DatasetAttributes( final long[] dimensions, final int[] blockSize, final DataType dataType, - final Compression compression) { + final Compression compression, + final Codec[] codecs ) { this.dimensions = dimensions; this.blockSize = blockSize; this.dataType = dataType; this.compression = compression; + this.codecs = codecs; + } + + public DatasetAttributes( + final long[] dimensions, + final int[] blockSize, + final DataType dataType, + final Compression compression) { + + this(dimensions, blockSize, dataType, compression, null); } public long[] getDimensions() { @@ -96,6 +116,11 @@ public DataType getDataType() { return dataType; } + public Codec[] getCodecs() { + + return codecs; + } + public HashMap asMap() { final HashMap map = new HashMap<>(); @@ -103,6 +128,7 @@ public HashMap asMap() { map.put(BLOCK_SIZE_KEY, blockSize); map.put(DATA_TYPE_KEY, dataType); map.put(COMPRESSION_KEY, compression); + map.put(CODEC_KEY, codecs); // TODO : consider not adding to map when null? 
return map; } @@ -113,6 +139,17 @@ static DatasetAttributes from( Compression compression, final String compressionVersion0Name) { + return from(dimensions, dataType, blockSize, compression, compressionVersion0Name, null); + } + + static DatasetAttributes from( + final long[] dimensions, + final DataType dataType, + int[] blockSize, + Compression compression, + final String compressionVersion0Name, + Codec[] codecs) { + if (blockSize == null) blockSize = Arrays.stream(dimensions).mapToInt(a -> (int)a).toArray(); @@ -137,6 +174,6 @@ static DatasetAttributes from( } } - return new DatasetAttributes(dimensions, blockSize, dataType, compression); + return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 4cfd52be..867044db 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -34,6 +34,8 @@ import java.util.List; import java.util.Map; +import org.janelia.saalfeldlab.n5.codec.Codec; + /** * A simple structured container API for hierarchies of chunked * n-dimensional datasets and attributes. @@ -208,8 +210,30 @@ default void createDataset( /** * Creates a dataset. This does not create any data but the path and - * mandatory - * attributes only. + * mandatory attributes only. 
+ * + * @param datasetPath dataset path + * @param dimensions the dataset dimensions + * @param blockSize the block size + * @param dataType the data type + * @param compression the compression + * @param codecs optional codecs (may be null) + * @throws N5Exception the exception + */ + default void createDataset( + final String datasetPath, + final long[] dimensions, + final int[] blockSize, + final DataType dataType, + final Compression compression, + final Codec[] codecs) throws N5Exception { + + createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs)); + } + + /** + * Creates a dataset. This does not create any data but the path and + * mandatory attributes only. * * @param datasetPath dataset path * @param dimensions the dataset dimensions @@ -225,7 +249,7 @@ default void createDataset( final DataType dataType, final Compression compression) throws N5Exception { - createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, compression)); + createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, compression, null)); } /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java new file mode 100644 index 00000000..78730849 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -0,0 +1,178 @@ +package org.janelia.saalfeldlab.n5.codec; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.function.BiConsumer; + +import org.janelia.saalfeldlab.n5.DataType; + + +public class AsTypeCodec implements Codec { + + private static final long serialVersionUID = 1031322606191894484L; + + protected transient final int numBytes; + protected transient final int numEncodedBytes; + + protected transient final BiConsumer encoder; + protected transient final BiConsumer decoder; + + protected 
final DataType type; + protected final DataType encodedType; + + public AsTypeCodec( DataType type, DataType encodedType ) + { + this.type = type; + this.encodedType = encodedType; + + numBytes = bytes(type); + numEncodedBytes = bytes(encodedType); + + // TODO fill this out + if (type == DataType.UINT8 && encodedType == DataType.UINT32) { + encoder = BYTE_TO_INT; + decoder = INT_TO_BYTE; + } else if (type == DataType.UINT32 && encodedType == DataType.UINT8) { + encoder = INT_TO_BYTE; + decoder = BYTE_TO_INT; + } else if (type == DataType.FLOAT64 && encodedType == DataType.INT8) { + encoder = DOUBLE_TO_BYTE; + decoder = BYTE_TO_DOUBLE; + } else if (type == DataType.FLOAT32 && encodedType == DataType.INT8) { + encoder = FLOAT_TO_BYTE; + decoder = BYTE_TO_FLOAT; + } else { + encoder = IDENTITY; + decoder = IDENTITY; + } + } + + @Override + public InputStream decode(InputStream in) throws IOException { + + return new FixedLengthConvertedInputStream(numEncodedBytes, numBytes, decoder, in); + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + return new FixedLengthConvertedOutputStream(numBytes, numEncodedBytes, encoder, out); + } + + public static int bytes(DataType type) { + + switch (type) { + case UINT8: + case INT8: + return 1; + case UINT16: + case INT16: + return 2; + case UINT32: + case INT32: + case FLOAT32: + return 4; + case UINT64: + case INT64: + case FLOAT64: + return 8; + default: + return -1; + } + } + + public static final BiConsumer IDENTITY_ARR = (x, y) -> { + System.arraycopy(x, 0, y, 0, y.length); + }; + + public static final BiConsumer IDENTITY_ONE_ARR = (x, y) -> { + y[0] = x[0]; + }; + + public static final BiConsumer BYTE_TO_INT_ARR = (b, i) -> { + i[0] = 0; + i[1] = 0; + i[2] = 0; + i[3] = b[0]; + }; + + public static final BiConsumer INT_TO_BYTE_ARR = (i, b) -> { + b[0] = i[3]; + }; + + public static final BiConsumer INT_TO_FLOAT_ARR = (i, f) -> { + ByteBuffer.wrap(f).putFloat( + 
(float)ByteBuffer.wrap(i).getInt());
+    };
+
+    public static final BiConsumer FLOAT_TO_INT_ARR = (f, i) -> { // narrowing cast of the float to int
+        ByteBuffer.wrap(i).putInt(
+                (int)ByteBuffer.wrap(f).getFloat());
+    };
+
+    public static final BiConsumer INT_TO_DOUBLE_ARR = (i, f) -> {
+        ByteBuffer.wrap(f).putDouble(
+                (double)ByteBuffer.wrap(i).getInt()); // int to double is exact; a float cast rounds large values
+    };
+
+    public static final BiConsumer DOUBLE_TO_INT_ARR = (f, i) -> { // narrowing cast of the double to int
+        ByteBuffer.wrap(i).putInt(
+                (int)ByteBuffer.wrap(f).getDouble());
+    };
+
+    public static final BiConsumer IDENTITY = (x, y) -> { // copies y.capacity() bytes from x to y
+        for (int i = 0; i < y.capacity(); i++)
+            y.put(x.get());
+    };
+
+    public static final BiConsumer IDENTITY_ONE = (x, y) -> { // copies a single byte from x to y
+        y.put(x.get());
+    };
+
+    public static final BiConsumer BYTE_TO_INT = (b, i) -> { // three zero bytes, then the source byte
+        final byte zero = 0;
+        i.put(zero);
+        i.put(zero);
+        i.put(zero);
+        i.put(b.get());
+    };
+
+    public static final BiConsumer INT_TO_BYTE = (i, b) -> { // keeps only the byte at index 3 of the source
+        b.put(i.get(3));
+    };
+
+    public static final BiConsumer INT_TO_FLOAT = (i, f) -> {
+        f.putFloat((float)i.getInt());
+    };
+
+    public static final BiConsumer FLOAT_TO_INT = (f, i) -> {
+        i.putInt((int)f.getFloat());
+    };
+
+    public static final BiConsumer INT_TO_DOUBLE = (i, f) -> {
+        f.putDouble((double)i.getInt()); // int to double is exact; a float cast rounds large values
+    };
+
+    public static final BiConsumer DOUBLE_TO_INT = (f, i) -> {
+        i.putInt((int)f.getDouble());
+    };
+
+    public static final BiConsumer BYTE_TO_FLOAT = (b, f) -> { // the byte is read as a signed value
+        f.putFloat((float)b.get());
+    };
+
+    public static final BiConsumer FLOAT_TO_BYTE = (f, b) -> {
+        b.put((byte)f.getFloat());
+    };
+
+    public static final BiConsumer BYTE_TO_DOUBLE = (b, d) -> { // the byte is read as a signed value
+        d.putDouble((double)b.get());
+    };
+
+    public static final BiConsumer DOUBLE_TO_BYTE = (d, b) -> {
+        b.put((byte)d.getDouble());
+    };
+
+}
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java
new file mode 100644
index 00000000..95865dd3
--- /dev/null
+++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java
@@ -0,0 +1,35 @@
+package
org.janelia.saalfeldlab.n5.codec; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + * Interface representing a filter can encode a {@link OutputStream}s when writing data, and decode + * the {@link InputStream}s when reading data. + * + * Modeled after Filters in + * Zarr. + */ +public interface Codec extends Serializable { + + /** + * Decode an {@link InputStream}. + * + * @param in + * input stream + * @return the decoded input stream + */ + public InputStream decode(InputStream in) throws IOException; + + /** + * Encode an {@link OutputStream}. + * + * @param out + * the output stream + * @return the encoded output stream + */ + public OutputStream encode(OutputStream out) throws IOException; + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java new file mode 100644 index 00000000..ecf38837 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java @@ -0,0 +1,41 @@ +package org.janelia.saalfeldlab.n5.codec; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A {@link Codec} that is composition of a collection of codecs. + */ +public class ComposedCodec implements Codec { + + private static final long serialVersionUID = 5068349140842235924L; + private final Codec[] filters; + + public ComposedCodec(final Codec... 
filters) {
+
+        this.filters = filters;
+    }
+
+    /** Nests the decoders so that the last codec reads the raw bytes and the first yields the data. */
+    @Override
+    public InputStream decode(InputStream in) throws IOException {
+        InputStream decoded = in;
+        for (int i = filters.length - 1; i >= 0; i--)
+            decoded = filters[i].decode(decoded);
+        return decoded;
+    }
+
+    /**
+     * Nests the encoders back to front. The outermost wrapper sees written data first, so the
+     * codecs run in array order and the last one writes to {@code out}, mirroring decode.
+     */
+    @Override
+    public OutputStream encode(OutputStream out) throws IOException {
+        OutputStream encoded = out;
+        for (int i = filters.length - 1; i >= 0; i--)
+            encoded = filters[i].encode(encoded);
+        return encoded;
+    }
+
+}
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java
new file mode 100644
index 00000000..f9d65a87
--- /dev/null
+++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java
@@ -0,0 +1,71 @@
+package org.janelia.saalfeldlab.n5.codec;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.function.BiConsumer;
+
+/*
+ * An {@link InputStream} that converts between two fixed-length types.
+ */
+public class FixedLengthConvertedInputStream extends InputStream {
+
+    private final int numBytes;
+    private final int numBytesAfterDecoding;
+    private final byte[] raw;
+    private final byte[] decoded;
+    private final ByteBuffer rawBuffer;
+    private final ByteBuffer decodedBuffer;
+    private final InputStream src;
+    private final BiConsumer<ByteBuffer, ByteBuffer> converter;
+    private int incrememntalBytesRead;
+
+    /**
+     * @param numBytes number of source bytes consumed per conversion
+     * @param numBytesAfterDecoding number of bytes produced per conversion
+     * @param converter reads one group from its first buffer, writes the result to its second
+     * @param src stream supplying the unconverted bytes
+     */
+    public FixedLengthConvertedInputStream(
+            final int numBytes,
+            final int numBytesAfterDecoding,
+            final BiConsumer<ByteBuffer, ByteBuffer> converter,
+            final InputStream src) {
+
+        this.numBytes = numBytes;
+        this.numBytesAfterDecoding = numBytesAfterDecoding;
+        this.converter = converter;
+        this.src = src;
+        raw = new byte[numBytes];
+        decoded = new byte[numBytesAfterDecoding];
+        rawBuffer = ByteBuffer.wrap(raw);
+        decodedBuffer = ByteBuffer.wrap(decoded);
+    }
+
+    /**
+     * Returns the next converted byte as a value in 0..255, or -1 at end of stream.
+     * @throws IOException if the source fails or ends inside a group of {@code numBytes} bytes
+     */
+    @Override
+    public int read() throws IOException {
+        if (incrememntalBytesRead == 0) {
+            // refill: read one whole group, checking every byte for end of stream
+            for (int i = 0; i < numBytes; i++) {
+                final int b = src.read();
+                if (b < 0 && i == 0)
+                    return -1;
+                if (b < 0)
+                    throw new IOException("Stream ended after " + i + " of " + numBytes + " bytes");
+                raw[i] = (byte)b;
+            }
+            rawBuffer.rewind();
+            decodedBuffer.rewind();
+            converter.accept(rawBuffer, decodedBuffer);
+        }
+        // mask: a sign-extended byte >= 0x80 would be negative, and 0xFF would read as EOF
+        final int out = decoded[incrememntalBytesRead++] & 0xFF;
+        if (incrememntalBytesRead == numBytesAfterDecoding)
+            incrememntalBytesRead = 0;
+        return out;
+    }
+}
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedOutputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedOutputStream.java
new file mode 100644
index 00000000..87544fc7
--- /dev/null
+++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedOutputStream.java
@@ -0,0 +1,64 @@
+package org.janelia.saalfeldlab.n5.codec;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.function.BiConsumer;
+
+/*
+ * An {@link OutputStream} that converts between two fixed-length types.
+ */
+public class FixedLengthConvertedOutputStream extends OutputStream {
+
+    private final int numBytes;
+    private final byte[] raw;
+    private final byte[] encoded;
+    private final ByteBuffer rawBuffer;
+    private final ByteBuffer encodedBuffer;
+    private final OutputStream src;
+    private final BiConsumer<ByteBuffer, ByteBuffer> converter;
+    private int incrememntalBytesWritten;
+
+    /**
+     * @param numBytes number of written bytes forming one unconverted value
+     * @param numBytesAfterEncoding number of bytes the converter emits per value
+     * @param converter reads a value from its first buffer, writes the result to its second
+     * @param src stream receiving the converted bytes
+     */
+    public FixedLengthConvertedOutputStream(
+            final int numBytes, final int numBytesAfterEncoding,
+            final BiConsumer<ByteBuffer, ByteBuffer> converter, final OutputStream src) {
+        this.numBytes = numBytes;
+        this.converter = converter;
+        this.src = src;
+        raw = new byte[numBytes];
+        encoded = new byte[numBytesAfterEncoding];
+        rawBuffer = ByteBuffer.wrap(raw);
+        encodedBuffer = ByteBuffer.wrap(encoded);
+    }
+
+    /** Buffers {@code b}; once {@code numBytes} bytes are buffered they are converted and written. */
+    @Override
+    public void write(int b) throws IOException {
+        raw[incrememntalBytesWritten++] = (byte)b;
+        if (incrememntalBytesWritten == numBytes) {
+            rawBuffer.rewind();
+            encodedBuffer.rewind();
+            converter.accept(rawBuffer, encodedBuffer);
+            src.write(encoded);
+            incrememntalBytesWritten = 0;
+        }
+    }
+
+    /** Flushes the wrapped stream; bytes of an incomplete value stay buffered. */
+    @Override
+    public void flush() throws IOException {
+        src.flush();
+    }
+
+    /** Closes the wrapped stream, which the inherited no-op {@code close()} would leave open. */
+    @Override
+    public void close() throws IOException {
+        src.close();
+    }
+}
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java
new file mode 100644
index 00000000..955be235
--- /dev/null
+++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java
@@ -0,0 +1,55 @@
+package org.janelia.saalfeldlab.n5.codec;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.function.BiConsumer;
+
+import org.janelia.saalfeldlab.n5.DataType;
+
+public class FixedScaleOffsetCodec extends AsTypeCodec {
+
+    private static final long serialVersionUID = 8024945290803548528L;
+
+    @SuppressWarnings("unused")
+    private final double scale;
+
+    @SuppressWarnings("unused")
+    private final double
offset; + + public transient final BiConsumer encoder; + public transient final BiConsumer decoder; + + public FixedScaleOffsetCodec(final double scale, final double offset, DataType type, DataType encodedType) { + + super(type, encodedType); + this.scale = scale; + this.offset = offset; + + encoder = (f, i) -> { + final double in = f.getDouble(); + final byte res = (byte)(scale * in + offset); + i.put((byte)(scale * in + offset)); + }; + + decoder = (i, f) -> { + final byte in = i.get(); + final double conv = (((double)in) - offset) / scale; + f.putDouble(conv); + }; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + + return new FixedLengthConvertedInputStream(numEncodedBytes, numBytes, this.decoder, in); + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + return new FixedLengthConvertedOutputStream(numBytes, numEncodedBytes, this.encoder, out); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java new file mode 100644 index 00000000..83e3925c --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -0,0 +1,23 @@ +package org.janelia.saalfeldlab.n5.codec; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class IdentityCodec implements Codec { + + private static final long serialVersionUID = 8354269325800855621L; + + @Override + public InputStream decode(InputStream in) throws IOException { + + return in; + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + return out; + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java new file mode 100644 index 00000000..8cd38d83 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java @@ -0,0 +1,80 @@ +package 
org.janelia.saalfeldlab.n5.codec; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.junit.Test; + +public class AsTypeTests { + + @Test + public void testInt2Byte() throws IOException { + + final int N = 16; + final int[] ints = IntStream.rangeClosed(0, N).toArray(); + final ByteBuffer encodedInts = ByteBuffer.allocate(Integer.BYTES * N); + final byte[] bytes = new byte[N]; + for (int i = 0; i < N; i++) { + + bytes[i] = (byte)ints[i]; + encodedInts.putInt(ints[i]); + } + + final AsTypeCodec int2Byte = new AsTypeCodec(DataType.UINT32, DataType.UINT8); + testEncoding( int2Byte, bytes, encodedInts.array()); + testDecoding( int2Byte, encodedInts.array(), bytes); + + final AsTypeCodec byte2Int = new AsTypeCodec(DataType.UINT8, DataType.UINT32); + testEncoding( byte2Int, encodedInts.array(), bytes); + testDecoding( byte2Int, bytes, encodedInts.array()); + } + + @Test + public void testDouble2Byte() throws IOException { + + final int N = 16; + final double[] doubles = new double[N]; + final byte[] bytes = new byte[N]; + final ByteBuffer encodedDoubles = ByteBuffer.allocate(Double.BYTES * N); + for (int i = 0; i < N; i++) { + doubles[i] = i; + encodedDoubles.putDouble(doubles[i]); + + bytes[i] = (byte)i; + } + + final AsTypeCodec double2Byte = new AsTypeCodec(DataType.FLOAT64, DataType.INT8); + testEncoding(double2Byte, bytes, encodedDoubles.array()); + testDecoding(double2Byte, encodedDoubles.array(), bytes); + } + + protected static void testDecoding( final Codec codec, final byte[] expected, final byte[] input ) throws IOException + { + 
final InputStream result = codec.decode(new ByteArrayInputStream(input)); + for (int i = 0; i < expected.length; i++) + assertEquals(expected[i], (byte)result.read()); + } + + protected static void testEncoding( final Codec codec, final byte[] expected, final byte[] data ) throws IOException + { + + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(expected.length); + final OutputStream encodedStream = codec.encode(outputStream); + encodedStream.write(data); + encodedStream.flush(); + assertArrayEquals(expected, outputStream.toByteArray()); + encodedStream.close(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java new file mode 100644 index 00000000..e23ab07d --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java @@ -0,0 +1,88 @@ +package org.janelia.saalfeldlab.n5.codec; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import org.janelia.saalfeldlab.n5.codec.FixedLengthConvertedInputStream; +import org.junit.Test; + +public class FixedConvertedInputStreamTest { + + @Test + public void testLengthOne() throws IOException + { + + final byte expected = 5; + final byte[] data = new byte[32]; + Arrays.fill(data, expected); + + final FixedLengthConvertedInputStream convertedId = new FixedLengthConvertedInputStream(1, 1, + AsTypeCodec.IDENTITY_ONE, + new ByteArrayInputStream(data)); + + final FixedLengthConvertedInputStream convertedPlusOne = new FixedLengthConvertedInputStream(1, 1, + (x, y) -> { + y.put((byte)(x.get() + 1)); + }, + new ByteArrayInputStream(data)); + + for (int i = 0; i < 32; i++) { + assertEquals(expected, 
convertedId.read()); + assertEquals(expected + 1, convertedPlusOne.read()); + } + + convertedId.close(); + convertedPlusOne.close(); + } + + @Test + public void testIntToByte() throws IOException + { + + final int N = 16; + final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES * N); + IntStream.range(0, N).forEach( x -> { + buf.putInt(x); + }); + + final byte[] data = buf.array(); + final FixedLengthConvertedInputStream intToByte = new FixedLengthConvertedInputStream( + 4, 1, + AsTypeCodec.INT_TO_BYTE, + new ByteArrayInputStream(data)); + + for( int i = 0; i < N; i++ ) + assertEquals((byte)i, intToByte.read()); + + intToByte.close(); + } + + @Test + public void testByteToInt() throws IOException + { + + final int N = 16; + final byte[] data = new byte[16]; + for( int i = 0; i < N; i++ ) + data[i] = (byte)i; + + final FixedLengthConvertedInputStream byteToInt = new FixedLengthConvertedInputStream( + 1, 4, AsTypeCodec.BYTE_TO_INT, + new ByteArrayInputStream(data)); + + final DataInputStream dataStream = new DataInputStream(byteToInt); + for( int i = 0; i < N; i++ ) + assertEquals(i, dataStream.readInt()); + + dataStream.close(); + byteToInt.close(); + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java new file mode 100644 index 00000000..a457dfb1 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java @@ -0,0 +1,111 @@ +package org.janelia.saalfeldlab.n5.codec; + +import static org.junit.Assert.assertArrayEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.IntStream; + +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import org.janelia.saalfeldlab.n5.codec.FixedLengthConvertedOutputStream; +import org.junit.Test; + +public class FixedConvertedOutputStreamTest { + + @Test + 
public void testLengthOne() throws IOException + { + final int N = 2; + final byte expected = 5; + final byte expectedPlusOne = 6; + final byte[] expectedData = new byte[N]; + Arrays.fill(expectedData, expected); + + final byte[] expectedPlusOneData = new byte[N]; + Arrays.fill(expectedPlusOneData, expectedPlusOne); + + final ByteArrayOutputStream outId = new ByteArrayOutputStream(N); + final FixedLengthConvertedOutputStream convertedId = new FixedLengthConvertedOutputStream(1, 1, + AsTypeCodec.IDENTITY_ONE, + outId); + + convertedId.write(expectedData); + convertedId.flush(); + convertedId.close(); + + assertArrayEquals(expectedData, outId.toByteArray()); + + + final ByteArrayOutputStream outPlusOne = new ByteArrayOutputStream(N); + final FixedLengthConvertedOutputStream convertedPlusOne = new FixedLengthConvertedOutputStream(1, 1, + (x, y) -> { + y.put((byte)(x.get() + 1)); + }, + outPlusOne); + + convertedPlusOne.write(expectedData); + convertedPlusOne.close(); + assertArrayEquals(expectedPlusOneData, outPlusOne.toByteArray()); + } + + @Test + public void testIntToByte() throws IOException + { + + final int N = 16; + final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES * N); + IntStream.range(0, N).forEach( x -> { + buf.putInt(x); + }); + + final ByteBuffer expected = ByteBuffer.allocate(N); + IntStream.range(0, N).forEach( x -> { + expected.put((byte)x); + }); + + final ByteArrayOutputStream outStream = new ByteArrayOutputStream(N); + final FixedLengthConvertedOutputStream intToByte = new FixedLengthConvertedOutputStream( + 4, 1, + AsTypeCodec.INT_TO_BYTE, + outStream); + + intToByte.write(buf.array()); + intToByte.close(); + + System.out.println(Arrays.toString(buf.array())); + System.out.println(Arrays.toString(expected.array())); + System.out.println(Arrays.toString(outStream.toByteArray())); +// +// assertArrayEquals(expected.array(), outStream.toByteArray()); + } +// +// @Test +// public void testByteToInt() throws IOException +// { +// +// final int N 
= 16; +// final byte[] data = new byte[16]; +// for( int i = 0; i < N; i++ ) +// data[i] = (byte)i; +// +// FixedLengthConvertedInputStream byteToInt = new FixedLengthConvertedInputStream( +// 1, 4, +// (x, y) -> { +// y[0] = 0; // the setting to zero is not strictly necessary in this case +// y[1] = 0; +// y[2] = 0; +// y[3] = x[0]; +// }, +// new ByteArrayInputStream(data)); +// +// final DataInputStream dataStream = new DataInputStream(byteToInt); +// for( int i = 0; i < N; i++ ) +// assertEquals(i, dataStream.readInt()); +// +// dataStream.close(); +// byteToInt.close(); +// } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java new file mode 100644 index 00000000..098d8ece --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java @@ -0,0 +1,62 @@ +package org.janelia.saalfeldlab.n5.codec; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.stream.DoubleStream; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; +import org.junit.Test; + +public class FixedScaleOffsetTests { + + @Test + public void testDouble2Byte() throws IOException { + + final int N = 16; + final double[] doubles = DoubleStream.iterate(0.0, x -> x + 1).limit(N).toArray(); + final ByteBuffer encodedDoubles = ByteBuffer.allocate(Double.BYTES * N); + final byte[] bytes = new byte[N]; + + final double scale = 2; + final double offset = 1; + + for (int i = 0; i < N; i++) { + final double val = (scale * doubles[i] + offset); + bytes[i] = (byte)val; + encodedDoubles.putDouble(i); + } + + 
final FixedScaleOffsetCodec double2Byte = new FixedScaleOffsetCodec(scale, offset, DataType.FLOAT64, DataType.UINT8); + testEncoding(double2Byte, bytes, encodedDoubles.array()); + testDecoding(double2Byte, encodedDoubles.array(), bytes); + } + + protected static void testDecoding( final Codec codec, final byte[] expected, final byte[] input ) throws IOException + { + final InputStream result = codec.decode(new ByteArrayInputStream(input)); + for (int i = 0; i < expected.length; i++) + assertEquals(expected[i], (byte)result.read()); + } + + protected static void testEncoding( final Codec codec, final byte[] expected, final byte[] data ) throws IOException + { + + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(expected.length); + final OutputStream encodedStream = codec.encode(outputStream); + encodedStream.write(data); + encodedStream.flush(); + final byte[] convertedArr = outputStream.toByteArray(); + assertArrayEquals( expected, outputStream.toByteArray()); + encodedStream.close(); + } + +} From 14d3b696ae9c399f4ff0cab45c1dcd77ab55a1e7 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 28 May 2024 15:23:22 -0400 Subject: [PATCH 002/124] feat(wip): reading and writing blocks uses codecs --- .../saalfeldlab/n5/Bzip2Compression.java | 16 +- .../janelia/saalfeldlab/n5/CodecAdapter.java | 92 +++++ .../janelia/saalfeldlab/n5/Compression.java | 64 ++++ .../saalfeldlab/n5/DatasetAttributes.java | 48 +-- .../saalfeldlab/n5/DefaultBlockReader.java | 52 +++ .../saalfeldlab/n5/DefaultBlockWriter.java | 51 +++ .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 2 +- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 5 +- .../janelia/saalfeldlab/n5/GsonN5Reader.java | 5 +- .../org/janelia/saalfeldlab/n5/GsonUtils.java | 3 + .../saalfeldlab/n5/GzipCompression.java | 17 +- .../saalfeldlab/n5/Lz4Compression.java | 16 +- .../saalfeldlab/n5/RawCompression.java | 17 +- .../janelia/saalfeldlab/n5/XzCompression.java | 16 +- .../saalfeldlab/n5/codec/AsTypeCodec.java | 
358 ++++++++++++++---- .../janelia/saalfeldlab/n5/codec/Codec.java | 2 + .../saalfeldlab/n5/codec/ComposedCodec.java | 14 +- .../n5/codec/FixedScaleOffsetCodec.java | 82 +++- .../saalfeldlab/n5/codec/IdentityCodec.java | 8 + .../saalfeldlab/n5/codec/AsTypeTests.java | 53 ++- .../codec/FixedConvertedInputStreamTest.java | 8 +- .../codec/FixedConvertedOutputStreamTest.java | 6 +- .../n5/codec/FixedScaleOffsetTests.java | 52 ++- 23 files changed, 789 insertions(+), 198 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java index 5d3d6161..0c40d01a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java @@ -52,17 +52,29 @@ public Bzip2Compression() { } @Override - public InputStream getInputStream(final InputStream in) throws IOException { + public InputStream decode(final InputStream in) throws IOException { return new BZip2CompressorInputStream(in); } @Override - public OutputStream getOutputStream(final OutputStream out) throws IOException { + public InputStream getInputStream(final InputStream in) throws IOException { + + return decode(in); + } + + @Override + public OutputStream encode(final OutputStream out) throws IOException { return new BZip2CompressorOutputStream(out, blockSize); } + @Override + public OutputStream getOutputStream(final OutputStream out) throws IOException { + + return encode(out); + } + @Override public Bzip2Compression getReader() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java new file mode 100644 index 00000000..0933433e --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2017, Stephan Saalfeld + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.janelia.saalfeldlab.n5; + +import java.lang.reflect.Type; + +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; + +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonNull; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; + +public class CodecAdapter implements JsonDeserializer, JsonSerializer { + + @Override + public JsonElement serialize( + final Codec codec, + final Type typeOfSrc, + final JsonSerializationContext context) { + + if (codec.getId().equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { + final FixedScaleOffsetCodec c = (FixedScaleOffsetCodec)codec; + final JsonObject obj = new JsonObject(); + obj.addProperty("id", c.getId()); + obj.addProperty("scale", c.getScale()); + obj.addProperty("offset", c.getOffset()); + obj.addProperty("type", c.getType().toString().toLowerCase()); + obj.addProperty("encodedType", c.getEncodedType().toString().toLowerCase()); + return obj; + } + + return JsonNull.INSTANCE; + } + + @Override + public Codec deserialize( + final JsonElement json, + final Type typeOfT, + final JsonDeserializationContext context) throws JsonParseException { + + if (json == null) + return null; + else if (!json.isJsonObject()) + return null; + + final JsonObject jsonObject = json.getAsJsonObject(); + if (jsonObject.has("id")) { + + final String id = jsonObject.get("id").getAsString(); + if (id.equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { + + return new FixedScaleOffsetCodec( + jsonObject.get("scale").getAsDouble(), + jsonObject.get("offset").getAsDouble(), + DataType.valueOf(jsonObject.get("type").getAsString().toUpperCase()), + DataType.valueOf(jsonObject.get("encodedType").getAsString().toUpperCase())); + } + } + + return 
null; + } + +} \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index df0ca49e..3092bbe4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -25,6 +25,9 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.Serializable; import java.lang.annotation.ElementType; import java.lang.annotation.Inherited; @@ -32,6 +35,7 @@ import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; +import org.janelia.saalfeldlab.n5.codec.Codec; import org.scijava.annotations.Indexable; /** @@ -41,6 +45,12 @@ */ public interface Compression extends Serializable { + // @Override + // public default String getId() { + // + // return getType(); + // } + /** * Annotation for runtime discovery of compression schemes. * @@ -72,7 +82,61 @@ public default String getType() { return compressionType.value(); } + public BlockReader getReader(); public BlockWriter getWriter(); + + /** + * Decode an {@link InputStream}. + * + * @param in + * input stream + * @return the decoded input stream + */ + public InputStream decode(InputStream in) throws IOException; + + /** + * Encode an {@link OutputStream}. 
+ * + * @param out + * the output stream + * @return the encoded output stream + */ + public OutputStream encode(OutputStream out) throws IOException; + + public static Codec getCompressionAsCodec(Compression compression) { + + return new CompressionCodec(compression); + } + + public static class CompressionCodec implements Codec { + + private static final long serialVersionUID = -7931131454184340637L; + private Compression compression; + + public CompressionCodec(Compression compression) { + + this.compression = compression; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + + return compression.decode(in); + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + return compression.encode(out); + } + + @Override + public String getId() { + + return compression.getType(); + } + + } } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 5fa19fc5..9f67d0d0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -1,28 +1,3 @@ -/** - * Copyright (c) 2017, Stephan Saalfeld - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ package org.janelia.saalfeldlab.n5; import java.io.Serializable; @@ -30,6 +5,7 @@ import java.util.HashMap; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.ComposedCodec; /** * Mandatory dataset attributes: @@ -40,7 +16,7 @@ *
<li>{@link DataType} : dataType</li>
  * <li>{@link Compression} : compression</li>
  * </ol>
- * 
+ *
  * Optional dataset attributes:
  * <ol>
  * <li>{@link Codec}[] : codecs</li>
    2. @@ -121,6 +97,24 @@ public Codec[] getCodecs() { return codecs; } + public Codec collectCodecs() { + + final Codec compressionCodec = Compression.getCompressionAsCodec(compression); + + if (codecs == null || codecs.length == 0) + return compressionCodec; + else if (codecs.length == 1) + return new ComposedCodec(codecs[0], compressionCodec); + else { + final Codec[] codecsAndCompresor = new Codec[codecs.length + 1]; + for (int i = 0; i < codecs.length; i++) + codecsAndCompresor[i] = codecs[i]; + + codecsAndCompresor[codecs.length] = compressionCodec; + return new ComposedCodec(codecsAndCompresor); + } + } + public HashMap asMap() { final HashMap map = new HashMap<>(); @@ -128,7 +122,7 @@ public HashMap asMap() { map.put(BLOCK_SIZE_KEY, blockSize); map.put(DATA_TYPE_KEY, dataType); map.put(COMPRESSION_KEY, compression); - map.put(CODEC_KEY, codecs); // TODO : consider not adding to map when null? + map.put(CODEC_KEY, codecs); // TODO : consider not adding to map when null return map; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 58c59780..4e585819 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -95,4 +95,56 @@ public static DataBlock readBlock( reader.read(dataBlock, in); return dataBlock; } + + /** + * Reads a {@link DataBlock} from an {@link InputStream}. 
+ * + * @param in + * the input stream + * @param datasetAttributes + * the dataset attributes + * @param gridPosition + * the grid position + * @return the block + * @throws IOException + * the exception + */ + public static DataBlock readBlockWithCodecs( + final InputStream in, + final DatasetAttributes datasetAttributes, + final long[] gridPosition) throws IOException { + + final DataInputStream dis = new DataInputStream(in); + final short mode = dis.readShort(); + final int numElements; + final DataBlock dataBlock; + if (mode != 2) { + final int nDim = dis.readShort(); + final int[] blockSize = new int[nDim]; + for (int d = 0; d < nDim; ++d) + blockSize[d] = dis.readInt(); + if (mode == 0) { + numElements = DataBlock.getNumElements(blockSize); + } else { + numElements = dis.readInt(); + } + dataBlock = datasetAttributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); + } else { + numElements = dis.readInt(); + dataBlock = datasetAttributes.getDataType().createDataBlock(null, gridPosition, numElements); + } + + readFromStream(dataBlock, datasetAttributes.collectCodecs().decode(in)); + return dataBlock; + } + + public static > void readFromStream(final B dataBlock, final InputStream in) throws IOException { + + final ByteBuffer buffer = dataBlock.toByteBuffer(); + final DataInputStream dis = new DataInputStream(in); + dis.readFully(buffer.array()); + dataBlock.readData(buffer); + } + + } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index c53aae2d..c5d985f9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -95,4 +95,55 @@ else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSiz final BlockWriter writer = datasetAttributes.getCompression().getWriter(); writer.write(dataBlock, out); } + + /** + * 
Writes a {@link DataBlock} into an {@link OutputStream}. + * + * @param + * the type of data + * @param out + * the output stream + * @param datasetAttributes + * the dataset attributes + * @param dataBlock + * the data block the block data type + * @throws IOException + * the exception + */ + public static void writeBlockWithCodecs( + final OutputStream out, + final DatasetAttributes datasetAttributes, + final DataBlock dataBlock) throws IOException { + + final DataOutputStream dos = new DataOutputStream(out); + + final int mode; + if (datasetAttributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) + mode = 2; + else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) + mode = 0; + else + mode = 1; + dos.writeShort(mode); + + if (mode != 2) { + dos.writeShort(datasetAttributes.getNumDimensions()); + for (final int size : dataBlock.getSize()) + dos.writeInt(size); + } + + if (mode != 0) + dos.writeInt(dataBlock.getNumElements()); + + try (final OutputStream encodedStream = datasetAttributes.collectCodecs().encode(out)) { + writeFromStream(dataBlock, encodedStream); + out.flush(); + } + } + + public static void writeFromStream(final DataBlock dataBlock, final OutputStream out) throws IOException { + + final ByteBuffer buffer = dataBlock.toByteBuffer(); + out.write(buffer.array()); + } } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 644aca51..76aea228 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -98,7 +98,7 @@ default DataBlock readBlock( return null; try (final LockedChannel lockedChannel = getKeyValueAccess().lockForReading(path)) { - return DefaultBlockReader.readBlock(lockedChannel.newInputStream(), datasetAttributes, gridPosition); + return 
DefaultBlockReader.readBlockWithCodecs(lockedChannel.newInputStream(), datasetAttributes, gridPosition); } catch (final IOException | UncheckedIOException e) { throw new N5IOException( "Failed to read block " + Arrays.toString(gridPosition) + " from dataset " + path, diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 38c754be..a784dcc6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -31,13 +31,13 @@ import java.util.List; import java.util.Map; -import com.google.gson.JsonSyntaxException; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonNull; import com.google.gson.JsonObject; +import com.google.gson.JsonSyntaxException; /** * Default implementation of {@link N5Writer} with JSON attributes parsed with @@ -218,7 +218,8 @@ default void writeBlock( final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { - DefaultBlockWriter.writeBlock(lock.newOutputStream(), datasetAttributes, dataBlock); + DefaultBlockWriter.writeBlockWithCodecs(lock.newOutputStream(), datasetAttributes, dataBlock); + // DefaultBlockWriter.writeBlock(lock.newOutputStream(), datasetAttributes, dataBlock); } catch (final IOException | UncheckedIOException e) { throw new N5IOException( "Failed to write block " + Arrays.toString(dataBlock.getGridPosition()) + " into dataset " + path, diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java index be16ed08..ea7ea878 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java @@ -28,6 +28,8 @@ import java.lang.reflect.Type; import java.util.Map; +import org.janelia.saalfeldlab.n5.codec.Codec; + import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonSyntaxException; @@ -69,13 +71,14 @@ default DatasetAttributes createDatasetAttributes(final JsonElement attributes) final int[] blockSize = GsonUtils.readAttribute(attributes, DatasetAttributes.BLOCK_SIZE_KEY, int[].class, getGson()); final Compression compression = GsonUtils.readAttribute(attributes, DatasetAttributes.COMPRESSION_KEY, Compression.class, getGson()); + final Codec[] codecs = GsonUtils.readAttribute(attributes, DatasetAttributes.CODEC_KEY, Codec[].class, getGson()); /* version 0 */ final String compressionVersion0Name = compression == null ? GsonUtils.readAttribute(attributes, DatasetAttributes.compressionTypeKey, String.class, getGson()) : null; - return DatasetAttributes.from(dimensions, dataType, blockSize, compression, compressionVersion0Name); + return DatasetAttributes.from(dimensions, dataType, blockSize, compression, compressionVersion0Name, codecs); } catch (JsonSyntaxException | NumberFormatException | ClassCastException e) { /* We cannot create a dataset, so return null. 
*/ return null; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java index d72307c0..f7b988de 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java @@ -34,6 +34,8 @@ import java.util.Map; import java.util.regex.Matcher; +import org.janelia.saalfeldlab.n5.codec.Codec; + import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; @@ -54,6 +56,7 @@ static Gson registerGson(final GsonBuilder gsonBuilder) { gsonBuilder.registerTypeAdapter(DataType.class, new DataType.JsonAdapter()); gsonBuilder.registerTypeHierarchyAdapter(Compression.class, CompressionAdapter.getJsonAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(Codec.class, new CodecAdapter()); gsonBuilder.disableHtmlEscaping(); return gsonBuilder.create(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java index b691a6d3..0b6d734d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java @@ -68,7 +68,7 @@ public GzipCompression(final int level, final boolean useZlib) { } @Override - public InputStream getInputStream(final InputStream in) throws IOException { + public InputStream decode(InputStream in) throws IOException { if (useZlib) { return new InflaterInputStream(in); @@ -78,7 +78,13 @@ public InputStream getInputStream(final InputStream in) throws IOException { } @Override - public OutputStream getOutputStream(final OutputStream out) throws IOException { + public InputStream getInputStream(final InputStream in) throws IOException { + + return decode(in); + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { if (useZlib) { return new DeflaterOutputStream(out, new Deflater(level)); @@ -88,6 +94,12 @@ public 
OutputStream getOutputStream(final OutputStream out) throws IOException { } } + @Override + public OutputStream getOutputStream(final OutputStream out) throws IOException { + + return encode(out); + } + @Override public GzipCompression getReader() { @@ -116,4 +128,5 @@ public boolean equals(final Object other) { return useZlib == gz.useZlib && level == gz.level; } } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java index d76e4fe5..0ba88e12 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java @@ -53,17 +53,29 @@ public Lz4Compression() { } @Override - public InputStream getInputStream(final InputStream in) throws IOException { + public InputStream decode(final InputStream in) throws IOException { return new LZ4BlockInputStream(in); } @Override - public OutputStream getOutputStream(final OutputStream out) throws IOException { + public InputStream getInputStream(final InputStream in) throws IOException { + + return decode(in); + } + + @Override + public OutputStream encode(final OutputStream out) throws IOException { return new LZ4BlockOutputStream(out, blockSize); } + @Override + public OutputStream getOutputStream(final OutputStream out) throws IOException { + + return encode(out); + } + @Override public Lz4Compression getReader() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java index ffa674fc..7d1327b0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java @@ -37,17 +37,30 @@ public class RawCompression implements DefaultBlockReader, DefaultBlockWriter, C private static final long serialVersionUID = 7526445806847086477L; @Override - public InputStream getInputStream(final InputStream in) throws IOException { + public 
InputStream decode(final InputStream in) throws IOException { return in; } @Override - public OutputStream getOutputStream(final OutputStream out) throws IOException { + public InputStream getInputStream(final InputStream in) throws IOException { + + return decode(in); + } + + + @Override + public OutputStream encode(final OutputStream out) throws IOException { return out; } + @Override + public OutputStream getOutputStream(final OutputStream out) throws IOException { + + return encode(out); + } + @Override public RawCompression getReader() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java index 5204e799..ed73b7d5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java @@ -52,17 +52,29 @@ public XzCompression() { } @Override - public InputStream getInputStream(final InputStream in) throws IOException { + public InputStream decode(final InputStream in) throws IOException { return new XZCompressorInputStream(in); } @Override - public OutputStream getOutputStream(final OutputStream out) throws IOException { + public InputStream getInputStream(final InputStream in) throws IOException { + + return decode(in); + } + + @Override + public OutputStream encode(final OutputStream out) throws IOException { return new XZCompressorOutputStream(out, preset); } + @Override + public OutputStream getOutputStream(final OutputStream out) throws IOException { + + return encode(out); + } + @Override public XzCompression getReader() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index 78730849..d4852d4a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -22,6 +22,8 @@ public class AsTypeCodec implements Codec { protected final 
DataType type; protected final DataType encodedType; + protected final String id = "astype"; + public AsTypeCodec( DataType type, DataType encodedType ) { this.type = type; @@ -30,23 +32,14 @@ public AsTypeCodec( DataType type, DataType encodedType ) numBytes = bytes(type); numEncodedBytes = bytes(encodedType); - // TODO fill this out - if (type == DataType.UINT8 && encodedType == DataType.UINT32) { - encoder = BYTE_TO_INT; - decoder = INT_TO_BYTE; - } else if (type == DataType.UINT32 && encodedType == DataType.UINT8) { - encoder = INT_TO_BYTE; - decoder = BYTE_TO_INT; - } else if (type == DataType.FLOAT64 && encodedType == DataType.INT8) { - encoder = DOUBLE_TO_BYTE; - decoder = BYTE_TO_DOUBLE; - } else if (type == DataType.FLOAT32 && encodedType == DataType.INT8) { - encoder = FLOAT_TO_BYTE; - decoder = BYTE_TO_FLOAT; - } else { - encoder = IDENTITY; - decoder = IDENTITY; - } + encoder = converter(type, encodedType); + decoder = converter(encodedType, type); + } + + @Override + public String getId() { + + return id; } @Override @@ -83,96 +76,301 @@ public static int bytes(DataType type) { } } - public static final BiConsumer IDENTITY_ARR = (x, y) -> { - System.arraycopy(x, 0, y, 0, y.length); - }; - - public static final BiConsumer IDENTITY_ONE_ARR = (x, y) -> { - y[0] = x[0]; - }; + public static BiConsumer converter(final DataType from, final DataType to) { + + // // TODO fill this out + + if (from == to) + return AsTypeCodec::IDENTITY; + else if (from == DataType.INT8) { + + if( to == DataType.INT16 ) + return AsTypeCodec::BYTE_TO_SHORT; + else if( to == DataType.INT32 ) + return AsTypeCodec::BYTE_TO_INT; + else if( to == DataType.INT64 ) + return AsTypeCodec::BYTE_TO_LONG; + else if( to == DataType.FLOAT32 ) + return AsTypeCodec::BYTE_TO_FLOAT; + else if( to == DataType.FLOAT64 ) + return AsTypeCodec::BYTE_TO_DOUBLE; + + } else if (from == DataType.INT16) { + + if (to == DataType.INT8) + return AsTypeCodec::SHORT_TO_BYTE; + else if (to == DataType.INT32) + 
return AsTypeCodec::SHORT_TO_INT; + else if (to == DataType.INT64) + return AsTypeCodec::SHORT_TO_LONG; + else if (to == DataType.FLOAT32) + return AsTypeCodec::SHORT_TO_FLOAT; + else if (to == DataType.FLOAT64) + return AsTypeCodec::SHORT_TO_DOUBLE; + + } else if (from == DataType.INT32) { + + if (to == DataType.INT8) + return AsTypeCodec::INT_TO_BYTE; + else if (to == DataType.INT16) + return AsTypeCodec::INT_TO_SHORT; + if (to == DataType.INT8) + return AsTypeCodec::DOUBLE_TO_BYTE; + else if (to == DataType.INT16) + return AsTypeCodec::DOUBLE_TO_SHORT; + else if (to == DataType.INT32) + return AsTypeCodec::DOUBLE_TO_INT; + else if (to == DataType.INT64) + return AsTypeCodec::DOUBLE_TO_LONG; + else if (to == DataType.FLOAT32) + return AsTypeCodec::DOUBLE_TO_FLOAT; + else if (to == DataType.INT64) + return AsTypeCodec::INT_TO_LONG; + else if (to == DataType.FLOAT32) + return AsTypeCodec::INT_TO_FLOAT; + else if (to == DataType.FLOAT64) + return AsTypeCodec::INT_TO_DOUBLE; + + } else if (from == DataType.INT64) { + + if (to == DataType.INT8) + return AsTypeCodec::LONG_TO_BYTE; + else if (to == DataType.INT16) + return AsTypeCodec::LONG_TO_SHORT; + else if (to == DataType.INT32) + return AsTypeCodec::LONG_TO_INT; + else if (to == DataType.FLOAT32) + return AsTypeCodec::LONG_TO_FLOAT; + else if (to == DataType.FLOAT64) + return AsTypeCodec::LONG_TO_DOUBLE; + + } else if (from == DataType.FLOAT32) { + + if (to == DataType.INT8) + return AsTypeCodec::FLOAT_TO_BYTE; + else if (to == DataType.INT16) + return AsTypeCodec::FLOAT_TO_SHORT; + else if (to == DataType.INT32) + return AsTypeCodec::FLOAT_TO_INT; + else if (to == DataType.INT64) + return AsTypeCodec::FLOAT_TO_LONG; + else if (to == DataType.FLOAT64) + return AsTypeCodec::FLOAT_TO_DOUBLE; + + } else if (from == DataType.FLOAT64) { + + if (to == DataType.INT8) + return AsTypeCodec::DOUBLE_TO_BYTE; + else if (to == DataType.INT16) + return AsTypeCodec::DOUBLE_TO_SHORT; + else if (to == DataType.INT32) + return 
AsTypeCodec::DOUBLE_TO_INT; + else if (to == DataType.INT64) + return AsTypeCodec::DOUBLE_TO_LONG; + else if (to == DataType.FLOAT32) + return AsTypeCodec::DOUBLE_TO_FLOAT; + } - public static final BiConsumer BYTE_TO_INT_ARR = (b, i) -> { - i[0] = 0; - i[1] = 0; - i[2] = 0; - i[3] = b[0]; - }; + return AsTypeCodec::IDENTITY; + } - public static final BiConsumer INT_TO_BYTE_ARR = (i, b) -> { - b[0] = i[3]; - }; + public static final void IDENTITY(final ByteBuffer x, final ByteBuffer y) { - public static final BiConsumer INT_TO_FLOAT_ARR = (i, f) -> { - ByteBuffer.wrap(f).putFloat( - (float)ByteBuffer.wrap(i).getInt()); - }; + for (int i = 0; i < y.capacity(); i++) + y.put(x.get()); + } - public static final BiConsumer FLOAT_TO_INT_ARR = (f, i) -> { - ByteBuffer.wrap(i).putInt( - (int)ByteBuffer.wrap(f).getFloat()); - }; + public static final void IDENTITY_ONE(final ByteBuffer x, final ByteBuffer y) { - public static final BiConsumer INT_TO_DOUBLE_ARR = (i, f) -> { - ByteBuffer.wrap(f).putDouble( - (float)ByteBuffer.wrap(i).getInt()); - }; + y.put(x.get()); + } - public static final BiConsumer DOUBLE_TO_INT_ARR = (f, i) -> { - ByteBuffer.wrap(i).putInt( - (int)ByteBuffer.wrap(f).getDouble()); - }; + public static final void BYTE_TO_SHORT(final ByteBuffer b, final ByteBuffer s) { - public static final BiConsumer IDENTITY = (x, y) -> { - for (int i = 0; i < y.capacity(); i++) - y.put(x.get()); - }; + final byte zero = 0; + s.put(zero); + s.put(b.get()); + } - public static final BiConsumer IDENTITY_ONE = (x, y) -> { - y.put(x.get()); - }; + public static final void BYTE_TO_INT(final ByteBuffer b, final ByteBuffer i) { - public static final BiConsumer BYTE_TO_INT = (b, i) -> { final byte zero = 0; i.put(zero); i.put(zero); i.put(zero); i.put(b.get()); - }; + } + + public static final void BYTE_TO_LONG(final ByteBuffer b, final ByteBuffer l) { + + final byte zero = 0; + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + 
l.put(zero); + l.put(b.get()); + } + + public static final void BYTE_TO_FLOAT(final ByteBuffer b, final ByteBuffer f) { + + f.putFloat((float)b.get()); + } + + public static final void BYTE_TO_DOUBLE(final ByteBuffer b, final ByteBuffer d) { + + d.putDouble((double)b.get()); + } + + public static final void SHORT_TO_BYTE(final ByteBuffer s, final ByteBuffer b) { + + final byte zero = 0; + b.put(zero); + b.put(s.get()); + } + + public static final void SHORT_TO_INT(final ByteBuffer s, final ByteBuffer i) { + + final byte zero = 0; + i.put(zero); + i.put(zero); + i.put(s.get()); + i.put(s.get()); + } + + public static final void SHORT_TO_LONG(final ByteBuffer s, final ByteBuffer l) { + + final byte zero = 0; + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(s.get()); + l.put(s.get()); + } + + public static final void SHORT_TO_FLOAT(final ByteBuffer s, final ByteBuffer f) { + + f.putFloat((float)s.getShort()); + } + + public static final void SHORT_TO_DOUBLE(final ByteBuffer s, final ByteBuffer d) { + + d.putDouble((double)s.getShort()); + } + + public static final void INT_TO_BYTE(final ByteBuffer i, final ByteBuffer b) { - public static final BiConsumer INT_TO_BYTE = (i, b) -> { b.put(i.get(3)); - }; + } + + public static final void INT_TO_SHORT(final ByteBuffer i, final ByteBuffer s) { + + s.put(i.get(2)); + s.put(i.get(3)); + } + + public static final void INT_TO_LONG(final ByteBuffer i, final ByteBuffer l) { + + final byte zero = 0; + l.put(zero); + l.put(zero); + l.put(zero); + l.put(zero); + l.put(i.get()); + l.put(i.get()); + l.put(i.get()); + l.put(i.get()); + } + + public static final void INT_TO_FLOAT(final ByteBuffer i, final ByteBuffer f) { - public static final BiConsumer INT_TO_FLOAT = (i, f) -> { f.putFloat((float)i.getInt()); - }; + } - public static final BiConsumer FLOAT_TO_INT = (f, i) -> { - i.putInt((int)f.getFloat()); - }; + public static final void INT_TO_DOUBLE(final ByteBuffer i, final 
ByteBuffer f) { - public static final BiConsumer INT_TO_DOUBLE = (i, f) -> { f.putDouble((float)i.getInt()); - }; + } - public static final BiConsumer DOUBLE_TO_INT = (f, i) -> { - i.putInt((int)f.getDouble()); - }; + public static final void LONG_TO_BYTE(final ByteBuffer l, final ByteBuffer b) { - public static final BiConsumer BYTE_TO_FLOAT = (b, f) -> { - f.putFloat((float)b.get()); - }; + b.put((byte)l.getLong()); + } + + public static final void LONG_TO_SHORT(final ByteBuffer l, final ByteBuffer s) { + + s.putShort((short)l.getLong()); + } + + public static final void LONG_TO_INT(final ByteBuffer l, final ByteBuffer i) { + + i.putInt((int)l.getLong()); + } + + public static final void LONG_TO_FLOAT(final ByteBuffer l, final ByteBuffer f) { + + f.putFloat((float)l.getLong()); + } + + public static final void LONG_TO_DOUBLE(final ByteBuffer l, final ByteBuffer f) { + + f.putDouble((float)l.getLong()); + } + + public static final void FLOAT_TO_BYTE(final ByteBuffer f, final ByteBuffer b) { - public static final BiConsumer FLOAT_TO_BYTE = (f, b) -> { b.put((byte)f.getFloat()); - }; + } - public static final BiConsumer BYTE_TO_DOUBLE = (b, d) -> { - d.putDouble((double)b.get()); - }; + public static final void FLOAT_TO_SHORT(final ByteBuffer f, final ByteBuffer s) { + + s.putShort((short)f.getFloat()); + } + + public static final void FLOAT_TO_INT(final ByteBuffer f, final ByteBuffer i) { + + i.putInt((int)f.getFloat()); + } + + public static final void FLOAT_TO_LONG(final ByteBuffer f, final ByteBuffer l) { + + l.putLong((long)f.getFloat()); + } + + public static final void FLOAT_TO_DOUBLE(final ByteBuffer f, final ByteBuffer d) { + + d.putDouble((double)f.getFloat()); + } + + public static final void DOUBLE_TO_BYTE(final ByteBuffer d, final ByteBuffer b) { - public static final BiConsumer DOUBLE_TO_BYTE = (d, b) -> { b.put((byte)d.getDouble()); - }; + } + + public static final void DOUBLE_TO_SHORT(final ByteBuffer d, final ByteBuffer s) { + + 
s.putShort((short)d.getDouble()); + } + + public static final void DOUBLE_TO_INT(final ByteBuffer d, final ByteBuffer i) { + + i.putInt((int)d.getDouble()); + } + + public static final void DOUBLE_TO_LONG(final ByteBuffer d, final ByteBuffer l) { + + l.putLong((long)d.getDouble()); + } + + public static final void DOUBLE_TO_FLOAT(final ByteBuffer d, final ByteBuffer f) { + + f.putFloat((float)d.getDouble()); + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index 95865dd3..a2e5373f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -32,4 +32,6 @@ public interface Codec extends Serializable { */ public OutputStream encode(OutputStream out) throws IOException; + public String getId(); + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java index ecf38837..49e6f923 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java @@ -12,17 +12,25 @@ public class ComposedCodec implements Codec { private static final long serialVersionUID = 5068349140842235924L; private final Codec[] filters; + protected String id = "composed"; + public ComposedCodec(final Codec... 
filters) { this.filters = filters; } + @Override + public String getId() { + + return id; + } + @Override public InputStream decode(InputStream in) throws IOException { - // DOCME : note that decoding is in reverse order + // note that decoding is in reverse order InputStream decoded = in; - for( int i = filters.length - 1; i >= 0; i-- ) + for (int i = filters.length - 1; i >= 0; i--) decoded = filters[i].decode(decoded); return decoded; @@ -32,7 +40,7 @@ public InputStream decode(InputStream in) throws IOException { public OutputStream encode(OutputStream out) throws IOException { OutputStream encoded = out; - for( int i = 0; i < filters.length; i++ ) + for (int i = 0; i < filters.length; i++) encoded = filters[i].encode(encoded); return encoded; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java index 955be235..34b06ecd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java @@ -12,14 +12,25 @@ public class FixedScaleOffsetCodec extends AsTypeCodec { private static final long serialVersionUID = 8024945290803548528L; + public static transient final String FIXED_SCALE_OFFSET_CODEC_ID = "fixedscaleoffset"; + @SuppressWarnings("unused") private final double scale; @SuppressWarnings("unused") private final double offset; + protected final String id = FIXED_SCALE_OFFSET_CODEC_ID; + + private transient final ByteBuffer tmpEncoder; + private transient final ByteBuffer tmpDecoder; + public transient final BiConsumer encoder; + public transient final BiConsumer encoderPre; + public transient final BiConsumer encoderPost; public transient final BiConsumer decoder; + public transient final BiConsumer decoderPre; + public transient final BiConsumer decoderPost; public FixedScaleOffsetCodec(final double scale, final double offset, DataType type, DataType 
encodedType) { @@ -27,19 +38,74 @@ public FixedScaleOffsetCodec(final double scale, final double offset, DataType t this.scale = scale; this.offset = offset; - encoder = (f, i) -> { - final double in = f.getDouble(); - final byte res = (byte)(scale * in + offset); - i.put((byte)(scale * in + offset)); + tmpEncoder = ByteBuffer.wrap(new byte[Double.BYTES]); + tmpDecoder = ByteBuffer.wrap(new byte[Double.BYTES]); + + // encoder goes from type to encoded type + encoderPre = converter(type, DataType.FLOAT64); + encoderPost = converter(DataType.FLOAT64, encodedType); + + // decoder goes from encoded type to type + decoderPre = converter(encodedType, DataType.FLOAT64); + decoderPost = converter(DataType.FLOAT64, type); + + // convert from i type to double, apply scale and offset, then convert to type o + encoder = (i, o) -> { + tmpEncoder.rewind(); + encoderPre.accept(i, tmpEncoder); + tmpEncoder.rewind(); + final double x = tmpEncoder.getDouble(); + final double y = scale * x + offset; + System.out.println("encode: " + y); + tmpEncoder.rewind(); + tmpEncoder.putDouble(scale * x + offset); + tmpEncoder.rewind(); + encoderPost.accept(tmpEncoder, o); }; - decoder = (i, f) -> { - final byte in = i.get(); - final double conv = (((double)in) - offset) / scale; - f.putDouble(conv); + // convert from i type to double, apply scale and offset, then convert to type o + decoder = (i, o) -> { + // System.out.println("decode"); + // System.out.println(i.capacity()); + // System.out.println(tmpDecoder.capacity()); + // System.out.println(o.capacity()); + tmpDecoder.rewind(); + decoderPre.accept(i, tmpDecoder); + tmpDecoder.rewind(); + final double x = tmpDecoder.getDouble(); + tmpDecoder.rewind(); + tmpDecoder.putDouble((x - offset) / scale); + tmpDecoder.rewind(); + decoderPost.accept(tmpDecoder, o); }; } + public double getScale() { + + return scale; + } + + public double getOffset() { + + return offset; + } + + public DataType getType() { + + return super.type; + } + + public 
DataType getEncodedType() { + + return encodedType; + } + + @Override + public String getId() { + + return id; + } + @Override public InputStream decode(InputStream in) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 83e3925c..5164c547 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -8,6 +8,14 @@ public class IdentityCodec implements Codec { private static final long serialVersionUID = 8354269325800855621L; + protected final String id = "id"; + + @Override + public String getId() { + + return id; + } + @Override public InputStream decode(InputStream in) throws IOException { diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java index 8cd38d83..9aeeaebc 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java @@ -9,11 +9,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; -import java.util.stream.IntStream; import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; -import org.janelia.saalfeldlab.n5.codec.Codec; import org.junit.Test; public class AsTypeTests { @@ -22,52 +19,48 @@ public class AsTypeTests { public void testInt2Byte() throws IOException { final int N = 16; - final int[] ints = IntStream.rangeClosed(0, N).toArray(); - final ByteBuffer encodedInts = ByteBuffer.allocate(Integer.BYTES * N); - final byte[] bytes = new byte[N]; + final ByteBuffer intsAsBuffer = ByteBuffer.allocate(Integer.BYTES * N); + final byte[] encodedBytes = new byte[N]; for (int i = 0; i < N; i++) { - - bytes[i] = (byte)ints[i]; - encodedInts.putInt(ints[i]); + intsAsBuffer.putInt(i); + encodedBytes[i] = 
(byte)i; } - final AsTypeCodec int2Byte = new AsTypeCodec(DataType.UINT32, DataType.UINT8); - testEncoding( int2Byte, bytes, encodedInts.array()); - testDecoding( int2Byte, encodedInts.array(), bytes); - - final AsTypeCodec byte2Int = new AsTypeCodec(DataType.UINT8, DataType.UINT32); - testEncoding( byte2Int, encodedInts.array(), bytes); - testDecoding( byte2Int, bytes, encodedInts.array()); + final byte[] decodedInts = intsAsBuffer.array(); + testEncodingAndDecoding(new AsTypeCodec(DataType.INT32, DataType.INT8), encodedBytes, decodedInts); + testEncodingAndDecoding(new AsTypeCodec(DataType.INT8, DataType.INT32), decodedInts, encodedBytes); } @Test public void testDouble2Byte() throws IOException { final int N = 16; - final double[] doubles = new double[N]; - final byte[] bytes = new byte[N]; - final ByteBuffer encodedDoubles = ByteBuffer.allocate(Double.BYTES * N); + final ByteBuffer doublesAsBuffer = ByteBuffer.allocate(Double.BYTES * N); + final byte[] encodedBytes = new byte[N]; for (int i = 0; i < N; i++) { - doubles[i] = i; - encodedDoubles.putDouble(doubles[i]); - - bytes[i] = (byte)i; + doublesAsBuffer.putDouble(i); + encodedBytes[i] = (byte)i; } + final byte[] decodedDoubles = doublesAsBuffer.array(); - final AsTypeCodec double2Byte = new AsTypeCodec(DataType.FLOAT64, DataType.INT8); - testEncoding(double2Byte, bytes, encodedDoubles.array()); - testDecoding(double2Byte, encodedDoubles.array(), bytes); + testEncodingAndDecoding(new AsTypeCodec(DataType.FLOAT64, DataType.INT8), encodedBytes, decodedDoubles); + testEncodingAndDecoding(new AsTypeCodec(DataType.INT8, DataType.FLOAT64), decodedDoubles, encodedBytes); } - protected static void testDecoding( final Codec codec, final byte[] expected, final byte[] input ) throws IOException - { + public static void testEncodingAndDecoding(Codec codec, byte[] encodedBytes, byte[] decodedBytes) throws IOException { + + testEncoding(codec, encodedBytes, decodedBytes); + testDecoding(codec, decodedBytes, encodedBytes); 
+ } + + public static void testDecoding(final Codec codec, final byte[] expected, final byte[] input) throws IOException { + final InputStream result = codec.decode(new ByteArrayInputStream(input)); for (int i = 0; i < expected.length; i++) assertEquals(expected[i], (byte)result.read()); } - protected static void testEncoding( final Codec codec, final byte[] expected, final byte[] data ) throws IOException - { + public static void testEncoding(final Codec codec, final byte[] expected, final byte[] data) throws IOException { final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(expected.length); final OutputStream encodedStream = codec.encode(outputStream); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java index e23ab07d..27c744fa 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedInputStreamTest.java @@ -9,8 +9,6 @@ import java.util.Arrays; import java.util.stream.IntStream; -import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; -import org.janelia.saalfeldlab.n5.codec.FixedLengthConvertedInputStream; import org.junit.Test; public class FixedConvertedInputStreamTest { @@ -24,7 +22,7 @@ public void testLengthOne() throws IOException Arrays.fill(data, expected); final FixedLengthConvertedInputStream convertedId = new FixedLengthConvertedInputStream(1, 1, - AsTypeCodec.IDENTITY_ONE, + AsTypeCodec::IDENTITY_ONE, new ByteArrayInputStream(data)); final FixedLengthConvertedInputStream convertedPlusOne = new FixedLengthConvertedInputStream(1, 1, @@ -55,7 +53,7 @@ public void testIntToByte() throws IOException final byte[] data = buf.array(); final FixedLengthConvertedInputStream intToByte = new FixedLengthConvertedInputStream( 4, 1, - AsTypeCodec.INT_TO_BYTE, + AsTypeCodec::INT_TO_BYTE, new ByteArrayInputStream(data)); for( 
int i = 0; i < N; i++ ) @@ -74,7 +72,7 @@ public void testByteToInt() throws IOException data[i] = (byte)i; final FixedLengthConvertedInputStream byteToInt = new FixedLengthConvertedInputStream( - 1, 4, AsTypeCodec.BYTE_TO_INT, + 1, 4, AsTypeCodec::BYTE_TO_INT, new ByteArrayInputStream(data)); final DataInputStream dataStream = new DataInputStream(byteToInt); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java index a457dfb1..1035e271 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java @@ -8,8 +8,6 @@ import java.util.Arrays; import java.util.stream.IntStream; -import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; -import org.janelia.saalfeldlab.n5.codec.FixedLengthConvertedOutputStream; import org.junit.Test; public class FixedConvertedOutputStreamTest { @@ -28,7 +26,7 @@ public void testLengthOne() throws IOException final ByteArrayOutputStream outId = new ByteArrayOutputStream(N); final FixedLengthConvertedOutputStream convertedId = new FixedLengthConvertedOutputStream(1, 1, - AsTypeCodec.IDENTITY_ONE, + AsTypeCodec::IDENTITY_ONE, outId); convertedId.write(expectedData); @@ -68,7 +66,7 @@ public void testIntToByte() throws IOException final ByteArrayOutputStream outStream = new ByteArrayOutputStream(N); final FixedLengthConvertedOutputStream intToByte = new FixedLengthConvertedOutputStream( 4, 1, - AsTypeCodec.INT_TO_BYTE, + AsTypeCodec::INT_TO_BYTE, outStream); intToByte.write(buf.array()); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java index 098d8ece..6de67690 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java +++ 
b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java @@ -1,19 +1,10 @@ package org.janelia.saalfeldlab.n5.codec; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.stream.DoubleStream; import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; import org.junit.Test; public class FixedScaleOffsetTests { @@ -35,28 +26,35 @@ public void testDouble2Byte() throws IOException { encodedDoubles.putDouble(i); } - final FixedScaleOffsetCodec double2Byte = new FixedScaleOffsetCodec(scale, offset, DataType.FLOAT64, DataType.UINT8); - testEncoding(double2Byte, bytes, encodedDoubles.array()); - testDecoding(double2Byte, encodedDoubles.array(), bytes); + final FixedScaleOffsetCodec double2Byte = new FixedScaleOffsetCodec(scale, offset, DataType.FLOAT64, DataType.INT8); + AsTypeTests.testEncoding(double2Byte, bytes, encodedDoubles.array()); + AsTypeTests.testDecoding(double2Byte, encodedDoubles.array(), bytes); } - protected static void testDecoding( final Codec codec, final byte[] expected, final byte[] input ) throws IOException - { - final InputStream result = codec.decode(new ByteArrayInputStream(input)); - for (int i = 0; i < expected.length; i++) - assertEquals(expected[i], (byte)result.read()); - } + @Test + public void testLong2Short() throws IOException { - protected static void testEncoding( final Codec codec, final byte[] expected, final byte[] data ) throws IOException - { + final int N = 16; + final ByteBuffer encodedLongs = ByteBuffer.allocate(Double.BYTES * N); + final ByteBuffer encodedShorts = ByteBuffer.allocate(Short.BYTES * N); + + final long scale = 2; + final long offset = 1; - final 
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(expected.length); - final OutputStream encodedStream = codec.encode(outputStream); - encodedStream.write(data); - encodedStream.flush(); - final byte[] convertedArr = outputStream.toByteArray(); - assertArrayEquals( expected, outputStream.toByteArray()); - encodedStream.close(); + for (int i = 0; i < N; i++) { + final long val = (scale * i + offset); + encodedShorts.putShort((short)val); + encodedLongs.putLong(i); + } + + final byte[] shortBytes = encodedShorts.array(); + final byte[] longBytes = encodedLongs.array(); + + final FixedScaleOffsetCodec long2short = new FixedScaleOffsetCodec(scale, offset, DataType.INT64, DataType.INT16); + AsTypeTests.testEncoding(long2short, shortBytes, longBytes); + AsTypeTests.testDecoding(long2short, longBytes, shortBytes); } + + } From adb84bb7c3dcd282a1e5dae2cc06ce89db0144d3 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 18 Jul 2024 17:54:21 -0400 Subject: [PATCH 003/124] feat: add ChecksumCodec * DeterministicSizeCodec and Crc32cChecksumCodec --- .../n5/codec/DeterministicSizeCodec.java | 13 ++++ .../n5/codec/checksum/ChecksumCodec.java | 73 +++++++++++++++++++ .../codec/checksum/ChecksumInputStream.java | 50 +++++++++++++ .../codec/checksum/ChecksumOutputStream.java | 33 +++++++++ .../codec/checksum/Crc32cChecksumCodec.java | 41 +++++++++++ 5 files changed, 210 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java 
b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java new file mode 100644 index 00000000..9ac0a1fe --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java @@ -0,0 +1,13 @@ +package org.janelia.saalfeldlab.n5.codec; + +/** + * A {@link Codec} that can deterministically determine the size of encoded data from the size of the raw data and vice versa from the data length alone (i.e. encoding is data + * independent). + */ +public interface DeterministicSizeCodec extends Codec { + + public abstract long encodedSize(long size); + + public abstract long decodedSize(long size); + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java new file mode 100644 index 00000000..a6bd6a84 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java @@ -0,0 +1,73 @@ +package org.janelia.saalfeldlab.n5.codec.checksum; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.Checksum; + +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; + +/** + * A {@link Codec} that appends a checksum to data when encoding and can validate against that checksum when decoding. 
+ */ +public abstract class ChecksumCodec implements DeterministicSizeCodec { + + private static final long serialVersionUID = 3141427377277375077L; + + private int numChecksumBytes; + + private Checksum checksum; + + public ChecksumCodec(Checksum checksum, int numChecksumBytes) { + + this.checksum = checksum; + this.numChecksumBytes = numChecksumBytes; + } + + public Checksum getChecksum() { + + return checksum; + } + + public int numChecksumBytes() { + + return numChecksumBytes; + } + + @Override + public ChecksumInputStream decode(final InputStream in) throws IOException { + + // TODO get the correct expected checksum + // TODO write a test with nested checksum codecs + + // has to know the number of it needs to read? + return new ChecksumInputStream(getChecksum(), in); + } + + @Override + public ChecksumOutputStream encode(final OutputStream out) throws IOException { + + // when do we validate + return new ChecksumOutputStream(getChecksum(), out); + } + + @Override + public long encodedSize(final long size) { + + return size + numChecksumBytes(); + } + + @Override + public long decodedSize(final long size) { + + return size - numChecksumBytes(); + } + + public boolean validate() { + + // TODO implement + // does validate go here or in ChecksumOutputStream + return true; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java new file mode 100644 index 00000000..af748819 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java @@ -0,0 +1,50 @@ +package org.janelia.saalfeldlab.n5.codec.checksum; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.zip.Checksum; + +public class ChecksumInputStream extends InputStream { + + private final Checksum checksum; + // private final long expected; + private InputStream in; + + public 
ChecksumInputStream(Checksum checksum, InputStream in) { + + // this needs to know how many bytes are in its checksum + // Maybe pass the codec here instead of the checksum + this.checksum = checksum; + this.in = in; + } + + @Override + public int read() throws IOException { + + // returns -1 if end of the stream is reached + final int b = in.read(); + checksum.update(b); + return b; + } + + protected long readChecksum() throws IOException { + + final byte[] checksum = new byte[getChecksumSize()]; + in.read(checksum); + return ByteBuffer.wrap(checksum).getLong(); + } + + public boolean validate() throws IOException { + + // TODO consider reading N more bytes (the checksum) + // set expected to that, then validate + return readChecksum() == checksum.getValue(); + } + + public int getChecksumSize() { + + return -1; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java new file mode 100644 index 00000000..4fac8f62 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java @@ -0,0 +1,33 @@ +package org.janelia.saalfeldlab.n5.codec.checksum; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.zip.Checksum; + +public class ChecksumOutputStream extends OutputStream { + + private final Checksum checksum; + private OutputStream out; + + public ChecksumOutputStream(Checksum checksum, OutputStream out) { + + this.checksum = checksum; + this.out = out; + } + + @Override + public void write(int b) throws IOException { + + checksum.update(b); + out.write(b); + } + + public void finish() throws IOException { + + final ByteBuffer buf = ByteBuffer.allocate(8); + buf.asLongBuffer().put(checksum.getValue()); + out.write(buf.array()); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java 
b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java new file mode 100644 index 00000000..51171510 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java @@ -0,0 +1,41 @@ +package org.janelia.saalfeldlab.n5.codec.checksum; + +import java.util.zip.CRC32; + +public class Crc32cChecksumCodec extends ChecksumCodec { + + private static final long serialVersionUID = 7424151868725442500L; + + public static transient final String CRC32C_CHECKSUM_CODEC_ID = "crc32c"; + + public Crc32cChecksumCodec() { + + super(new CRC32(), 4); + } + + @Override + public String getId() { + + return CRC32C_CHECKSUM_CODEC_ID; + } + + @Override + public long encodedSize(final long size) { + + return size + numChecksumBytes(); + } + + @Override + public long decodedSize(final long size) { + + return size - numChecksumBytes(); + } + + @Override + public boolean validate() { + + // TODO implement me + return true; + } + +} From c54bdb98b9e8270b86c7bcf3fc644bca79c30edf Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 19 Jul 2024 10:09:41 -0400 Subject: [PATCH 004/124] wip: use CheckedInput/Output streams from java.util.zip * move some functionality to ChecksumCodec --- .../n5/codec/checksum/ChecksumCodec.java | 58 ++++++++++++++++--- .../codec/checksum/ChecksumInputStream.java | 50 ---------------- .../codec/checksum/ChecksumOutputStream.java | 33 ----------- .../codec/checksum/Crc32cChecksumCodec.java | 8 ++- 4 files changed, 54 insertions(+), 95 deletions(-) delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java index a6bd6a84..4b30265b 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java @@ -3,6 +3,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.zip.CheckedInputStream; +import java.util.zip.CheckedOutputStream; import java.util.zip.Checksum; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; @@ -35,20 +38,39 @@ public int numChecksumBytes() { } @Override - public ChecksumInputStream decode(final InputStream in) throws IOException { + public CheckedInputStream decode(final InputStream in) throws IOException { // TODO get the correct expected checksum // TODO write a test with nested checksum codecs // has to know the number of it needs to read? - return new ChecksumInputStream(getChecksum(), in); + return new CheckedInputStream(in, getChecksum()); } @Override - public ChecksumOutputStream encode(final OutputStream out) throws IOException { + public CheckedOutputStream encode(final OutputStream out) throws IOException { - // when do we validate - return new ChecksumOutputStream(getChecksum(), out); + // when do we validate? 
+ return new CheckedOutputStream(out, getChecksum()); + } + + public void encode(final OutputStream out, ByteBuffer buffer) throws IOException { + + final CheckedOutputStream cout = new CheckedOutputStream(out, getChecksum()); + cout.write(buffer.array()); + writeChecksum(out); + } + + public ByteBuffer decodeAndValidate(final InputStream in, int numBytes) throws IOException { + + final CheckedInputStream cin = decode(in); + final byte[] data = new byte[numBytes]; + cin.read(data); + + if (!valid(in)) + throw new IOException("Invalid checksum"); + + return ByteBuffer.wrap(data); } @Override @@ -63,11 +85,29 @@ public long decodedSize(final long size) { return size - numChecksumBytes(); } - public boolean validate() { + protected boolean valid(InputStream in) throws IOException { - // TODO implement - // does validate go here or in ChecksumOutputStream - return true; + return readChecksum(in) == getChecksum().getValue(); } + protected long readChecksum(InputStream in) throws IOException { + + final byte[] checksum = new byte[numChecksumBytes()]; + in.read(checksum); + return ByteBuffer.wrap(checksum).getLong(); + } + + /** + * Return the value of the checksum as a {@link ByteBuffer} to be serialized. 
+ * + * @return a ByteBuffer representing the checksum value + */ + public abstract ByteBuffer getChecksumValue(); + + public void writeChecksum(OutputStream out) throws IOException { + + out.write(getChecksumValue().array()); + } + + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java deleted file mode 100644 index af748819..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumInputStream.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.janelia.saalfeldlab.n5.codec.checksum; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.zip.Checksum; - -public class ChecksumInputStream extends InputStream { - - private final Checksum checksum; - // private final long expected; - private InputStream in; - - public ChecksumInputStream(Checksum checksum, InputStream in) { - - // this needs to know how many bytes are in its checksum - // Maybe pass the codec here instead of the checksum - this.checksum = checksum; - this.in = in; - } - - @Override - public int read() throws IOException { - - // returns -1 if end of the stream is reached - final int b = in.read(); - checksum.update(b); - return b; - } - - protected long readChecksum() throws IOException { - - final byte[] checksum = new byte[getChecksumSize()]; - in.read(checksum); - return ByteBuffer.wrap(checksum).getLong(); - } - - public boolean validate() throws IOException { - - // TODO consider reading N more bytes (the checksum) - // set expected to that, then validate - return readChecksum() == checksum.getValue(); - } - - public int getChecksumSize() { - - return -1; - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java deleted file mode 100644 index 4fac8f62..00000000 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumOutputStream.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.janelia.saalfeldlab.n5.codec.checksum; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.util.zip.Checksum; - -public class ChecksumOutputStream extends OutputStream { - - private final Checksum checksum; - private OutputStream out; - - public ChecksumOutputStream(Checksum checksum, OutputStream out) { - - this.checksum = checksum; - this.out = out; - } - - @Override - public void write(int b) throws IOException { - - checksum.update(b); - out.write(b); - } - - public void finish() throws IOException { - - final ByteBuffer buf = ByteBuffer.allocate(8); - buf.asLongBuffer().put(checksum.getValue()); - out.write(buf.array()); - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java index 51171510..bab39e41 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.codec.checksum; +import java.nio.ByteBuffer; import java.util.zip.CRC32; public class Crc32cChecksumCodec extends ChecksumCodec { @@ -32,10 +33,11 @@ public long decodedSize(final long size) { } @Override - public boolean validate() { + public ByteBuffer getChecksumValue() { - // TODO implement me - return true; + final ByteBuffer buf = ByteBuffer.allocate(numChecksumBytes()); + buf.putInt((int)getChecksum().getValue()); + return buf; } } From e756f8e18b18013f1cc442963bf42548779cd73e Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 19 Jul 2024 10:24:56 -0400 Subject: [PATCH 005/124] wip: add ChecksumException --- .../saalfeldlab/n5/codec/checksum/ChecksumCodec.java | 4 ++-- .../n5/codec/checksum/ChecksumException.java | 12 
++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumException.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java index 4b30265b..84555dd3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java @@ -61,14 +61,14 @@ public void encode(final OutputStream out, ByteBuffer buffer) throws IOException writeChecksum(out); } - public ByteBuffer decodeAndValidate(final InputStream in, int numBytes) throws IOException { + public ByteBuffer decodeAndValidate(final InputStream in, int numBytes) throws IOException, ChecksumException { final CheckedInputStream cin = decode(in); final byte[] data = new byte[numBytes]; cin.read(data); if (!valid(in)) - throw new IOException("Invalid checksum"); + throw new ChecksumException("Invalid checksum"); return ByteBuffer.wrap(data); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumException.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumException.java new file mode 100644 index 00000000..034343c4 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumException.java @@ -0,0 +1,12 @@ +package org.janelia.saalfeldlab.n5.codec.checksum; + +public class ChecksumException extends Exception { + + private static final long serialVersionUID = 905130066386622561L; + + public ChecksumException(final String message) { + + super(message); + } + +} From 5a821593f31b1b82377834c56ea5ac8888c88992 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 23 Jul 2024 13:35:14 -0400 Subject: [PATCH 006/124] refactor: codec getId to getName * to match zarr v3 --- .../org/janelia/saalfeldlab/n5/CodecAdapter.java | 8 ++++---- 
.../org/janelia/saalfeldlab/n5/Compression.java | 2 +- .../janelia/saalfeldlab/n5/codec/AsTypeCodec.java | 6 +++--- .../org/janelia/saalfeldlab/n5/codec/Codec.java | 2 +- .../saalfeldlab/n5/codec/ComposedCodec.java | 6 +++--- .../n5/codec/FixedScaleOffsetCodec.java | 14 +++----------- .../saalfeldlab/n5/codec/IdentityCodec.java | 6 +++--- .../n5/codec/checksum/Crc32cChecksumCodec.java | 6 ++++-- 8 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java index 0933433e..0390d9a3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java @@ -47,10 +47,10 @@ public JsonElement serialize( final Type typeOfSrc, final JsonSerializationContext context) { - if (codec.getId().equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { + if (codec.getName().equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { final FixedScaleOffsetCodec c = (FixedScaleOffsetCodec)codec; final JsonObject obj = new JsonObject(); - obj.addProperty("id", c.getId()); + obj.addProperty("name", c.getName()); obj.addProperty("scale", c.getScale()); obj.addProperty("offset", c.getOffset()); obj.addProperty("type", c.getType().toString().toLowerCase()); @@ -73,9 +73,9 @@ else if (!json.isJsonObject()) return null; final JsonObject jsonObject = json.getAsJsonObject(); - if (jsonObject.has("id")) { + if (jsonObject.has("name")) { - final String id = jsonObject.get("id").getAsString(); + final String id = jsonObject.get("name").getAsString(); if (id.equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { return new FixedScaleOffsetCodec( diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index 3092bbe4..a6ca1be0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -133,7 +133,7 @@ public OutputStream encode(OutputStream out) throws IOException { } @Override - public String getId() { + public String getName() { return compression.getType(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index d4852d4a..f7bf9945 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -22,7 +22,7 @@ public class AsTypeCodec implements Codec { protected final DataType type; protected final DataType encodedType; - protected final String id = "astype"; + protected final String name = "astype"; public AsTypeCodec( DataType type, DataType encodedType ) { @@ -37,9 +37,9 @@ public AsTypeCodec( DataType type, DataType encodedType ) } @Override - public String getId() { + public String getName() { - return id; + return name; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index a2e5373f..b3225b7e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -32,6 +32,6 @@ public interface Codec extends Serializable { */ public OutputStream encode(OutputStream out) throws IOException; - public String getId(); + public String getName(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java index 49e6f923..10f8adb0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java @@ -12,7 +12,7 @@ public class ComposedCodec implements Codec { private static final long serialVersionUID = 5068349140842235924L; private final Codec[] filters; - protected String id = 
"composed"; + protected String name = "composed"; public ComposedCodec(final Codec... filters) { @@ -20,9 +20,9 @@ public ComposedCodec(final Codec... filters) { } @Override - public String getId() { + public String getName() { - return id; + return name; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java index 34b06ecd..0538498d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java @@ -14,13 +14,11 @@ public class FixedScaleOffsetCodec extends AsTypeCodec { public static transient final String FIXED_SCALE_OFFSET_CODEC_ID = "fixedscaleoffset"; - @SuppressWarnings("unused") private final double scale; - @SuppressWarnings("unused") private final double offset; - protected final String id = FIXED_SCALE_OFFSET_CODEC_ID; + protected final String name = FIXED_SCALE_OFFSET_CODEC_ID; private transient final ByteBuffer tmpEncoder; private transient final ByteBuffer tmpDecoder; @@ -55,8 +53,6 @@ public FixedScaleOffsetCodec(final double scale, final double offset, DataType t encoderPre.accept(i, tmpEncoder); tmpEncoder.rewind(); final double x = tmpEncoder.getDouble(); - final double y = scale * x + offset; - System.out.println("encode: " + y); tmpEncoder.rewind(); tmpEncoder.putDouble(scale * x + offset); tmpEncoder.rewind(); @@ -65,10 +61,6 @@ public FixedScaleOffsetCodec(final double scale, final double offset, DataType t // convert from i type to double, apply scale and offset, then convert to type o decoder = (i, o) -> { - // System.out.println("decode"); - // System.out.println(i.capacity()); - // System.out.println(tmpDecoder.capacity()); - // System.out.println(o.capacity()); tmpDecoder.rewind(); decoderPre.accept(i, tmpDecoder); tmpDecoder.rewind(); @@ -101,9 +93,9 @@ public DataType getEncodedType() { } @Override - public String 
getId() { + public String getName() { - return id; + return name; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 5164c547..4383669a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -8,12 +8,12 @@ public class IdentityCodec implements Codec { private static final long serialVersionUID = 8354269325800855621L; - protected final String id = "id"; + protected final String name = "id"; @Override - public String getId() { + public String getName() { - return id; + return name; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java index bab39e41..0a16d435 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java @@ -9,15 +9,17 @@ public class Crc32cChecksumCodec extends ChecksumCodec { public static transient final String CRC32C_CHECKSUM_CODEC_ID = "crc32c"; + private final String name = CRC32C_CHECKSUM_CODEC_ID; + public Crc32cChecksumCodec() { super(new CRC32(), 4); } @Override - public String getId() { + public String getName() { - return CRC32C_CHECKSUM_CODEC_ID; + return name; } @Override From 1cd307c42879dbda6a7ce7b8d73443dee776eeec Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 23 Jul 2024 13:43:55 -0400 Subject: [PATCH 007/124] feat: add BytesCodec --- .../saalfeldlab/n5/codec/BytesCodec.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java new file mode 
100644 index 00000000..65cff6a2 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -0,0 +1,21 @@ +package org.janelia.saalfeldlab.n5.codec; + + +public class BytesCodec extends IdentityCodec { + + private static final long serialVersionUID = 3523505403978222360L; + + public static final String ID = "bytes"; + + protected final String name = ID; + + protected final String endian = "little"; + + // TODO implement me + + @Override + public String getName() { + + return name; + } +} From 72cd669af0ecfc8e122b3b7b7d8c9f77ef6b1363 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 23 Jul 2024 13:44:16 -0400 Subject: [PATCH 008/124] feat: add ShardingCodec --- .../saalfeldlab/n5/shard/ShardingCodec.java | 86 ++++++++++++++++++ .../n5/shard/ShardingConfiguration.java | 89 +++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java new file mode 100644 index 00000000..9f9f978a --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -0,0 +1,86 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.reflect.Type; + +import org.janelia.saalfeldlab.n5.codec.Codec; + +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; + +public class ShardingCodec implements Codec { + + private static final long serialVersionUID = -5879797314954717810L; + + public static 
final String ID = "sharding_indexed"; + + private final ShardingConfiguration configuration; + + private final String name = ID; + + public ShardingCodec(ShardingConfiguration configuration) { + + this.configuration = configuration; + } + + public ShardingConfiguration getConfiguration() { + + return configuration; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + + // TODO Auto-generated method stub + return in; + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + // TODO Auto-generated method stub + return out; + } + + @Override + public String getName() { + + return name; + } + + public static boolean isShardingCodec(final Codec codec) { + + return codec instanceof ShardingCodec; + } + + // public static void TypeAd + public static class ShardingCodecAdapter implements JsonDeserializer, JsonSerializer { + + @Override + public JsonElement serialize(ShardingCodec src, Type typeOfSrc, JsonSerializationContext context) { + + final JsonObject jsonObj = new JsonObject(); + + jsonObj.addProperty("name", ShardingCodec.ID); + // context.serialize(typeOfSrc); + + return jsonObj; + } + + @Override + public ShardingCodec deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + + return null; + } + + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java new file mode 100644 index 00000000..2a29a116 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java @@ -0,0 +1,89 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.lang.reflect.Type; +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.codec.Codec; + +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonNull; +import 
com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; + +public class ShardingConfiguration { + + public static final String CHUNK_SHAPE_KEY = "chunk_shape"; + public static final String INDEX_LOCATION_KEY = "index_location"; + public static final String CODECS_KEY = "codecs"; + public static final String INDEX_CODECS_KEY = "index_codecs"; + + public static enum IndexLocation { + start, end + }; + + protected int[] blockSize; + protected Codec[] codecs; + protected Codec[] indexCodecs; + protected IndexLocation indexLocation; + + public ShardingConfiguration(final int[] blockSize, final Codec[] codecs, final Codec[] indexCodecs, + final IndexLocation indexLocation) { + + this.blockSize = blockSize; + this.codecs = codecs; + this.indexCodecs = indexCodecs; + this.indexLocation = indexLocation; + } + + public int[] getBlockSize() { + + return blockSize; + } + + public boolean areIndexesAtStart() { + + return indexLocation == IndexLocation.start; + } + + public static class ShardingConfigurationAdapter + implements JsonDeserializer, JsonSerializer { + + @Override + public JsonElement serialize(ShardingConfiguration src, Type typeOfSrc, JsonSerializationContext context) { + + if( anyShardingCodecs(src.codecs) || anyShardingCodecs(src.indexCodecs)) + return JsonNull.INSTANCE; + + final JsonObject jsonObj = new JsonObject(); + jsonObj.add(CHUNK_SHAPE_KEY, context.serialize(src.blockSize)); + jsonObj.add(INDEX_LOCATION_KEY, context.serialize(src.indexLocation.toString())); + jsonObj.add(CODECS_KEY, context.serialize(src.codecs)); + jsonObj.add(INDEX_CODECS_KEY, context.serialize(src.indexCodecs)); + + return jsonObj; + } + + @Override + public ShardingConfiguration deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + + return null; + } + + public boolean anyShardingCodecs(final Codec[] codecs) { + + if 
(codecs == null) + return false; + + return Arrays.stream(codecs).anyMatch(c -> { + return (c instanceof ShardingCodec); + }); + } + + } + +} From 6fbb729271104f2a1ef27c2827019ac1f9b58670 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 23 Jul 2024 13:45:00 -0400 Subject: [PATCH 009/124] feat(wip): add prelim Shard classes --- .../n5/ShardedDatasetAttributes.java | 90 ++++++++++++ .../saalfeldlab/n5/shard/AbstractShard.java | 50 +++++++ .../saalfeldlab/n5/shard/InMemoryShard.java | 28 ++++ .../janelia/saalfeldlab/n5/shard/Shard.java | 111 ++++++++++++++ .../saalfeldlab/n5/shard/ShardException.java | 14 ++ .../saalfeldlab/n5/shard/ShardImpl.java | 20 +++ .../saalfeldlab/n5/shard/ShardIndex.java | 120 +++++++++++++++ .../saalfeldlab/n5/shard/ShardReader.java | 90 ++++++++++++ .../saalfeldlab/n5/shard/ShardWriter.java | 137 ++++++++++++++++++ .../janelia/saalfeldlab/n5/shard/Shards.java | 106 ++++++++++++++ 10 files changed, 766 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardException.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java new file mode 100644 index 00000000..9b69ad6e --- 
/dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -0,0 +1,90 @@ +package org.janelia.saalfeldlab.n5; + +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.Shards; + +public class ShardedDatasetAttributes extends DatasetAttributes { + + private static final long serialVersionUID = -4559068841006651814L; + + private final int[] shardSize; + + private final IndexLocation indexLocation; + + public ShardedDatasetAttributes(final long[] dimensions, final int[] shardSize, final DataType dataType, + final Compression compression, + final Codec[] codecs) { + + super(dimensions, getBlockSize(codecs), dataType, compression, codecs); + final ShardingConfiguration config = getShardConfiguration(codecs); + this.indexLocation = config.areIndexesAtStart() ? 
IndexLocation.start : IndexLocation.end; + this.shardSize = shardSize; + } + + public ShardedDatasetAttributes(final long[] dimensions, + final int[] shardSize, + final int[] blockSize, + final IndexLocation shardIndexLocation, + final DataType dataType, + final Compression compression, + final Codec[] codecs) { + + super(dimensions, blockSize, dataType, compression, codecs); + this.shardSize = shardSize; + this.indexLocation = shardIndexLocation; + // this.config = new ShardingConfiguration(blockSize, null, null, shardIndexLocation); + + // TODO figure out codecs + } + + public int[] getShardSize() { + + return shardSize; + } + + public Shards getShards() { + + return new Shards(this); + } + + public ShardingConfiguration getShardingConfiguration() { + + return Arrays.stream(getCodecs()) + .filter(ShardingCodec::isShardingCodec) + .map(x -> { + return ((ShardingCodec)x).getConfiguration(); + }) + .findFirst().orElse(null); + } + + public static boolean isSharded(Codec[] codecs) { + + return Arrays.stream(codecs).anyMatch(ShardingCodec::isShardingCodec); + } + + public static ShardingConfiguration getShardConfiguration(Codec[] codecs) { + + return Arrays.stream(codecs) + .filter(ShardingCodec::isShardingCodec) + .map(x -> { + return ((ShardingCodec)x).getConfiguration(); + }) + .findFirst().orElse(null); + } + + public static int[] getBlockSize(Codec[] codecs) { + + return Arrays.stream(codecs) + .filter(ShardingCodec::isShardingCodec) + .map(x -> { + return ((ShardingCodec)x).getConfiguration(); + }) + .map(ShardingConfiguration::getBlockSize).findFirst().orElse(null); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java new file mode 100644 index 00000000..05d63144 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -0,0 +1,50 @@ +package org.janelia.saalfeldlab.n5.shard; + +import org.janelia.saalfeldlab.n5.DataBlock; + 
+public abstract class AbstractShard implements Shard { + + protected final long[] size; + protected final long[] gridPosition; + protected final int[] blockSize; + + public AbstractShard(final long[] size, final long[] gridPosition, + final int[] blockSize, final T type) { + + this.size = size; + this.gridPosition = gridPosition; + this.blockSize = blockSize; + } + + @Override + public long[] getSize() { + + return size; + } + + @Override + public int[] getBlockSize() { + + return blockSize; + } + + @Override + public long[] getGridPosition() { + + return gridPosition; + } + + @Override + public DataBlock getBlock(int... position) { + + return null; + } + + @Override + public ShardIndex getIndexes() { + + return null; + } + + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java new file mode 100644 index 00000000..72f102b5 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -0,0 +1,28 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.util.ArrayList; +import java.util.List; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; + +public class InMemoryShard extends AbstractShard { + + private List> blocks; + + private ShardIndex shardIndex; + + public InMemoryShard(final long[] size, final long[] gridPosition, final int[] blockSize, T type) { + + super(size, gridPosition, blockSize, type); + blocks = new ArrayList<>(); + } + + @Override + public DataBlock readBlock(String pathName, DatasetAttributes datasetAttributes, long... 
gridPosition) { + + // TODO Auto-generated method stub + return null; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java new file mode 100644 index 00000000..a587f735 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -0,0 +1,111 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; + +public interface Shard { + + /** + * Returns the number of blocks this shard contains along all dimensions. + * + * The size of a shard expected to be smaller than or equal to the spacing of the shard grid. The dimensionality of size is expected to be equal to the dimensionality of the + * dataset. Consistency is not enforced. + * + * @return size of the shard in units of blocks + */ + default int[] getBlockGridSize() { + + final long[] sz = getSize(); + final int[] blkSz = getBlockSize(); + final int[] blockGridSize = new int[sz.length]; + for (int i = 0; i < sz.length; i++) + blockGridSize[i] = (int)(sz[i] / blkSz[i]); + + return blockGridSize; + } + + /** + * Returns the size of shards in pixel units. + * + * @return shard size + */ + public long[] getSize(); + + /** + * Returns the size of blocks in pixel units. + * + * @return block size + */ + public int[] getBlockSize(); + + /** + * Returns the position of this shard on the shard grid. + * + * The dimensionality of the grid position is expected to be equal to the dimensionality of the dataset. Consistency is not enforced. + * + * @return position on the shard grid + */ + public long[] getGridPosition(); + + /** + * Returns of the block at the given position relative to this shard, or null if this shard does not contain the given block. + * + * @return the shard position + */ + default int[] getBlockPosition(long... 
blockPosition) { + + final long[] shardPos = getShard(blockPosition); + if (!Arrays.equals(getGridPosition(), shardPos)) + return null; + + final long[] shardSize = getSize(); + final int[] blkSize = getBlockSize(); + final int[] blkGridSize = getBlockGridSize(); + + final int[] blockShardPos = new int[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + final long shardP = shardPos[i] * shardSize[i]; + final long blockP = blockPosition[i] * blkSize[i]; + blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); + } + + return blockShardPos; + } + + /** + * Returns the position of the shard containing the block with the given block position. + * + * @return the shard position + */ + default long[] getShard(long... blockPosition) { + + final int[] shardBlockDimensions = getBlockGridSize(); + final long[] shardGridPosition = new long[shardBlockDimensions.length]; + for (int i = 0; i < shardGridPosition.length; i++) { + shardGridPosition[i] = (long)Math.floor((double)blockPosition[i] / shardBlockDimensions[i]); + } + + return shardGridPosition; + } + + public DataBlock getBlock(int... position); + + public ShardIndex getIndexes(); + + public DataBlock readBlock(final String pathName, final DatasetAttributes datasetAttributes, long... 
gridPosition); + + + /** + * Say we want async datablock access + * + * Say we construct shard then getBlockAt + * + * (this could be how we do the aggregation) multiple getblockAt calls don't trigger reading read triggers reading of all blocks that were requested + * + * Shard doesn't hold the data directly, but is the metadata about how the blocks are stored + * + */ + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardException.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardException.java new file mode 100644 index 00000000..d208c62e --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardException.java @@ -0,0 +1,14 @@ +package org.janelia.saalfeldlab.n5.shard; + +import org.janelia.saalfeldlab.n5.N5Exception; + +public class ShardException extends N5Exception { + + private static final long serialVersionUID = -77907634621557855L; + + public static class IndexException extends ShardException { + + private static final long serialVersionUID = 3924426352575114063L; + + } +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java new file mode 100644 index 00000000..11803b3b --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java @@ -0,0 +1,20 @@ +package org.janelia.saalfeldlab.n5.shard; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; + +public class ShardImpl extends AbstractShard { + + public ShardImpl(final long[] size, final long[] gridPosition, final int[] blockSize, T type) { + + super(size, gridPosition, blockSize, type); + } + + @Override + public DataBlock readBlock(String pathName, DatasetAttributes datasetAttributes, long... 
gridPosition) { + + // TODO Auto-generated method stub + return null; + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java new file mode 100644 index 00000000..b2a19472 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -0,0 +1,120 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.LongArrayDataBlock; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; + +public class ShardIndex extends LongArrayDataBlock { + + private static final int BYTES_PER_LONG = 8; + + public ShardIndex(int[] size, long[] data) { + + super(indexBlockSize(size), new long[]{0}, data); + } + + public ShardIndex(int[] size) { + + super(indexBlockSize(size), new long[]{0}, createData(size)); + } + + private static long[] createData(final int[] size) { + + final int N = 2 * Arrays.stream(size).reduce(1, (x, y) -> x * y); + return new long[N]; + } + + private static int[] indexBlockSize(final int[] size) { + + final int[] indexBlockSize = new int[size.length + 1]; + indexBlockSize[0] = 2; + System.arraycopy(size, 0, indexBlockSize, 1, size.length); + return indexBlockSize; + } + + public long getOffset(long... gridPosition) { + + return data[getOffsetIndex(gridPosition)]; + } + + public long getNumBytes(long... gridPosition) { + + return data[getNumBytesIndex(gridPosition)]; + } + + public void set(long offset, long nbytes, long[] gridPosition) { + + final int i = getOffsetIndex(gridPosition); + data[i] = offset; + data[i + 1] = nbytes; + } + + + private int getOffsetIndex(long... 
gridPosition) { + + int idx = 0; + long stride = 2; + for (int i = 0; i < gridPosition.length; i++) { + idx += gridPosition[i] * stride; + stride *= size[i]; + } + + return idx; + } + + private int getNumBytesIndex(long... gridPosition) { + + return getOffsetIndex() + 1; + } + + public void printData() { + + System.out.println(Arrays.toString(data)); + } + + public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes datasetAttributes) throws IOException { + + // TODO need codecs + // TODO FileChannel is too specific - generalize + final Shards shards = new Shards(datasetAttributes); + final int[] indexShape = indexBlockSize(shards.getShardBlockGridSize()); + final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); + final int indexBytes = BYTES_PER_LONG * indexSize; + + if (!datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + channel.position(channel.size() - indexBytes); + } + + final InputStream is = Channels.newInputStream(channel); + final DataInputStream dis = new DataInputStream(is); + + final long[] indexes = new long[indexSize]; + for (int i = 0; i < indexSize; i++) { + indexes[i] = dis.readLong(); + } + + return new ShardIndex(indexShape, indexes); + } + + public static void main(String[] args) { + + final ShardIndex ib = new ShardIndex(new int[]{2, 2}); + + ib.set(8, 9, new long[]{1, 1}); + ib.printData(); + + // System.out.println(ib.getIndex(0, 0)); + // System.out.println(ib.getIndex(1, 0)); + // System.out.println(ib.getIndex(0, 1)); + // System.out.println(ib.getIndex(1, 1)); + + System.out.println("done"); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java new file mode 100644 index 00000000..d68a03c0 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -0,0 +1,90 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.io.IOException; +import 
java.io.InputStream; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.DefaultBlockReader; +import org.janelia.saalfeldlab.n5.RawCompression; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; + +public class ShardReader { + + + private final ShardedDatasetAttributes datasetAttributes; + private long[] indexes; + private Shards shards; + + public ShardReader(final ShardedDatasetAttributes datasetAttributes) { + + this.datasetAttributes = datasetAttributes; + this.shards = new Shards(datasetAttributes); + } + + public ShardIndex readIndexes(FileChannel channel) throws IOException { + + return ShardIndex.read(channel, datasetAttributes); + } + + public InMemoryShard readShardFully( + final FileChannel channel, + long... gridPosition) throws IOException { + + final DatasetAttributes dsetAttrs = shards.getDatasetAttributes(); + + final ShardIndex si = readIndexes(channel); + return null; + } + + public DataBlock readBlock( + final FileChannel in, + long... blockPosition) throws IOException { + + // TODO generalize from FileChannel + // TODO this assumes the "file" holding the shard is known, + // the logic to figure that out will have to go somewhere + + final ShardIndex index = readIndexes(in); + + final long[] shardPosition = shards.getShardPositionForBlock(blockPosition); + in.position(index.getOffset(shardPosition)); + final InputStream is = Channels.newInputStream(in); + return DefaultBlockReader.readBlock(is, datasetAttributes, indexes); + } + + private long getIndexIndex(long... 
shardPosition) { + + final int[] indexDimensions = shards.getShardBlockGridSize(); + long idx = 0; + for (int i = 0; i < indexDimensions.length; i++) { + idx += shardPosition[i] * indexDimensions[i]; + } + + return idx; + } + + public static void main(String[] args) { + + final ShardReader reader = new ShardReader(buildTestAttributes()); + + System.out.println(reader.getIndexIndex(0, 0)); + System.out.println(reader.getIndexIndex(0, 1)); + System.out.println(reader.getIndexIndex(1, 0)); + System.out.println(reader.getIndexIndex(1, 1)); + } + + private static ShardedDatasetAttributes buildTestAttributes() { + + final Codec[] codecs = new Codec[]{ + new ShardingCodec(new ShardingConfiguration(new int[]{2, 2}, null, null, IndexLocation.end))}; + + return new ShardedDatasetAttributes(new long[]{4, 4}, new int[]{2, 2}, DataType.INT32, new RawCompression(), codecs); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java new file mode 100644 index 00000000..5705d583 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -0,0 +1,137 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DefaultBlockWriter; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; + +public class ShardWriter { + + private static final int BYTES_PER_LONG = 8; + + private final List> blocks; + + private final ShardedDatasetAttributes datasetAttributes; + + private ByteBuffer blockSizes; + + private ByteBuffer blockIndexes; + + private ShardIndex indexData; + + private List blockBytes; + + public ShardWriter(final ShardedDatasetAttributes datasetAttributes) { + + blocks = new 
ArrayList<>(); + this.datasetAttributes = datasetAttributes; + } + + public void reset() { + + blocks.clear(); + blockSizes = null; + blockBytes.clear(); + + indexData = null; + } + + public void addBlock(final DataBlock block) { + + blocks.add(block); + } + + public void write(final OutputStream out) throws IOException { + + // TODO need codecs + + // prepareForWriting(); + // if (datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + // writeIndexes(out); + // writeBlocks(out); + // } else { + // writeBlocks(out); + // writeIndexes(out); + // } + + prepareForWritingDataBlock(); + if (datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + writeIndexBlock(out); + writeBlocks(out); + } else { + writeBlocks(out); + writeIndexBlock(out); + } + } + + private void prepareForWritingDataBlock() throws IOException { + + // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); + // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); + + indexData = new ShardIndex(new int[]{blocks.size()}); + blockBytes = new ArrayList<>(); + long cumulativeBytes = 0; + final long[] shardPosition = new long[1]; + for (int i = 0; i < blocks.size(); i++) { + + final ByteArrayOutputStream blockOut = new ByteArrayOutputStream(); + DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); + + shardPosition[0] = i; + indexData.set(cumulativeBytes, blockOut.size(), shardPosition); + cumulativeBytes += blockOut.size(); + + blockBytes.add(blockOut.toByteArray()); + } + + indexData.printData(); + } + + private void prepareForWriting() throws IOException { + + blockSizes = ByteBuffer.allocate(BYTES_PER_LONG * blocks.size()); + blockIndexes = ByteBuffer.allocate(BYTES_PER_LONG * blocks.size()); + blockBytes = new ArrayList<>(); + long cumulativeBytes = 0; + for (int i = 0; i < blocks.size(); i++) { + + final ByteArrayOutputStream 
blockOut = new ByteArrayOutputStream(); + DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); + + blockIndexes.putLong(cumulativeBytes); + blockSizes.putLong(blockOut.size()); + cumulativeBytes += blockOut.size(); + + blockBytes.add(blockOut.toByteArray()); + } + } + + private void writeBlocks(final OutputStream out) throws IOException { + + for (final byte[] bytes : blockBytes) + out.write(bytes); + } + + private void writeIndexes(final OutputStream out) throws IOException { + + out.write(blockSizes.array()); + } + + private void writeIndexBlock(final OutputStream out) throws IOException { + + final DataOutputStream dos = new DataOutputStream(out); + for (final long l : indexData.getData()) + dos.writeLong(l); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java new file mode 100644 index 00000000..fb0f6aca --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java @@ -0,0 +1,106 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; + +/** + * Manages the set of shards that comprise a dataset. + */ +public class Shards { + + public static long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; + + private final ShardedDatasetAttributes datasetAttributes; + + public Shards(final ShardedDatasetAttributes datasetAttributes) { + + this.datasetAttributes = datasetAttributes; + } + + public ShardedDatasetAttributes getDatasetAttributes() { + + return datasetAttributes; + } + + public int[] getShardSize() { + + return getDatasetAttributes().getShardSize(); + } + + public int[] getBlockSize() { + + return getDatasetAttributes().getBlockSize(); + } + + /** + * Returns the number of blocks a shard contains along all dimensions. 
+ * + * @return the size of the block grid of a shard + */ + public int[] getShardBlockGridSize() { + + final int nd = getDatasetAttributes().getNumDimensions(); + final int[] shardBlockGridSize = new int[nd]; + final int[] blockSize = getBlockSize(); + for (int i = 0; i < nd; i++) + shardBlockGridSize[i] = (int)(Math + .ceil((double)getDatasetAttributes().getDimensions()[i] / blockSize[i])); + + return shardBlockGridSize; + } + + /** + * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. + * + * @param gridPosition + * position of a block relative to the array + * @return the position of the containing shard in the shard grid + */ + public long[] getShardPositionForBlock(final long... blockGridPosition) { + + // TODO have this return a shard + final int[] shardBlockDimensions = getShardBlockGridSize(); + final long[] shardGridPosition = new long[blockGridPosition.length]; + for (int i = 0; i < shardGridPosition.length; i++) { + shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / shardBlockDimensions[i]); + } + + return shardGridPosition; + } + + /** + * Returns of the block at the given position relative to this shard, or null if this shard does not contain the given block. 
+ * + * @return the shard position + */ + public int[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + + final long[] shardPos = getShardPositionForBlock(blockPosition); + if (!Arrays.equals(shardPosition, shardPos)) + return null; + + final int[] shardSize = getShardSize(); + final int[] blkSize = getBlockSize(); + final int[] blkGridSize = getShardBlockGridSize(); + + final int[] blockShardPos = new int[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + final long shardP = shardPos[i] * shardSize[i]; + final long blockP = blockPosition[i] * blkSize[i]; + blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); + } + + return blockShardPos; + } + + /** + * @return the number of blocks per shard + */ + public long getNumBlocks() { + + return Arrays.stream(getShardBlockGridSize()).reduce(1, (x, y) -> x * y); + } + + +} From a2131902c62774cb22be894e034cfc212fa71f87 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 23 Jul 2024 13:45:38 -0400 Subject: [PATCH 010/124] feat(wip): update core n5 api with sharding --- .../janelia/saalfeldlab/n5/CodecAdapter.java | 29 +++++++++++++++ .../saalfeldlab/n5/DefaultBlockReader.java | 16 ++++++-- .../n5/FileSystemKeyValueAccess.java | 16 ++++++-- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 25 +++++++++---- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 13 +++++++ .../org/janelia/saalfeldlab/n5/GsonUtils.java | 3 ++ .../org/janelia/saalfeldlab/n5/N5Writer.java | 37 +++++++++++++++++++ 7 files changed, 125 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java index 0390d9a3..db8a6daa 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java @@ -27,8 +27,11 @@ import java.lang.reflect.Type; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; import 
org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; @@ -57,6 +60,24 @@ public JsonElement serialize( obj.addProperty("encodedType", c.getEncodedType().toString().toLowerCase()); return obj; } + else if (codec.getName().equals(ShardingCodec.ID)) { + final ShardingCodec sharding = (ShardingCodec)codec; + final JsonObject obj = new JsonObject(); + obj.addProperty("name", sharding.getName()); + obj.add("configuration", context.serialize(sharding.getConfiguration())); + return obj; + } + else if (codec.getName().equals(BytesCodec.ID)) { + final BytesCodec bytes = (BytesCodec)codec; + final JsonObject obj = new JsonObject(); + obj.addProperty("name", bytes.getName()); + + final JsonObject config = new JsonObject(); + config.addProperty("endian", bytes.getName()); + obj.add("configuration", config); + + return obj; + } return JsonNull.INSTANCE; } @@ -84,6 +105,14 @@ else if (!json.isJsonObject()) DataType.valueOf(jsonObject.get("type").getAsString().toUpperCase()), DataType.valueOf(jsonObject.get("encodedType").getAsString().toUpperCase())); } + else if (id.equals(ShardingCodec.ID)) { + return new ShardingCodec( + context.deserialize(jsonObject.get("configuration"), ShardingConfiguration.class)); + } else if (id.equals(BytesCodec.ID)) { + + // TODO + return new BytesCodec(); + } } return null; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 4e585819..0f70baea 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -30,6 +30,8 @@ import java.io.InputStream; import java.nio.ByteBuffer; +import 
	/**
	 * Placeholder for a shard index lookup. Not implemented yet: both
	 * arguments are ignored and {@code -1} is always returned.
	 *
	 * @param shardingConfiguration
	 *            the sharding configuration (currently unused)
	 * @param gridPosition
	 *            a grid position (currently unused)
	 * @return always {@code -1}
	 */
	public static long getShardIndex(final ShardingConfiguration shardingConfiguration, final long[] gridPosition) {

		// TODO implement
		return -1;
	}
boolean readOnly, final long startByte, final long lastByte) throws IOException { + final OpenOption[] options; if (readOnly) { options = new OpenOption[]{StandardOpenOption.READ}; @@ -162,7 +172,7 @@ public LockedFileChannel lockForReading(final String normalPath) throws IOExcept try { return new LockedFileChannel(normalPath, true); - } catch (NoSuchFileException e) { + } catch (final NoSuchFileException e) { throw new N5Exception.N5NoSuchKeyException("No such file", e); } } @@ -177,7 +187,7 @@ public LockedFileChannel lockForReading(final Path path) throws IOException { try { return new LockedFileChannel(path, true); - } catch (NoSuchFileException e) { + } catch (final NoSuchFileException e) { throw new N5Exception.N5NoSuchKeyException("No such file", e); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 12867071..9b8e288c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -30,6 +30,8 @@ import java.util.Arrays; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.Shards; import com.google.gson.Gson; import com.google.gson.JsonElement; @@ -86,24 +88,32 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } + default Shard readShard(final String pathName, + final ShardedDatasetAttributes datasetAttributes, + long... gridPosition) { + + final Shards shards = new Shards(datasetAttributes); + // TODO throw exception if this dataset is not sharded? + + return null; + } + @Override default DataBlock readBlock( final String pathName, final DatasetAttributes datasetAttributes, final long... 
gridPosition) throws N5Exception { + if (ShardedDatasetAttributes.isSharded(datasetAttributes)) { + // TODO + } + final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), gridPosition); try (final LockedChannel lockedChannel = getKeyValueAccess().lockForReading(path)) { -<<<<<<< HEAD - return DefaultBlockReader.readBlock(lockedChannel.newInputStream(), datasetAttributes, gridPosition); + return DefaultBlockReader.readBlockWithCodecs(lockedChannel.newInputStream(), datasetAttributes, gridPosition); } catch (final N5Exception.N5NoSuchKeyException e) { return null; -||||||| 6b6d4d2 - return DefaultBlockReader.readBlock(lockedChannel.newInputStream(), datasetAttributes, gridPosition); -======= - return DefaultBlockReader.readBlockWithCodecs(lockedChannel.newInputStream(), datasetAttributes, gridPosition); ->>>>>>> origin/codecs } catch (final IOException | UncheckedIOException e) { throw new N5IOException( "Failed to read block " + Arrays.toString(gridPosition) + " from dataset " + path, @@ -151,6 +161,7 @@ default String absoluteDataBlockPath( return getKeyValueAccess().compose(getURI(), components); } + /** * Constructs the absolute path (in terms of this store) for the group or * dataset. 
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index a784dcc6..9e5253d7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -32,6 +32,7 @@ import java.util.Map; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.shard.Shard; import com.google.gson.Gson; import com.google.gson.JsonElement; @@ -227,6 +228,18 @@ default void writeBlock( } } + @Override + default void writeShard( + final String path, + final DatasetAttributes datasetAttributes, + final Shard shard) throws N5Exception { + + if (!(datasetAttributes instanceof ShardedDatasetAttributes)) + throw new N5IOException("Can not write shard into non-sharded dataset " + path); + + // TODO implement me + } + @Override default boolean remove(final String path) throws N5Exception { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java index 96cbeff9..7423b57e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java @@ -35,6 +35,7 @@ import java.util.regex.Matcher; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; import com.google.gson.Gson; import com.google.gson.GsonBuilder; @@ -56,6 +57,8 @@ static Gson registerGson(final GsonBuilder gsonBuilder) { gsonBuilder.registerTypeAdapter(DataType.class, new DataType.JsonAdapter()); gsonBuilder.registerTypeHierarchyAdapter(Compression.class, CompressionAdapter.getJsonAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(ShardingConfiguration.class, + new ShardingConfiguration.ShardingConfigurationAdapter()); gsonBuilder.registerTypeHierarchyAdapter(Codec.class, new CodecAdapter()); gsonBuilder.disableHtmlEscaping(); 
return gsonBuilder.create(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 867044db..6067b9f1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -35,6 +35,10 @@ import java.util.Map; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; /** * A simple structured container API for hierarchies of chunked @@ -208,6 +212,20 @@ default void createDataset( setDatasetAttributes(normalPath, datasetAttributes); } + default void createDataset( + final String datasetPath, + final long[] dimensions, + final int[] shardSize, + final int[] blockSize, + final DataType dataType, + final Compression compression) throws N5Exception { + + final Codec[] codecs = new Codec[]{new ShardingCodec( + new ShardingConfiguration(blockSize, null, null, IndexLocation.end))}; + + createDataset(datasetPath, new DatasetAttributes(dimensions, shardSize, dataType, compression, codecs)); + } + /** * Creates a dataset. This does not create any data but the path and * mandatory attributes only. @@ -266,6 +284,25 @@ void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws N5Exception; + /** + * Writes a complete {@link Shard} to a dataset. 
	/**
	 * Writes a complete {@link Shard} to a dataset.
	 *
	 * @param datasetPath
	 *            dataset path
	 * @param datasetAttributes
	 *            the dataset attributes
	 * @param shard
	 *            the shard
	 * @param <T>
	 *            the data block data type
	 * @throws N5Exception
	 *             if the requested dataset is not sharded
	 */
	void writeShard(
			final String datasetPath,
			final DatasetAttributes datasetAttributes,
			final Shard shard) throws N5Exception;
com.google.gson.Gson; import com.google.gson.JsonElement; @@ -88,11 +87,10 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } - default Shard readShard(final String pathName, + default Shard getShard(final String pathName, final ShardedDatasetAttributes datasetAttributes, long... gridPosition) { - final Shards shards = new Shards(datasetAttributes); // TODO throw exception if this dataset is not sharded? return null; @@ -104,7 +102,7 @@ default DataBlock readBlock( final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception { - if (ShardedDatasetAttributes.isSharded(datasetAttributes)) { + if (datasetAttributes instanceof ShardedDatasetAttributes) { // TODO } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 6067b9f1..f4645a80 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -221,7 +221,7 @@ default void createDataset( final Compression compression) throws N5Exception { final Codec[] codecs = new Codec[]{new ShardingCodec( - new ShardingConfiguration(blockSize, null, null, IndexLocation.end))}; + new ShardingConfiguration(blockSize, null, null, IndexLocation.END))}; createDataset(datasetPath, new DatasetAttributes(dimensions, shardSize, dataType, compression, codecs)); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 9b69ad6e..2825eff9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -6,7 +6,6 @@ import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; -import 
org.janelia.saalfeldlab.n5.shard.Shards; public class ShardedDatasetAttributes extends DatasetAttributes { @@ -16,17 +15,8 @@ public class ShardedDatasetAttributes extends DatasetAttributes { private final IndexLocation indexLocation; - public ShardedDatasetAttributes(final long[] dimensions, final int[] shardSize, final DataType dataType, - final Compression compression, - final Codec[] codecs) { - - super(dimensions, getBlockSize(codecs), dataType, compression, codecs); - final ShardingConfiguration config = getShardConfiguration(codecs); - this.indexLocation = config.areIndexesAtStart() ? IndexLocation.start : IndexLocation.end; - this.shardSize = shardSize; - } - - public ShardedDatasetAttributes(final long[] dimensions, + public ShardedDatasetAttributes( + final long[] dimensions, final int[] shardSize, final int[] blockSize, final IndexLocation shardIndexLocation, @@ -37,7 +27,6 @@ public ShardedDatasetAttributes(final long[] dimensions, super(dimensions, blockSize, dataType, compression, codecs); this.shardSize = shardSize; this.indexLocation = shardIndexLocation; - // this.config = new ShardingConfiguration(blockSize, null, null, shardIndexLocation); // TODO figure out codecs } @@ -47,44 +36,85 @@ public int[] getShardSize() { return shardSize; } - public Shards getShards() { - - return new Shards(this); - } + /** + * Returns the number of blocks a shard contains along all dimensions. 
+ * + * @return the size of the block grid of a shard + */ + public int[] getShardBlockGridSize() { - public ShardingConfiguration getShardingConfiguration() { + final int nd = getNumDimensions(); + final int[] shardBlockGridSize = new int[nd]; + final int[] blockSize = getBlockSize(); + for (int i = 0; i < nd; i++) + shardBlockGridSize[i] = (int)(Math.ceil((double)getDimensions()[i] / blockSize[i])); - return Arrays.stream(getCodecs()) - .filter(ShardingCodec::isShardingCodec) - .map(x -> { - return ((ShardingCodec)x).getConfiguration(); - }) - .findFirst().orElse(null); + return shardBlockGridSize; } - public static boolean isSharded(Codec[] codecs) { + /** + * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. + * + * @param blockGridPosition + * position of a block relative to the array + * @return the position of the containing shard in the shard grid + */ + public long[] getShardPositionForBlock(final long... blockGridPosition) { + + // TODO have this return a shard + final int[] shardBlockDimensions = getShardBlockGridSize(); + final long[] shardGridPosition = new long[blockGridPosition.length]; + for (int i = 0; i < shardGridPosition.length; i++) { + shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / shardBlockDimensions[i]); + } + + return shardGridPosition; + } - return Arrays.stream(codecs).anyMatch(ShardingCodec::isShardingCodec); + /** + * Returns of the block at the given position relative to this shard, or null if this shard does not contain the given block. 
+ * + * @return the shard position + */ + public int[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + + final long[] shardPos = getShardPositionForBlock(blockPosition); + if (!Arrays.equals(shardPosition, shardPos)) + return null; + + final int[] shardSize = getShardSize(); + final int[] blkSize = getBlockSize(); + final int[] blkGridSize = getShardBlockGridSize(); + + final int[] blockShardPos = new int[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + final long shardP = shardPos[i] * shardSize[i]; + final long blockP = blockPosition[i] * blkSize[i]; + blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); + } + + return blockShardPos; } - public static ShardingConfiguration getShardConfiguration(Codec[] codecs) { + /** + * @return the number of blocks per shard + */ + public long getNumBlocks() { - return Arrays.stream(codecs) - .filter(ShardingCodec::isShardingCodec) - .map(x -> { - return ((ShardingCodec)x).getConfiguration(); - }) - .findFirst().orElse(null); + return Arrays.stream(getShardBlockGridSize()).reduce(1, (x, y) -> x * y); } public static int[] getBlockSize(Codec[] codecs) { + //TODO Caleb: Move this? 
return Arrays.stream(codecs) .filter(ShardingCodec::isShardingCodec) - .map(x -> { - return ((ShardingCodec)x).getConfiguration(); - }) + .map(x -> ((ShardingCodec)x).getConfiguration()) .map(ShardingConfiguration::getBlockSize).findFirst().orElse(null); } + public IndexLocation getIndexLocation() { + + return indexLocation; + } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 05d63144..34237084 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -4,20 +4,22 @@ public abstract class AbstractShard implements Shard { - protected final long[] size; + protected final int[] size; protected final long[] gridPosition; protected final int[] blockSize; + private final ShardIndex index; - public AbstractShard(final long[] size, final long[] gridPosition, - final int[] blockSize, final T type) { + public AbstractShard(final int[] shardSize, final long[] gridPosition, + final int[] blockSize, final ShardIndex index) { - this.size = size; + this.size = shardSize; this.gridPosition = gridPosition; this.blockSize = blockSize; + this.index = index; } @Override - public long[] getSize() { + public int[] getSize() { return size; } @@ -41,9 +43,9 @@ public DataBlock getBlock(int... 
position) { } @Override - public ShardIndex getIndexes() { + public ShardIndex getIndex() { - return null; + return index; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 72f102b5..977620e1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -12,17 +12,10 @@ public class InMemoryShard extends AbstractShard { private ShardIndex shardIndex; - public InMemoryShard(final long[] size, final long[] gridPosition, final int[] blockSize, T type) { + public InMemoryShard(final int[] shardSize, final long[] gridPosition, final int[] blockSize, ShardIndex index) { - super(size, gridPosition, blockSize, type); + super(shardSize, gridPosition, blockSize, index); blocks = new ArrayList<>(); } - @Override - public DataBlock readBlock(String pathName, DatasetAttributes datasetAttributes, long... gridPosition) { - - // TODO Auto-generated method stub - return null; - } - } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index a587f735..716d6d4b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -3,10 +3,12 @@ import java.util.Arrays; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; public interface Shard { + long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; + /** * Returns the number of blocks this shard contains along all dimensions. 
* @@ -17,7 +19,7 @@ public interface Shard { */ default int[] getBlockGridSize() { - final long[] sz = getSize(); + final int[] sz = getSize(); final int[] blkSz = getBlockSize(); final int[] blockGridSize = new int[sz.length]; for (int i = 0; i < sz.length; i++) @@ -31,7 +33,7 @@ default int[] getBlockGridSize() { * * @return shard size */ - public long[] getSize(); + public int[] getSize(); /** * Returns the size of blocks in pixel units. @@ -60,7 +62,7 @@ default int[] getBlockPosition(long... blockPosition) { if (!Arrays.equals(getGridPosition(), shardPos)) return null; - final long[] shardSize = getSize(); + final int[] shardSize = getSize(); final int[] blkSize = getBlockSize(); final int[] blkGridSize = getBlockGridSize(); @@ -92,10 +94,25 @@ default long[] getShard(long... blockPosition) { public DataBlock getBlock(int... position); - public ShardIndex getIndexes(); + default DataBlock[] getAllBlocks(int... position) { + //TODO Caleb: Do we want this? + return null; + } + + public ShardIndex getIndex(); - public DataBlock readBlock(final String pathName, final DatasetAttributes datasetAttributes, long... gridPosition); + public static Shard createEmpty(final ShardedDatasetAttributes attributes, long... shardPosition) { + final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; + Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); + final ShardIndex shardIndex = new ShardIndex(attributes.getShardBlockGridSize(), emptyIndex); + + return new InMemoryShard( + attributes.getShardSize(), + shardPosition, + attributes.getBlockSize(), + shardIndex); + } /** * Say we want async datablock access @@ -107,5 +124,4 @@ default long[] getShard(long... 
blockPosition) { * Shard doesn't hold the data directly, but is the metadata about how the blocks are stored * */ - } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java deleted file mode 100644 index 11803b3b..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardImpl.java +++ /dev/null @@ -1,20 +0,0 @@ -package org.janelia.saalfeldlab.n5.shard; - -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; - -public class ShardImpl extends AbstractShard { - - public ShardImpl(final long[] size, final long[] gridPosition, final int[] blockSize, T type) { - - super(size, gridPosition, blockSize, type); - } - - @Override - public DataBlock readBlock(String pathName, DatasetAttributes datasetAttributes, long... gridPosition) { - - // TODO Auto-generated method stub - return null; - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index b2a19472..113a8ad4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -82,12 +82,11 @@ public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes data // TODO need codecs // TODO FileChannel is too specific - generalize - final Shards shards = new Shards(datasetAttributes); - final int[] indexShape = indexBlockSize(shards.getShardBlockGridSize()); + final int[] indexShape = indexBlockSize(datasetAttributes.getShardBlockGridSize()); final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); final int indexBytes = BYTES_PER_LONG * indexSize; - if (!datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + if (datasetAttributes.getIndexLocation() == ShardingConfiguration.IndexLocation.END) { channel.position(channel.size() - indexBytes); } diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index d68a03c0..530bb942 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -5,26 +5,28 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; +import org.janelia.saalfeldlab.n5.Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; +import org.janelia.saalfeldlab.n5.N5FSReader; +import org.janelia.saalfeldlab.n5.N5Reader; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.IdentityCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; public class ShardReader { - private final ShardedDatasetAttributes datasetAttributes; private long[] indexes; - private Shards shards; public ShardReader(final ShardedDatasetAttributes datasetAttributes) { this.datasetAttributes = datasetAttributes; - this.shards = new Shards(datasetAttributes); } public ShardIndex readIndexes(FileChannel channel) throws IOException { @@ -36,8 +38,6 @@ public InMemoryShard readShardFully( final FileChannel channel, long... 
gridPosition) throws IOException { - final DatasetAttributes dsetAttrs = shards.getDatasetAttributes(); - final ShardIndex si = readIndexes(channel); return null; } @@ -52,7 +52,7 @@ public DataBlock readBlock( final ShardIndex index = readIndexes(in); - final long[] shardPosition = shards.getShardPositionForBlock(blockPosition); + final long[] shardPosition = datasetAttributes.getShardPositionForBlock(blockPosition); in.position(index.getOffset(shardPosition)); final InputStream is = Channels.newInputStream(in); return DefaultBlockReader.readBlock(is, datasetAttributes, indexes); @@ -60,7 +60,7 @@ public DataBlock readBlock( private long getIndexIndex(long... shardPosition) { - final int[] indexDimensions = shards.getShardBlockGridSize(); + final int[] indexDimensions = datasetAttributes.getShardBlockGridSize(); long idx = 0; for (int i = 0; i < indexDimensions.length; i++) { idx += shardPosition[i] * indexDimensions[i]; @@ -77,14 +77,27 @@ public static void main(String[] args) { System.out.println(reader.getIndexIndex(0, 1)); System.out.println(reader.getIndexIndex(1, 0)); System.out.println(reader.getIndexIndex(1, 1)); + + final N5Reader n5 = new N5FSReader("shard.n5"); + final ShardedDatasetAttributes datasetAttributes = buildTestAttributes(); + n5.readBlock("dataset", datasetAttributes, 0, 0, 0); + } private static ShardedDatasetAttributes buildTestAttributes() { final Codec[] codecs = new Codec[]{ - new ShardingCodec(new ShardingConfiguration(new int[]{2, 2}, null, null, IndexLocation.end))}; - - return new ShardedDatasetAttributes(new long[]{4, 4}, new int[]{2, 2}, DataType.INT32, new RawCompression(), codecs); + new IdentityCodec(), + new ShardingCodec( + new ShardingConfiguration( + new int[]{2, 2}, + new Codec[]{new Compression.CompressionCodec(new RawCompression()), new IdentityCodec()}, + new Codec[]{new Crc32cChecksumCodec()}, + IndexLocation.END) + ) + }; + + return new ShardedDatasetAttributes(new long[]{4, 4}, new int[]{2, 2}, new int[]{2, 2}, 
IndexLocation.END, DataType.INT32, new RawCompression(), codecs); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 5705d583..24d82a4c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -62,7 +62,7 @@ public void write(final OutputStream out) throws IOException { // } prepareForWritingDataBlock(); - if (datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + if (datasetAttributes.getIndexLocation() == ShardingConfiguration.IndexLocation.START) { writeIndexBlock(out); writeBlocks(out); } else { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java index 2a29a116..ce520700 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java @@ -21,16 +21,19 @@ public class ShardingConfiguration { public static final String CODECS_KEY = "codecs"; public static final String INDEX_CODECS_KEY = "index_codecs"; - public static enum IndexLocation { - start, end - }; + public enum IndexLocation { + START, END + } protected int[] blockSize; protected Codec[] codecs; protected Codec[] indexCodecs; protected IndexLocation indexLocation; - public ShardingConfiguration(final int[] blockSize, final Codec[] codecs, final Codec[] indexCodecs, + public ShardingConfiguration( + final int[] blockSize, + final Codec[] codecs, + final Codec[] indexCodecs, final IndexLocation indexLocation) { this.blockSize = blockSize; @@ -46,7 +49,7 @@ public int[] getBlockSize() { public boolean areIndexesAtStart() { - return indexLocation == IndexLocation.start; + return indexLocation == IndexLocation.START; } public static class ShardingConfigurationAdapter diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java deleted file mode 100644 index fb0f6aca..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shards.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.janelia.saalfeldlab.n5.shard; - -import java.util.Arrays; - -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; - -/** - * Manages the set of shards that comprise a dataset. - */ -public class Shards { - - public static long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; - - private final ShardedDatasetAttributes datasetAttributes; - - public Shards(final ShardedDatasetAttributes datasetAttributes) { - - this.datasetAttributes = datasetAttributes; - } - - public ShardedDatasetAttributes getDatasetAttributes() { - - return datasetAttributes; - } - - public int[] getShardSize() { - - return getDatasetAttributes().getShardSize(); - } - - public int[] getBlockSize() { - - return getDatasetAttributes().getBlockSize(); - } - - /** - * Returns the number of blocks a shard contains along all dimensions. - * - * @return the size of the block grid of a shard - */ - public int[] getShardBlockGridSize() { - - final int nd = getDatasetAttributes().getNumDimensions(); - final int[] shardBlockGridSize = new int[nd]; - final int[] blockSize = getBlockSize(); - for (int i = 0; i < nd; i++) - shardBlockGridSize[i] = (int)(Math - .ceil((double)getDatasetAttributes().getDimensions()[i] / blockSize[i])); - - return shardBlockGridSize; - } - - /** - * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. - * - * @param gridPosition - * position of a block relative to the array - * @return the position of the containing shard in the shard grid - */ - public long[] getShardPositionForBlock(final long... 
blockGridPosition) { - - // TODO have this return a shard - final int[] shardBlockDimensions = getShardBlockGridSize(); - final long[] shardGridPosition = new long[blockGridPosition.length]; - for (int i = 0; i < shardGridPosition.length; i++) { - shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / shardBlockDimensions[i]); - } - - return shardGridPosition; - } - - /** - * Returns of the block at the given position relative to this shard, or null if this shard does not contain the given block. - * - * @return the shard position - */ - public int[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { - - final long[] shardPos = getShardPositionForBlock(blockPosition); - if (!Arrays.equals(shardPosition, shardPos)) - return null; - - final int[] shardSize = getShardSize(); - final int[] blkSize = getBlockSize(); - final int[] blkGridSize = getShardBlockGridSize(); - - final int[] blockShardPos = new int[shardSize.length]; - for (int i = 0; i < shardSize.length; i++) { - final long shardP = shardPos[i] * shardSize[i]; - final long blockP = blockPosition[i] * blkSize[i]; - blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); - } - - return blockShardPos; - } - - /** - * @return the number of blocks per shard - */ - public long getNumBlocks() { - - return Arrays.stream(getShardBlockGridSize()).reduce(1, (x, y) -> x * y); - } - - -} From ce0b25dcf87d3eae0ffd9f0d441d6d470b827d93 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 12:00:55 -0400 Subject: [PATCH 012/124] feat/wip: add size partial read lockForReading methods --- .../n5/FileSystemKeyValueAccess.java | 32 +++++++++++++++++-- .../saalfeldlab/n5/KeyValueAccess.java | 8 +++++ .../janelia/saalfeldlab/n5/LockedChannel.java | 2 ++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index 
b7ea36d5..d5d38ffe 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -86,7 +86,8 @@ protected LockedFileChannel(final Path path, final boolean readOnly) throws IOEx this(path, readOnly, -1, -1); } - protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long lastByte) throws IOException { + protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long endByte) + throws IOException { final OpenOption[] options; if (readOnly) { @@ -106,10 +107,13 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long } } + if (startByte != 0) + channel.position(startByte); + for (boolean waiting = true; waiting;) { waiting = false; try { - channel.lock(0L, Long.MAX_VALUE, readOnly); + channel.lock(startByte, endByte, readOnly); } catch (final OverlappingFileLockException e) { waiting = true; try { @@ -122,6 +126,12 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long } } + @Override + public long size() throws IOException { + + return channel.size(); + } + @Override public Reader newReader() throws IOException { @@ -177,6 +187,17 @@ public LockedFileChannel lockForReading(final String normalPath) throws IOExcept } } + @Override + public LockedFileChannel lockForReading(final String normalPath, final long startByte, final long endByte) + throws IOException { + + try { + return new LockedFileChannel(normalPath, true, startByte, endByte); + } catch (final NoSuchFileException e) { + throw new N5Exception.N5NoSuchKeyException("No such file", e); + } + } + @Override public LockedFileChannel lockForWriting(final String normalPath) throws IOException { @@ -218,6 +239,13 @@ public boolean exists(final String normalPath) { return Files.exists(path); } + @Override + public long size(final String normalPath) throws IOException { + + final Path 
path = fileSystem.getPath(normalPath); + return Files.size(path); + } + @Override public String[] listDirectories(final String normalPath) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java index ea09269b..8b6fe5aa 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java @@ -30,6 +30,8 @@ import java.net.URISyntaxException; import java.nio.file.FileSystem; +import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess.LockedFileChannel; + /** * Key value read primitives used by {@link N5KeyValueReader} * implementations. This interface implements a subset of access primitives @@ -117,6 +119,8 @@ public default String compose(final URI uri, final String... components) { */ public boolean exists(final String normalPath); + public long size(final String normalPath) throws IOException; + /** * Test whether the path is a directory. * @@ -155,6 +159,9 @@ public default String compose(final URI uri, final String... components) { */ public LockedChannel lockForReading(final String normalPath) throws IOException; + public LockedFileChannel lockForReading(String normalPath, final long startByte, final long endByte) + throws IOException; + /** * Create an exclusive lock on a path for writing. If the file doesn't * exist yet, it will be created, including all directories leading up to @@ -222,4 +229,5 @@ public default String compose(final URI uri, final String... 
components) { * if an error occurs during deletion */ public void delete(final String normalPath) throws IOException; + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/LockedChannel.java b/src/main/java/org/janelia/saalfeldlab/n5/LockedChannel.java index bd34a59d..c3c53f50 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/LockedChannel.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/LockedChannel.java @@ -40,6 +40,8 @@ */ public interface LockedChannel extends Closeable { + public long size() throws IOException; + /** * Create a UTF-8 {@link Reader}. * From 8aabcf43cad0a553f773349ab775c06c706e5c7c Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 12:04:20 -0400 Subject: [PATCH 013/124] wip/feat: ShardIndex progress, VirtualShard progress --- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 14 +- .../saalfeldlab/n5/shard/AbstractShard.java | 28 ++-- .../saalfeldlab/n5/shard/InMemoryShard.java | 9 +- .../janelia/saalfeldlab/n5/shard/Shard.java | 26 ++-- .../saalfeldlab/n5/shard/ShardIndex.java | 122 ++++++++++++++---- .../saalfeldlab/n5/shard/ShardReader.java | 9 -- .../saalfeldlab/n5/shard/ShardWriter.java | 3 +- 7 files changed, 149 insertions(+), 62 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 2aeb21b1..d12c6e7e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -31,6 +31,7 @@ import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.VirtualShard; import com.google.gson.Gson; import com.google.gson.JsonElement; @@ -87,13 +88,13 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } + @SuppressWarnings("rawtypes") default Shard getShard(final String pathName, final ShardedDatasetAttributes 
datasetAttributes, - long... gridPosition) { + long... shardGridPosition) { - // TODO throw exception if this dataset is not sharded? - - return null; + final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), shardGridPosition); + return new VirtualShard(datasetAttributes, shardGridPosition, getKeyValueAccess(), path); } @Override @@ -103,7 +104,10 @@ default DataBlock readBlock( final long... gridPosition) throws N5Exception { if (datasetAttributes instanceof ShardedDatasetAttributes) { - // TODO + final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes)datasetAttributes; + final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); + final Shard shard = getShard(pathName, shardedAttrs, shardPosition); + return shard.getBlock(gridPosition); } final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), gridPosition); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 34237084..270358b5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -1,33 +1,39 @@ package org.janelia.saalfeldlab.n5.shard; import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; public abstract class AbstractShard implements Shard { - protected final int[] size; - protected final long[] gridPosition; - protected final int[] blockSize; - private final ShardIndex index; + protected final ShardedDatasetAttributes datasetAttributes; - public AbstractShard(final int[] shardSize, final long[] gridPosition, - final int[] blockSize, final ShardIndex index) { + protected final ShardIndex index; - this.size = shardSize; + private final long[] gridPosition; + + public AbstractShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, + final ShardIndex 
index) { + + this.datasetAttributes = datasetAttributes; this.gridPosition = gridPosition; - this.blockSize = blockSize; this.index = index; } + public ShardedDatasetAttributes getDatasetAttributes() { + + return datasetAttributes; + } + @Override public int[] getSize() { - return size; + return datasetAttributes.getShardSize(); } @Override public int[] getBlockSize() { - return blockSize; + return datasetAttributes.getShardSize(); } @Override @@ -37,7 +43,7 @@ public long[] getGridPosition() { } @Override - public DataBlock getBlock(int... position) { + public DataBlock getBlock(long... position) { return null; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 977620e1..346e69db 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -4,17 +4,16 @@ import java.util.List; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; public class InMemoryShard extends AbstractShard { private List> blocks; - private ShardIndex shardIndex; + public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, + ShardIndex index) { - public InMemoryShard(final int[] shardSize, final long[] gridPosition, final int[] blockSize, ShardIndex index) { - - super(shardSize, gridPosition, blockSize, index); + super(datasetAttributes, gridPosition, index); blocks = new ArrayList<>(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 716d6d4b..27c3cd22 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -28,6 +28,8 @@ default int[] getBlockGridSize() { return blockGridSize; } + public 
ShardedDatasetAttributes getDatasetAttributes(); + /** * Returns the size of shards in pixel units. * @@ -56,7 +58,7 @@ default int[] getBlockGridSize() { * * @return the shard position */ - default int[] getBlockPosition(long... blockPosition) { + default long[] getBlockPosition(long... blockPosition) { final long[] shardPos = getShard(blockPosition); if (!Arrays.equals(getGridPosition(), shardPos)) @@ -66,7 +68,7 @@ default int[] getBlockPosition(long... blockPosition) { final int[] blkSize = getBlockSize(); final int[] blkGridSize = getBlockGridSize(); - final int[] blockShardPos = new int[shardSize.length]; + final long[] blockShardPos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { final long shardP = shardPos[i] * shardSize[i]; final long blockP = blockPosition[i] * blkSize[i]; @@ -92,9 +94,9 @@ default long[] getShard(long... blockPosition) { return shardGridPosition; } - public DataBlock getBlock(int... position); + public DataBlock getBlock(long... position); - default DataBlock[] getAllBlocks(int... position) { + default DataBlock[] getAllBlocks(long... position) { //TODO Caleb: Do we want this? 
return null; } @@ -106,12 +108,18 @@ public static Shard createEmpty(final ShardedDatasetAttributes attributes final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); final ShardIndex shardIndex = new ShardIndex(attributes.getShardBlockGridSize(), emptyIndex); + return new InMemoryShard(attributes, shardPosition, shardIndex); + } - return new InMemoryShard( - attributes.getShardSize(), - shardPosition, - attributes.getBlockSize(), - shardIndex); + public static long flatIndex(long[] gridPosition, int[] gridSize) { + + long index = gridPosition[0]; + long cumSizes = gridSize[0]; + for (int i = 1; i < gridSize.length; i++) { + index += gridPosition[i] * cumSizes; + cumSizes *= gridSize[i]; + } + return index; } /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 113a8ad4..95bd4b49 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -3,39 +3,40 @@ import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.util.Arrays; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.LongArrayDataBlock; +import org.janelia.saalfeldlab.n5.N5Exception; +import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; public class ShardIndex extends LongArrayDataBlock 
{ private static final int BYTES_PER_LONG = 8; - public ShardIndex(int[] size, long[] data) { + private static final int LONGS_PER_BLOCK = 2; - super(indexBlockSize(size), new long[]{0}, data); - } - - public ShardIndex(int[] size) { - - super(indexBlockSize(size), new long[]{0}, createData(size)); - } + private static final long[] DUMMY_GRID_POSITION = null; - private static long[] createData(final int[] size) { + public ShardIndex(int[] shardBlockGridSize, long[] data) { - final int N = 2 * Arrays.stream(size).reduce(1, (x, y) -> x * y); - return new long[N]; + super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, data); } - private static int[] indexBlockSize(final int[] size) { + public ShardIndex(int[] shardBlockGridSize) { - final int[] indexBlockSize = new int[size.length + 1]; - indexBlockSize[0] = 2; - System.arraycopy(size, 0, indexBlockSize, 1, size.length); - return indexBlockSize; + super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, emptyIndexData(shardBlockGridSize)); } public long getOffset(long... gridPosition) { @@ -55,7 +56,6 @@ public void set(long offset, long nbytes, long[] gridPosition) { data[i + 1] = nbytes; } - private int getOffsetIndex(long... gridPosition) { int idx = 0; @@ -73,20 +73,83 @@ private int getNumBytesIndex(long... 
gridPosition) { return getOffsetIndex() + 1; } - public void printData() { + public static ShardIndex read(final KeyValueAccess keyValueAccess, final String key, + final ShardedDatasetAttributes datasetAttributes) throws IOException { + + return read(keyValueAccess, key, datasetAttributes.getShardBlockGridSize(), + datasetAttributes.getIndexLocation()); + } + + public static ShardIndex read(final KeyValueAccess keyValueAccess, final String key, final int[] shardBlockGridSize, + final IndexLocation indexLocation) throws IOException { + + final ShardIndex idx = new ShardIndex(shardBlockGridSize); + final IndexByteBounds byteBounds = byteBounds(idx.getSize(), indexLocation, keyValueAccess.size(key)); + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { + + final byte[] bytes = new byte[idx.getNumElements() * ShardIndex.BYTES_PER_LONG]; + lockedChannel.newInputStream().read(bytes); + idx.readData(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); // TODO generalize byte order + return idx; + + } catch (final N5Exception.N5NoSuchKeyException e) { + return null; + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read from " + key, e); + } + } + + public static DatasetAttributes indexDatasetAttributes(final int[] indexBlockSize) { + + final int[] blkSize = new int[indexBlockSize.length]; + final long[] size = new long[indexBlockSize.length]; + for (int i = 0; i < blkSize.length; i++) { + blkSize[i] = (int)indexBlockSize[i]; + } + + // TODO codecs + return new DatasetAttributes(size, blkSize, DataType.UINT64, new RawCompression(), null); + } + + public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttributes, final long objectSize) { - System.out.println(Arrays.toString(data)); + final int[] indexShape = prepend(2, datasetAttributes.getShardBlockGridSize()); + return byteBounds(indexShape, datasetAttributes.getIndexLocation(), objectSize); + } + 
+ public static IndexByteBounds byteBounds(final int[] indexShape, final IndexLocation indexLocation, + final long objectSize) { + + final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); + + if (indexLocation == IndexLocation.START) { + return new IndexByteBounds(0L, indexSize); + } else { + return new IndexByteBounds(objectSize - (BYTES_PER_LONG * indexSize), objectSize - 1); + } + } + + private static class IndexByteBounds { + + private final long start; + private final long end; + + private IndexByteBounds(long start, long end) { + + this.start = start; + this.end = end; + } } public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes datasetAttributes) throws IOException { // TODO need codecs // TODO FileChannel is too specific - generalize - final int[] indexShape = indexBlockSize(datasetAttributes.getShardBlockGridSize()); + final int[] indexShape = prepend(2, datasetAttributes.getShardBlockGridSize()); final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); final int indexBytes = BYTES_PER_LONG * indexSize; - if (datasetAttributes.getIndexLocation() == ShardingConfiguration.IndexLocation.END) { + if (datasetAttributes.getIndexLocation() == IndexLocation.END) { channel.position(channel.size() - indexBytes); } @@ -101,12 +164,27 @@ public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes data return new ShardIndex(indexShape, indexes); } + private static long[] emptyIndexData(final int[] size) { + + final int N = 2 * Arrays.stream(size).reduce(1, (x, y) -> x * y); + final long[] data = new long[N]; + Arrays.fill(data, Shard.EMPTY_INDEX_NBYTES); + return data; + } + + private static int[] prepend(final int value, final int[] array) { + + final int[] indexBlockSize = new int[array.length + 1]; + indexBlockSize[0] = value; + System.arraycopy(array, 0, indexBlockSize, 1, array.length); + return indexBlockSize; + } + public static void main(String[] args) { final ShardIndex ib = 
new ShardIndex(new int[]{2, 2}); ib.set(8, 9, new long[]{1, 1}); - ib.printData(); // System.out.println(ib.getIndex(0, 0)); // System.out.println(ib.getIndex(1, 0)); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index 530bb942..ed4e7128 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -8,7 +8,6 @@ import org.janelia.saalfeldlab.n5.Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; import org.janelia.saalfeldlab.n5.N5FSReader; import org.janelia.saalfeldlab.n5.N5Reader; @@ -34,14 +33,6 @@ public ShardIndex readIndexes(FileChannel channel) throws IOException { return ShardIndex.read(channel, datasetAttributes); } - public InMemoryShard readShardFully( - final FileChannel channel, - long... gridPosition) throws IOException { - - final ShardIndex si = readIndexes(channel); - return null; - } - public DataBlock readBlock( final FileChannel in, long... 
blockPosition) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 24d82a4c..d1ade67f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -6,6 +6,7 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.janelia.saalfeldlab.n5.DataBlock; @@ -93,7 +94,7 @@ private void prepareForWritingDataBlock() throws IOException { blockBytes.add(blockOut.toByteArray()); } - indexData.printData(); + System.out.println(Arrays.toString(indexData.getData())); } private void prepareForWriting() throws IOException { From 77c096d96ccead5050bcae516e07fb12470bd67f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 12:41:56 -0400 Subject: [PATCH 014/124] feat/wip: add VirtualShard --- .../saalfeldlab/n5/shard/VirtualShard.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java new file mode 100644 index 00000000..2dab08cc --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -0,0 +1,70 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.DefaultBlockReader; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; +import org.janelia.saalfeldlab.n5.N5Exception; +import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import 
org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; + +public class VirtualShard extends AbstractShard { + + private KeyValueAccess keyValueAccess; + private String path; + + public VirtualShard(final ShardedDatasetAttributes datasetAttributes, long[] gridPosition, + final KeyValueAccess keyValueAccess, final String path) { + + super(datasetAttributes, gridPosition, null); + this.keyValueAccess = keyValueAccess; + this.path = path; + } + + @SuppressWarnings("unchecked") + @Override + public DataBlock getBlock(long... blockGridPosition) { + + final long[] relativePosition = getBlockPosition(blockGridPosition); + final ShardIndex idx = getIndex(); + final long startByte = idx.getOffset(relativePosition); + final long endByte = startByte + idx.getNumBytes(relativePosition); + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, endByte)) { + + // TODO add codecs, generalize to use any BlockReader + final DataBlock dataBlock = (DataBlock)datasetAttributes.getDataType().createDataBlock( + datasetAttributes.getBlockSize(), + blockGridPosition, + numBlockElements(datasetAttributes)); + + DefaultBlockReader.readFromStream(dataBlock, lockedChannel.newInputStream()); + return dataBlock; + + } catch (final N5Exception.N5NoSuchKeyException e) { + return null; + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read block from " + path, e); + } + } + + private static int numBlockElements(DatasetAttributes datasetAttributes) { + + return Arrays.stream(datasetAttributes.getBlockSize()).reduce(1, (x, y) -> x * y); + } + + @Override + public ShardIndex getIndex() { + + try { + return ShardIndex.read(keyValueAccess, path, datasetAttributes); + } catch (final IOException e) { + throw new N5IOException("Failed to read index at " + path, e); + } + } + +} From 6874c39981c6341751cbc3f1a6828e798306f395 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 13:27:08 -0400 Subject: [PATCH 015/124] 
test: reading a zarr shard demo --- .../saalfeldlab/n5/shard/ShardDemos.java | 35 ++++++++++++ .../shardExamples/test.zarr/mid_sharded/c/0/0 | Bin 0 -> 88 bytes .../test.zarr/mid_sharded/zarr.json | 54 ++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java create mode 100644 src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0 create mode 100644 src/test/resources/shardExamples/test.zarr/mid_sharded/zarr.json diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java new file mode 100644 index 00000000..d969a3ab --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -0,0 +1,35 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.net.MalformedURLException; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.RawCompression; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; + +public class ShardDemos { + + public static void main(String[] args) throws MalformedURLException { + + final Path p = Paths.get("src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0"); + System.out.println(p); + + final String key = p.toString(); + final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes(new long[]{6, 4}, new int[]{6, 4}, + new int[]{3, 2}, IndexLocation.END, DataType.UINT8, new RawCompression(), null); + + final FileSystemKeyValueAccess kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); + + final VirtualShard shard = new VirtualShard(dsetAttrs, new long[]{0, 0}, kva, key); + final DataBlock blk00 = 
shard.getBlock(0, 0); + System.out.println(Arrays.toString((byte[])blk00.getData())); + + } + +} diff --git a/src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0 b/src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..eafe18073dbcea989cb3bb6ee0d9246357a15a45 GIT binary patch literal 88 tcmZQzWMX6I;Nj&H5*A@*VddoF<`)nY6%%KG0yZekgT{x6!)PHic>q@K0Z{+| literal 0 HcmV?d00001 diff --git a/src/test/resources/shardExamples/test.zarr/mid_sharded/zarr.json b/src/test/resources/shardExamples/test.zarr/mid_sharded/zarr.json new file mode 100644 index 00000000..a80cb9d9 --- /dev/null +++ b/src/test/resources/shardExamples/test.zarr/mid_sharded/zarr.json @@ -0,0 +1,54 @@ +{ + "shape": [ + 4, + 6 + ], + "fill_value": 0, + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 4, + 6 + ] + } + }, + "attributes": {}, + "zarr_format": 3, + "data_type": "uint8", + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 2, + 3 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_location": "end" + } + } + ], + "node_type": "array" +} From be870132f371e9cb2feb9603f03c461d49f29644 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 17:25:28 -0400 Subject: [PATCH 016/124] feat: make partial writes possible for key value access --- .../n5/FileSystemKeyValueAccess.java | 26 ++++++++++++++++--- .../saalfeldlab/n5/KeyValueAccess.java | 3 +++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index d5d38ffe..fdaae9e0 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -71,6 +71,8 @@ protected class LockedFileChannel implements LockedChannel { protected final FileChannel channel; + protected final boolean truncate; + protected LockedFileChannel(final String path, final boolean readOnly) throws IOException { this(fileSystem.getPath(path), readOnly, -1, -1); @@ -89,6 +91,11 @@ protected LockedFileChannel(final Path path, final boolean readOnly) throws IOEx protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long endByte) throws IOException { + truncate = (startByte < 0 && endByte < 0); + + final long start = startByte < 0 ? 0L : startByte; + final long end = endByte < 0 ? Long.MAX_VALUE : endByte; + final OpenOption[] options; if (readOnly) { options = new OpenOption[]{StandardOpenOption.READ}; @@ -108,12 +115,12 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long } if (startByte != 0) - channel.position(startByte); + channel.position(start); for (boolean waiting = true; waiting;) { waiting = false; try { - channel.lock(startByte, endByte, readOnly); + channel.lock(start, end, readOnly); } catch (final OverlappingFileLockException e) { waiting = true; try { @@ -141,7 +148,9 @@ public Reader newReader() throws IOException { @Override public Writer newWriter() throws IOException { - channel.truncate(0); + if (truncate) + channel.truncate(0); + return Channels.newWriter(channel, StandardCharsets.UTF_8.name()); } @@ -154,7 +163,9 @@ public InputStream newInputStream() throws IOException { @Override public OutputStream newOutputStream() throws IOException { - channel.truncate(0); + if (truncate) + channel.truncate(0); + return Channels.newOutputStream(channel); } @@ -204,6 +215,13 @@ public LockedFileChannel lockForWriting(final String normalPath) throws IOExcept return new LockedFileChannel(normalPath, false); 
} + @Override + public LockedFileChannel lockForWriting(final String normalPath, final long startByte, final long endByte) + throws IOException { + + return new LockedFileChannel(normalPath, false, startByte, endByte); + } + public LockedFileChannel lockForReading(final Path path) throws IOException { try { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java index 8b6fe5aa..bd20d575 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java @@ -182,6 +182,9 @@ public LockedFileChannel lockForReading(String normalPath, final long startByte, */ public LockedChannel lockForWriting(final String normalPath) throws IOException; + public LockedFileChannel lockForWriting(String normalPath, final long startByte, final long endByte) + throws IOException; + /** * List all 'directory'-like children of a path. * From c5ee84dc59ee86150cbdee6a929c786e391b4340 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 17:26:44 -0400 Subject: [PATCH 017/124] fix: AbstractShard getBlockSize --- .../java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 270358b5..f642075a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -19,6 +19,7 @@ public AbstractShard(final ShardedDatasetAttributes datasetAttributes, final lon this.index = index; } + @Override public ShardedDatasetAttributes getDatasetAttributes() { return datasetAttributes; @@ -33,7 +34,7 @@ public int[] getSize() { @Override public int[] getBlockSize() { - return datasetAttributes.getShardSize(); + return datasetAttributes.getBlockSize(); } 
@Override From 0b4d73c33d38e9a5853ba64e893a14d7493d2289 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 24 Jul 2024 17:31:24 -0400 Subject: [PATCH 018/124] wip: toward block writing through shard --- .../saalfeldlab/n5/shard/InMemoryShard.java | 14 +++++++ .../janelia/saalfeldlab/n5/shard/Shard.java | 6 ++- .../saalfeldlab/n5/shard/ShardIndex.java | 29 +++++++++++++- .../saalfeldlab/n5/shard/VirtualShard.java | 39 +++++++++++++++++++ .../saalfeldlab/n5/shard/ShardDemos.java | 19 +++++++-- 5 files changed, 102 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 346e69db..baacc8c5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -17,4 +17,18 @@ public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final lon blocks = new ArrayList<>(); } + @Override + public void writeBlock(DataBlock block) { + + // TODO Auto-generated method stub + + } + + @Override + public void writeShard() { + + // TODO Auto-generated method stub + + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 27c3cd22..43a90ca6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -88,7 +88,7 @@ default long[] getShard(long... 
blockPosition) { final int[] shardBlockDimensions = getBlockGridSize(); final long[] shardGridPosition = new long[shardBlockDimensions.length]; for (int i = 0; i < shardGridPosition.length; i++) { - shardGridPosition[i] = (long)Math.floor((double)blockPosition[i] / shardBlockDimensions[i]); + shardGridPosition[i] = (long)Math.floor((double)(blockPosition[i]) / shardBlockDimensions[i]); } return shardGridPosition; @@ -96,6 +96,10 @@ default long[] getShard(long... blockPosition) { public DataBlock getBlock(long... position); + public void writeBlock(DataBlock block); + + public void writeShard(); + default DataBlock[] getAllBlocks(long... position) { //TODO Caleb: Do we want this? return null; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 95bd4b49..1d0da8a4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -3,6 +3,7 @@ import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -39,6 +40,12 @@ public ShardIndex(int[] shardBlockGridSize) { super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, emptyIndexData(shardBlockGridSize)); } + public boolean exists(long... gridPosition) { + + return getOffset(gridPosition) != Shard.EMPTY_INDEX_NBYTES && + getNumBytes(gridPosition) != Shard.EMPTY_INDEX_NBYTES; + } + public long getOffset(long... 
gridPosition) { return data[getOffsetIndex(gridPosition)]; @@ -80,7 +87,10 @@ public static ShardIndex read(final KeyValueAccess keyValueAccess, final String datasetAttributes.getIndexLocation()); } - public static ShardIndex read(final KeyValueAccess keyValueAccess, final String key, final int[] shardBlockGridSize, + public static ShardIndex read( + final KeyValueAccess keyValueAccess, + final String key, + final int[] shardBlockGridSize, final IndexLocation indexLocation) throws IOException { final ShardIndex idx = new ShardIndex(shardBlockGridSize); @@ -99,6 +109,23 @@ public static ShardIndex read(final KeyValueAccess keyValueAccess, final String } } + public static void write(ShardIndex index, + final KeyValueAccess keyValueAccess, + final String key, + final int[] shardBlockGridSize, + final IndexLocation indexLocation) throws IOException { + + final IndexByteBounds byteBounds = byteBounds(index.getSize(), indexLocation, keyValueAccess.size(key)); + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, byteBounds.start, byteBounds.end)) { + + final OutputStream os = lockedChannel.newOutputStream(); + os.write(index.toByteBuffer().array()); + + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read from " + key, e); + } + } + public static DatasetAttributes indexDatasetAttributes(final int[] indexBlockSize) { final int[] blkSize = new int[indexBlockSize.length]; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 2dab08cc..fd2f3854 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -31,6 +31,9 @@ public VirtualShard(final ShardedDatasetAttributes datasetAttributes, long[] gri public DataBlock getBlock(long... 
blockGridPosition) { final long[] relativePosition = getBlockPosition(blockGridPosition); + if (relativePosition == null) + throw new N5IOException("Attempted to read a block from the wrong shard."); + final ShardIndex idx = getIndex(); final long startByte = idx.getOffset(relativePosition); final long endByte = startByte + idx.getNumBytes(relativePosition); @@ -52,6 +55,42 @@ public DataBlock getBlock(long... blockGridPosition) { } } + @Override + public void writeBlock(final DataBlock block) { + + final long[] relativePosition = getBlockPosition(block.getGridPosition()); + if (relativePosition == null) + throw new N5IOException("Attempted to write block in the wrong shard."); + + final ShardIndex idx = getIndex(); + final long startByte = idx.getOffset(relativePosition); + final long endByte = startByte + idx.getNumBytes(relativePosition); + + // TODO this assumes that the block exists in the shard and + // that the available space is sufficient. Should generalize + + // // A starting point: + // if (!idx.exists(block.getGridPosition())) { + // + // } + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, endByte)) { + + // TODO codecs + datasetAttributes.getCompression().getWriter().write(block, lockedChannel.newOutputStream()); + + // TODO update index when we know how many bytes were written + + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read block from " + path, e); + } + + } + + @Override + public void writeShard() { + + } + private static int numBlockElements(DatasetAttributes datasetAttributes) { return Arrays.stream(datasetAttributes.getBlockSize()).reduce(1, (x, y) -> x * y); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index d969a3ab..2568eb6e 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ 
b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -20,16 +20,29 @@ public static void main(String[] args) throws MalformedURLException { final Path p = Paths.get("src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0"); System.out.println(p); + final String key = p.toString(); final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes(new long[]{6, 4}, new int[]{6, 4}, new int[]{3, 2}, IndexLocation.END, DataType.UINT8, new RawCompression(), null); final FileSystemKeyValueAccess kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); + final VirtualShard shard = new VirtualShard<>(dsetAttrs, new long[]{0, 0}, kva, key); + + final DataBlock blk = shard.getBlock(0, 0); + + final byte[] data = (byte[])blk.getData(); + System.out.println(Arrays.toString(data)); + + // fill the block with a weird value + Arrays.fill(data, (byte)123); - final VirtualShard shard = new VirtualShard(dsetAttrs, new long[]{0, 0}, kva, key); - final DataBlock blk00 = shard.getBlock(0, 0); - System.out.println(Arrays.toString((byte[])blk00.getData())); + // write the block + shard.writeBlock(blk); + // re-read the block and check the data it contains + final DataBlock blkReread = shard.getBlock(0, 0); + final byte[] dataReRead = (byte[])blkReread.getData(); + System.out.println(Arrays.toString(dataReRead)); } } From 083cb548e5c68af48d63f7ac3f2b370360f267fb Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 1 Aug 2024 14:49:43 -0400 Subject: [PATCH 019/124] fix: partial write defaults with 0 test,build: include n5-universe for tests --- pom.xml | 27 ++++++++++++------ .../n5/FileSystemKeyValueAccess.java | 18 ++++++------ .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 28 +++++++++++++++++++ 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index 8b683eb4..be2913ba 100644 --- a/pom.xml +++ b/pom.xml @@ -161,6 +161,14 @@ com.google.code.gson gson + + org.scijava + scijava-common + + + org.apache.commons + 
commons-compress + @@ -168,6 +176,17 @@ junit test + + org.janelia.saalfeldlab + n5-universe + + + org.janelia.saalfeldlab + n5 + + + test + net.imagej ij @@ -194,14 +213,6 @@ ${commons-collections4.version} test - - org.scijava - scijava-common - - - org.apache.commons - commons-compress - diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index fdaae9e0..967cb6e2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -75,26 +75,28 @@ protected class LockedFileChannel implements LockedChannel { protected LockedFileChannel(final String path, final boolean readOnly) throws IOException { - this(fileSystem.getPath(path), readOnly, -1, -1); + this(fileSystem.getPath(path), readOnly, 0, 0); } - protected LockedFileChannel(final String path, final boolean readOnly, final long startByte, final long lastByte) throws IOException { + protected LockedFileChannel(final String path, final boolean readOnly, final long startByte, final long size) throws IOException { - this(fileSystem.getPath(path), readOnly, startByte, lastByte); + this(fileSystem.getPath(path), readOnly, startByte, size); } protected LockedFileChannel(final Path path, final boolean readOnly) throws IOException { - this(path, readOnly, -1, -1); + this(path, readOnly, 0, 0); } - protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long endByte) + protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long size) throws IOException { - truncate = (startByte < 0 && endByte < 0); final long start = startByte < 0 ? 0L : startByte; - final long end = endByte < 0 ? Long.MAX_VALUE : endByte; + final long len = size < 0 ? 0L : size; + + //TODO Caleb: How does this handle if manually overwriting the entire file? (e.g. 
len > file size) + truncate = (start == 0 && len == 0); final OpenOption[] options; if (readOnly) { @@ -120,7 +122,7 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long for (boolean waiting = true; waiting;) { waiting = false; try { - channel.lock(start, end, readOnly); + channel.lock(start, len, readOnly); } catch (final OverlappingFileLockException e) { waiting = true; try { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index d12c6e7e..3f44eebc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -163,6 +163,34 @@ default String absoluteDataBlockPath( return getKeyValueAccess().compose(getURI(), components); } + /** + * Constructs the path for a shard in a dataset at a given grid position. + *

      + * The returned path is + * + *

      +	 * $basePath/datasetPathName/$shardPosition[0]/$shardPosition[1]/.../$shardPosition[n]
      +	 * 
      + *

      + * This is the file into which the shard will be stored. + * + * @param normalPath normalized dataset path + * @param shardGridPosition to the target shard + * @return the absolute path to the shard at shardGridPosition + */ + default String absoluteShardPath( + final String normalPath, + final long... shardGridPosition) { + + final String[] components = new String[shardGridPosition.length + 1]; + components[0] = normalPath; + int i = 0; + for (final long p : shardGridPosition) + components[++i] = Long.toString(p); + + return getKeyValueAccess().compose(getURI(), components); + } + /** * Constructs the absolute path (in terms of this store) for the group or From bdadb582ec849dbc65497bb465dc19d28b690a0c Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 2 Aug 2024 16:17:23 -0400 Subject: [PATCH 020/124] wip: zone serialization and sharding? --- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 11 ++ .../saalfeldlab/n5/shard/ShardDemos.java | 120 +++++++++++++++++- .../test.zarr/mid_sharded/attributes.json | 54 ++++++++ .../shardExamples/test.zarr/mid_sharded/c/0/0 | Bin 88 -> 88 bytes 4 files changed, 179 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 9e5253d7..69e77fc2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -39,6 +39,7 @@ import com.google.gson.JsonNull; import com.google.gson.JsonObject; import com.google.gson.JsonSyntaxException; +import org.janelia.saalfeldlab.n5.shard.VirtualShard; /** * Default implementation of {@link N5Writer} with JSON attributes parsed with @@ -217,6 +218,16 @@ default void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws N5Exception { + /* Delegate to 
shard for writing block? How to know what type of shard? */ + if (datasetAttributes instanceof ShardedDatasetAttributes) { + ShardedDatasetAttributes shardDatasetAttrs = (ShardedDatasetAttributes)datasetAttributes; + final long[] shardPos = shardDatasetAttrs.getShardPositionForBlock(dataBlock.getGridPosition()); + final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); + final VirtualShard shard = new VirtualShard<>(shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); + shard.writeBlock(dataBlock); + return; + } + final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { DefaultBlockWriter.writeBlockWithCodecs(lock.newOutputStream(), datasetAttributes, dataBlock); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 2568eb6e..31491634 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,17 +1,34 @@ package org.janelia.saalfeldlab.n5.shard; -import java.net.MalformedURLException; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; - +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import org.janelia.saalfeldlab.n5.Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import 
org.janelia.saalfeldlab.n5.N5Reader; +import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.IdentityCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.junit.Test; + +import java.lang.reflect.Type; +import java.net.MalformedURLException; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; public class ShardDemos { @@ -45,4 +62,95 @@ public static void main(String[] args) throws MalformedURLException { System.out.println(Arrays.toString(dataReRead)); } + @Test + public void writeReadBlockTest() { + + final N5Writer writer = N5Factory.createWriter("src/test/resources/shardExamples/test.n5"); + + final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( + new long[]{8, 8}, + new int[]{4, 4}, + new int[]{2, 2}, + IndexLocation.END, + DataType.UINT8, + new RawCompression(), + new Codec[]{ + new IdentityCodec(), + new ShardingCodec( + new ShardingConfiguration( + new int[]{2, 2}, + new Codec[]{new Compression.CompressionCodec(new RawCompression()), new IdentityCodec()}, + new Codec[]{new Crc32cChecksumCodec()}, + IndexLocation.END) + ) + } + ); + writer.createDataset("shard", datasetAttributes); + final DataBlock dataBlock = datasetAttributes.getDataType().createDataBlock(datasetAttributes.getBlockSize(), new long[]{0, 0}, 2 * 2); + + writer.writeBlock("shard", datasetAttributes, dataBlock); + writer.readBlock("shard", datasetAttributes, 0,0); + } + + private static class ZarrConfig { + final String name; + final T configuration; + + private ZarrConfig() { + name = ""; + configuration = null; + } + } + + private class GridConfig 
extends ZarrConfig {} + private class KeyEncodingConfig extends ZarrConfig {} + + private class ZarrChunk {} + + private class ZarrChunkAdapter implements com.google.gson.JsonSerializer, com.google.gson.JsonDeserializer { + final ZarrConfig grid; + final ZarrConfig keyEncoding; + + public ZarrChunkAdapter() { + grid = null; + keyEncoding = null; + } + public ZarrChunkAdapter(ZarrConfig grid, ZarrConfig key_encoding) { + + this.grid = grid; + this.keyEncoding = key_encoding; + } + + @Override public ZarrChunk deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { + + if (!json.isJsonObject()) return null; + + final JsonObject obj = json.getAsJsonObject(); + final JsonObject grid = obj.getAsJsonObject("chunk_grid"); + + return null; + } + + @Override public JsonElement serialize(ZarrChunk src, Type typeOfSrc, JsonSerializationContext context) { + + return null; + } + } + + + @Test + public void nameConfigurationGsonTest() { + + final N5Factory factory = new N5Factory(); + final GsonBuilder gson = new GsonBuilder(); + + + gson.registerTypeHierarchyAdapter() + factory.gsonBuilder(gson); + final N5Reader n5 = factory.openReader("src/test/resources/shardExamples/test.zarr/mid_sharded"); + + final JsonObject zarrJson = n5.getAttribute("/", "/", JsonObject.class); + zarrJson.remove("shard") + } + } diff --git a/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json b/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json new file mode 100644 index 00000000..a80cb9d9 --- /dev/null +++ b/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json @@ -0,0 +1,54 @@ +{ + "shape": [ + 4, + 6 + ], + "fill_value": 0, + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 4, + 6 + ] + } + }, + "attributes": {}, + "zarr_format": 3, + "data_type": "uint8", + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "codecs": [ 
+ { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 2, + 3 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_location": "end" + } + } + ], + "node_type": "array" +} diff --git a/src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0 b/src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0 index eafe18073dbcea989cb3bb6ee0d9246357a15a45..19ad91e1627a7c0e559a2b3e7fb030cbb62f9934 100644 GIT binary patch delta 12 Pcma!uV5 Date: Fri, 2 Aug 2024 16:23:22 -0400 Subject: [PATCH 021/124] wip/feat: add VirtualShard.createIndex * edit behavior of getIndex --- .../saalfeldlab/n5/shard/VirtualShard.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index fd2f3854..699c45bf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -68,11 +68,6 @@ public void writeBlock(final DataBlock block) { // TODO this assumes that the block exists in the shard and // that the available space is sufficient. 
Should generalize - - // // A starting point: - // if (!idx.exists(block.getGridPosition())) { - // - // } try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, endByte)) { // TODO codecs @@ -89,6 +84,7 @@ public void writeBlock(final DataBlock block) { @Override public void writeShard() { + // TODO } private static int numBlockElements(DatasetAttributes datasetAttributes) { @@ -96,11 +92,18 @@ private static int numBlockElements(DatasetAttributes datasetAttributes) { return Arrays.stream(datasetAttributes.getBlockSize()).reduce(1, (x, y) -> x * y); } + public ShardIndex createIndex() { + + // Empty index of the correct size + return new ShardIndex(datasetAttributes.getShardBlockGridSize()); + } + @Override public ShardIndex getIndex() { try { - return ShardIndex.read(keyValueAccess, path, datasetAttributes); + final ShardIndex result = ShardIndex.read(keyValueAccess, path, datasetAttributes); + return result == null ? createIndex() : result; } catch (final IOException e) { throw new N5IOException("Failed to read index at " + path, e); } From 1931530a758e87350fc29698eee9ce304bf383aa Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 2 Aug 2024 16:23:58 -0400 Subject: [PATCH 022/124] fix: return KVA return for types for ranged lockForReading/Writing --- .../java/org/janelia/saalfeldlab/n5/KeyValueAccess.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java index bd20d575..d199bf67 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java @@ -30,8 +30,6 @@ import java.net.URISyntaxException; import java.nio.file.FileSystem; -import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess.LockedFileChannel; - /** * Key value read primitives used by {@link N5KeyValueReader} * implementations. 
This interface implements a subset of access primitives @@ -159,7 +157,7 @@ public default String compose(final URI uri, final String... components) { */ public LockedChannel lockForReading(final String normalPath) throws IOException; - public LockedFileChannel lockForReading(String normalPath, final long startByte, final long endByte) + public LockedChannel lockForReading(String normalPath, final long startByte, final long endByte) throws IOException; /** @@ -182,7 +180,7 @@ public LockedFileChannel lockForReading(String normalPath, final long startByte, */ public LockedChannel lockForWriting(final String normalPath) throws IOException; - public LockedFileChannel lockForWriting(String normalPath, final long startByte, final long endByte) + public LockedChannel lockForWriting(String normalPath, final long startByte, final long endByte) throws IOException; /** From 786ec1fb5545319167d860465282a7b5f313a8c5 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 5 Aug 2024 12:50:36 -0400 Subject: [PATCH 023/124] feat: add getAttributesKey * this may make more sense in a more basic interface --- .../n5/CachedGsonKeyValueN5Reader.java | 16 +++++++++------- .../n5/CachedGsonKeyValueN5Writer.java | 10 +++++----- .../janelia/saalfeldlab/n5/N5KeyValueReader.java | 6 ++++++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java index 812c6ebb..4cdb8d92 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java @@ -27,7 +27,6 @@ import java.lang.reflect.Type; -import com.google.gson.JsonSyntaxException; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.cache.N5JsonCache; import org.janelia.saalfeldlab.n5.cache.N5JsonCacheableContainer; @@ -35,6 +34,7 @@ import 
com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonObject; +import com.google.gson.JsonSyntaxException; /** * {@link N5Reader} implementation through {@link KeyValueAccess} with JSON @@ -52,6 +52,8 @@ default N5JsonCache newCache() { N5JsonCache getCache(); + public String getAttributesKey(); + @Override default JsonElement getAttributesFromContainer(final String normalPathName, final String normalCacheKey) { @@ -70,7 +72,7 @@ default DatasetAttributes getDatasetAttributes(final String pathName) { return null; if (cacheMeta()) { - attributes = getCache().getAttributes(normalPath, N5KeyValueReader.ATTRIBUTES_JSON); + attributes = getCache().getAttributes(normalPath, getAttributesKey()); } else { attributes = GsonKeyValueN5Reader.super.getAttributes(normalPath); } @@ -96,7 +98,7 @@ default T getAttribute( final JsonElement attributes; if (cacheMeta()) { - attributes = getCache().getAttributes(normalPathName, N5KeyValueReader.ATTRIBUTES_JSON); + attributes = getCache().getAttributes(normalPathName, getAttributesKey()); } else { attributes = GsonKeyValueN5Reader.super.getAttributes(normalPathName); } @@ -117,7 +119,7 @@ default T getAttribute( final String normalizedAttributePath = N5URI.normalizeAttributePath(key); JsonElement attributes; if (cacheMeta()) { - attributes = getCache().getAttributes(normalPathName, N5KeyValueReader.ATTRIBUTES_JSON); + attributes = getCache().getAttributes(normalPathName, getAttributesKey()); } else { attributes = GsonKeyValueN5Reader.super.getAttributes(normalPathName); } @@ -133,7 +135,7 @@ default boolean exists(final String pathName) { final String normalPathName = N5URI.normalizeGroupPath(pathName); if (cacheMeta()) - return getCache().isGroup(normalPathName, N5KeyValueReader.ATTRIBUTES_JSON); + return getCache().isGroup(normalPathName, getAttributesKey()); else { return existsFromContainer(normalPathName, null); } @@ -176,7 +178,7 @@ default boolean datasetExists(final String pathName) throws 
N5IOException { final String normalPathName = N5URI.normalizeGroupPath(pathName); if (cacheMeta()) { - return getCache().isDataset(normalPathName, N5KeyValueReader.ATTRIBUTES_JSON); + return getCache().isDataset(normalPathName, getAttributesKey()); } return isDatasetFromContainer(normalPathName); } @@ -208,7 +210,7 @@ default JsonElement getAttributes(final String pathName) throws N5IOException { /* If cached, return the cache */ if (cacheMeta()) { - return getCache().getAttributes(groupPath, N5KeyValueReader.ATTRIBUTES_JSON); + return getCache().getAttributes(groupPath, getAttributesKey()); } else { return GsonKeyValueN5Reader.super.getAttributes(groupPath); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Writer.java index d95f6345..9d509051 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Writer.java @@ -59,9 +59,9 @@ default void createGroup(final String path) throws N5Exception { // else if exists is true (then a dataset is present) so throw an exception to avoid // overwriting / invalidating existing data if (cacheMeta()) { - if (getCache().isGroup(normalPath, N5KeyValueReader.ATTRIBUTES_JSON)) + if (getCache().isGroup(normalPath, getAttributesKey())) return; - else if (getCache().exists(normalPath, N5KeyValueReader.ATTRIBUTES_JSON)) { + else if (getCache().exists(normalPath, getAttributesKey())) { throw new N5Exception("Can't make a group on existing path."); } } @@ -88,8 +88,8 @@ else if (getCache().exists(normalPath, N5KeyValueReader.ATTRIBUTES_JSON)) { for (final String child : pathParts) { final String childPath = parent.isEmpty() ? 
child : parent + "/" + child; - getCache().initializeNonemptyCache(childPath, N5KeyValueReader.ATTRIBUTES_JSON); - getCache().updateCacheInfo(childPath, N5KeyValueReader.ATTRIBUTES_JSON); + getCache().initializeNonemptyCache(childPath, getAttributesKey()); + getCache().updateCacheInfo(childPath, getAttributesKey()); // only add if the parent exists and has children cached already if (parent != null && !child.isEmpty()) @@ -130,7 +130,7 @@ default void writeAndCacheAttributes( nullRespectingAttributes = getGson().toJsonTree(attributes); } /* Update the cache, and write to the writer */ - getCache().updateCacheInfo(normalGroupPath, N5KeyValueReader.ATTRIBUTES_JSON, nullRespectingAttributes); + getCache().updateCacheInfo(normalGroupPath, getAttributesKey(), nullRespectingAttributes); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java b/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java index 29e43ce6..edbc6947 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java @@ -144,6 +144,12 @@ protected N5KeyValueReader( throw new N5Exception.N5IOException("No container exists at " + basePath); } + @Override + public String getAttributesKey() { + + return ATTRIBUTES_JSON; + } + @Override public Gson getGson() { From bf4f63e79aaa827fa5d4fab066ee06f5cebb588b Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 5 Aug 2024 13:27:26 -0400 Subject: [PATCH 024/124] wip: move getAttributesKey to GsonN5Reader --- .../org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java | 2 -- .../java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java | 2 +- src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java index 4cdb8d92..324d242e 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java @@ -52,8 +52,6 @@ default N5JsonCache newCache() { N5JsonCache getCache(); - public String getAttributesKey(); - @Override default JsonElement getAttributesFromContainer(final String normalPathName, final String normalCacheKey) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 3f44eebc..003820b1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -215,6 +215,6 @@ default String absoluteGroupPath(final String normalGroupPath) { */ default String absoluteAttributesPath(final String normalPath) { - return getKeyValueAccess().compose(getURI(), normalPath, N5KeyValueReader.ATTRIBUTES_JSON); + return getKeyValueAccess().compose(getURI(), normalPath, getAttributesKey()); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java index ea7ea878..c611a9e7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java @@ -42,6 +42,8 @@ public interface GsonN5Reader extends N5Reader { Gson getGson(); + public String getAttributesKey(); + @Override default Map> listAttributes(final String pathName) throws N5Exception { From b01b6becfbeab6ff9479e4538c843631bb106c8b Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 6 Aug 2024 16:57:36 -0400 Subject: [PATCH 025/124] wip add constant N5_DATASET_ATTRIBUTES --- .../java/org/janelia/saalfeldlab/n5/DatasetAttributes.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java 
index 9f67d0d0..958cd4be 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -35,6 +35,10 @@ public class DatasetAttributes implements Serializable { public static final String COMPRESSION_KEY = "compression"; public static final String CODEC_KEY = "codecs"; + public static final String[] N5_DATASET_ATTRIBUTES = new String[]{ + DIMENSIONS_KEY, BLOCK_SIZE_KEY, DATA_TYPE_KEY, COMPRESSION_KEY, CODEC_KEY + }; + /* version 0 */ protected static final String compressionTypeKey = "compressionType"; @@ -54,8 +58,8 @@ public DatasetAttributes( this.dimensions = dimensions; this.blockSize = blockSize; this.dataType = dataType; - this.compression = compression; this.codecs = codecs; + this.compression = compression; } public DatasetAttributes( From 0cb5a0f5fd99a7c0b8cc19e3140be4612d38bd23 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 8 Aug 2024 16:10:47 -0400 Subject: [PATCH 026/124] refactor: Compression interface extends Codec * no need for getCompressionAsCodec * use getType for codecs --- .../janelia/saalfeldlab/n5/CodecAdapter.java | 28 ++++++------ .../janelia/saalfeldlab/n5/Compression.java | 45 ++----------------- .../saalfeldlab/n5/DatasetAttributes.java | 8 ++-- .../saalfeldlab/n5/codec/AsTypeCodec.java | 7 +-- .../saalfeldlab/n5/codec/BytesCodec.java | 23 +++++++--- .../janelia/saalfeldlab/n5/codec/Codec.java | 2 +- .../saalfeldlab/n5/codec/ComposedCodec.java | 9 ++-- .../n5/codec/FixedScaleOffsetCodec.java | 13 +++--- .../saalfeldlab/n5/codec/IdentityCodec.java | 14 +++--- .../codec/checksum/Crc32cChecksumCodec.java | 16 +++---- .../saalfeldlab/n5/shard/ShardReader.java | 3 +- .../saalfeldlab/n5/shard/ShardingCodec.java | 30 ++++++++----- .../saalfeldlab/n5/shard/ShardDemos.java | 34 +++++++------- 13 files changed, 103 insertions(+), 129 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java 
b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java index db8a6daa..785380ad 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java @@ -50,30 +50,30 @@ public JsonElement serialize( final Type typeOfSrc, final JsonSerializationContext context) { - if (codec.getName().equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { + if (codec.getType().equals(FixedScaleOffsetCodec.TYPE)) { final FixedScaleOffsetCodec c = (FixedScaleOffsetCodec)codec; final JsonObject obj = new JsonObject(); - obj.addProperty("name", c.getName()); + obj.addProperty("name", c.getType()); obj.addProperty("scale", c.getScale()); obj.addProperty("offset", c.getOffset()); obj.addProperty("type", c.getType().toString().toLowerCase()); - obj.addProperty("encodedType", c.getEncodedType().toString().toLowerCase()); + obj.addProperty("encodedType", c.getEncodedDataType().toString().toLowerCase()); return obj; } - else if (codec.getName().equals(ShardingCodec.ID)) { + else if (codec.getType().equals(ShardingCodec.TYPE)) { final ShardingCodec sharding = (ShardingCodec)codec; final JsonObject obj = new JsonObject(); - obj.addProperty("name", sharding.getName()); + obj.addProperty("name", sharding.getType()); obj.add("configuration", context.serialize(sharding.getConfiguration())); return obj; } - else if (codec.getName().equals(BytesCodec.ID)) { + else if (codec.getType().equals(BytesCodec.TYPE)) { final BytesCodec bytes = (BytesCodec)codec; final JsonObject obj = new JsonObject(); - obj.addProperty("name", bytes.getName()); + obj.addProperty("type", bytes.getType()); final JsonObject config = new JsonObject(); - config.addProperty("endian", bytes.getName()); + config.addProperty("endian", bytes.getType()); obj.add("configuration", config); return obj; @@ -94,10 +94,10 @@ else if (!json.isJsonObject()) return null; final JsonObject jsonObject = json.getAsJsonObject(); - if (jsonObject.has("name")) { + if 
(jsonObject.has("type")) { - final String id = jsonObject.get("name").getAsString(); - if (id.equals(FixedScaleOffsetCodec.FIXED_SCALE_OFFSET_CODEC_ID)) { + final String type = jsonObject.get("type").getAsString(); + if (type.equals(FixedScaleOffsetCodec.TYPE)) { return new FixedScaleOffsetCodec( jsonObject.get("scale").getAsDouble(), @@ -105,12 +105,12 @@ else if (!json.isJsonObject()) DataType.valueOf(jsonObject.get("type").getAsString().toUpperCase()), DataType.valueOf(jsonObject.get("encodedType").getAsString().toUpperCase())); } - else if (id.equals(ShardingCodec.ID)) { + else if (type.equals(ShardingCodec.TYPE)) { return new ShardingCodec( context.deserialize(jsonObject.get("configuration"), ShardingConfiguration.class)); - } else if (id.equals(BytesCodec.ID)) { + } else if (type.equals(BytesCodec.TYPE)) { - // TODO + // TODO implement return new BytesCodec(); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index a6ca1be0..ba78cecf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -43,13 +43,7 @@ * * @author Stephan Saalfeld */ -public interface Compression extends Serializable { - - // @Override - // public default String getId() { - // - // return getType(); - // } +public interface Compression extends Serializable, Codec { /** * Annotation for runtime discovery of compression schemes. 
@@ -73,6 +67,7 @@ public interface Compression extends Serializable { @Target(ElementType.FIELD) public static @interface CompressionParameter {} + @Override public default String getType() { final CompressionType compressionType = getClass().getAnnotation(CompressionType.class); @@ -94,6 +89,7 @@ public default String getType() { * input stream * @return the decoded input stream */ + @Override public InputStream decode(InputStream in) throws IOException; /** @@ -103,40 +99,7 @@ public default String getType() { * the output stream * @return the encoded output stream */ + @Override public OutputStream encode(OutputStream out) throws IOException; - public static Codec getCompressionAsCodec(Compression compression) { - - return new CompressionCodec(compression); - } - - public static class CompressionCodec implements Codec { - - private static final long serialVersionUID = -7931131454184340637L; - private Compression compression; - - public CompressionCodec(Compression compression) { - - this.compression = compression; - } - - @Override - public InputStream decode(InputStream in) throws IOException { - - return compression.decode(in); - } - - @Override - public OutputStream encode(OutputStream out) throws IOException { - - return compression.encode(out); - } - - @Override - public String getName() { - - return compression.getType(); - } - - } } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 958cd4be..3205312f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -103,18 +103,16 @@ public Codec[] getCodecs() { public Codec collectCodecs() { - final Codec compressionCodec = Compression.getCompressionAsCodec(compression); - if (codecs == null || codecs.length == 0) - return compressionCodec; + return compression; else if (codecs.length == 1) - 
return new ComposedCodec(codecs[0], compressionCodec); + return new ComposedCodec(codecs[0], compression); else { final Codec[] codecsAndCompresor = new Codec[codecs.length + 1]; for (int i = 0; i < codecs.length; i++) codecsAndCompresor[i] = codecs[i]; - codecsAndCompresor[codecs.length] = compressionCodec; + codecsAndCompresor[codecs.length] = compression; return new ComposedCodec(codecsAndCompresor); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index f7bf9945..9e4ce008 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -13,6 +13,8 @@ public class AsTypeCodec implements Codec { private static final long serialVersionUID = 1031322606191894484L; + public static final String TYPE = "astype"; + protected transient final int numBytes; protected transient final int numEncodedBytes; @@ -22,7 +24,6 @@ public class AsTypeCodec implements Codec { protected final DataType type; protected final DataType encodedType; - protected final String name = "astype"; public AsTypeCodec( DataType type, DataType encodedType ) { @@ -37,9 +38,9 @@ public AsTypeCodec( DataType type, DataType encodedType ) } @Override - public String getName() { + public String getType() { - return name; + return TYPE; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 65cff6a2..8c2b9e02 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -5,17 +5,28 @@ public class BytesCodec extends IdentityCodec { private static final long serialVersionUID = 3523505403978222360L; - public static final String ID = "bytes"; + public static final String TYPE = "bytes"; - protected final String name = ID; + private final String 
endian; - protected final String endian = "little"; + public BytesCodec() { - // TODO implement me + this("little"); + } + + public BytesCodec(final String endian) { + + this.endian = endian; + } @Override - public String getName() { + public String getType() { + + return TYPE; + } + + public String getEndian() { - return name; + return endian; } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index b3225b7e..862a255e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -32,6 +32,6 @@ public interface Codec extends Serializable { */ public OutputStream encode(OutputStream out) throws IOException; - public String getName(); + public String getType(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java index 10f8adb0..3b07ad2b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java @@ -10,9 +10,10 @@ public class ComposedCodec implements Codec { private static final long serialVersionUID = 5068349140842235924L; - private final Codec[] filters; - protected String name = "composed"; + protected static final String TYPE = "composed"; + + private final Codec[] filters; public ComposedCodec(final Codec... filters) { @@ -20,9 +21,9 @@ public ComposedCodec(final Codec... 
filters) { } @Override - public String getName() { + public String getType() { - return name; + return TYPE; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java index 0538498d..eba6e12e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java @@ -12,14 +12,11 @@ public class FixedScaleOffsetCodec extends AsTypeCodec { private static final long serialVersionUID = 8024945290803548528L; - public static transient final String FIXED_SCALE_OFFSET_CODEC_ID = "fixedscaleoffset"; + public static transient final String TYPE = "fixedscaleoffset"; private final double scale; - private final double offset; - protected final String name = FIXED_SCALE_OFFSET_CODEC_ID; - private transient final ByteBuffer tmpEncoder; private transient final ByteBuffer tmpDecoder; @@ -82,20 +79,20 @@ public double getOffset() { return offset; } - public DataType getType() { + public DataType getDataType() { return super.type; } - public DataType getEncodedType() { + public DataType getEncodedDataType() { return encodedType; } @Override - public String getName() { + public String getType() { - return name; + return TYPE; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 4383669a..6ee5c64c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -8,13 +8,7 @@ public class IdentityCodec implements Codec { private static final long serialVersionUID = 8354269325800855621L; - protected final String name = "id"; - - @Override - public String getName() { - - return name; - } + protected static final String TYPE = "id"; @Override public InputStream decode(InputStream in) 
throws IOException { @@ -28,4 +22,10 @@ public OutputStream encode(OutputStream out) throws IOException { return out; } + @Override + public String getType() { + + return TYPE; + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java index 0a16d435..0b85ed5a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java @@ -7,21 +7,13 @@ public class Crc32cChecksumCodec extends ChecksumCodec { private static final long serialVersionUID = 7424151868725442500L; - public static transient final String CRC32C_CHECKSUM_CODEC_ID = "crc32c"; - - private final String name = CRC32C_CHECKSUM_CODEC_ID; + public static transient final String TYPE = "crc32c"; public Crc32cChecksumCodec() { super(new CRC32(), 4); } - @Override - public String getName() { - - return name; - } - @Override public long encodedSize(final long size) { @@ -42,4 +34,10 @@ public ByteBuffer getChecksumValue() { return buf; } + @Override + public String getType() { + + return TYPE; + } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index ed4e7128..0ab5d276 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -5,7 +5,6 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; -import org.janelia.saalfeldlab.n5.Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DefaultBlockReader; @@ -82,7 +81,7 @@ private static ShardedDatasetAttributes buildTestAttributes() { new ShardingCodec( new ShardingConfiguration( new int[]{2, 2}, - new Codec[]{new Compression.CompressionCodec(new 
RawCompression()), new IdentityCodec()}, + new Codec[]{new RawCompression(), new IdentityCodec()}, new Codec[]{new Crc32cChecksumCodec()}, IndexLocation.END) ) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index 9f9f978a..aa0f6684 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -6,6 +6,7 @@ import java.lang.reflect.Type; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; @@ -19,17 +20,24 @@ public class ShardingCodec implements Codec { private static final long serialVersionUID = -5879797314954717810L; - public static final String ID = "sharding_indexed"; + public static final String TYPE = "sharding_indexed"; private final ShardingConfiguration configuration; - private final String name = ID; - public ShardingCodec(ShardingConfiguration configuration) { this.configuration = configuration; } + public ShardingCodec( + final int[] blockSize, + final Codec[] codecs, + final Codec[] indexCodecs, + final IndexLocation indexLocation) { + + this.configuration = new ShardingConfiguration(blockSize, codecs, indexCodecs, indexLocation); + } + public ShardingConfiguration getConfiguration() { return configuration; @@ -39,6 +47,7 @@ public ShardingConfiguration getConfiguration() { public InputStream decode(InputStream in) throws IOException { // TODO Auto-generated method stub + // This method actually makes no sense for a sharding codec return in; } @@ -46,15 +55,10 @@ public InputStream decode(InputStream in) throws IOException { public OutputStream encode(OutputStream out) throws IOException { // TODO Auto-generated method stub + // This method actually makes no sense for a sharding codec return out; } - 
@Override - public String getName() { - - return name; - } - public static boolean isShardingCodec(final Codec codec) { return codec instanceof ShardingCodec; @@ -68,7 +72,7 @@ public JsonElement serialize(ShardingCodec src, Type typeOfSrc, JsonSerializatio final JsonObject jsonObj = new JsonObject(); - jsonObj.addProperty("name", ShardingCodec.ID); + jsonObj.addProperty("name", ShardingCodec.TYPE); // context.serialize(typeOfSrc); return jsonObj; @@ -83,4 +87,10 @@ public ShardingCodec deserialize(JsonElement json, Type typeOfT, JsonDeserializa } + @Override + public String getType() { + + return TYPE; + } + } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 31491634..9ea523b1 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,14 +1,12 @@ package org.janelia.saalfeldlab.n5.shard; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import com.google.gson.JsonSerializationContext; -import org.janelia.saalfeldlab.n5.Compression; +import java.lang.reflect.Type; +import java.net.MalformedURLException; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; + import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; @@ -23,12 +21,12 @@ import org.janelia.saalfeldlab.n5.universe.N5Factory; import org.junit.Test; -import java.lang.reflect.Type; -import java.net.MalformedURLException; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import 
java.nio.file.Paths; -import java.util.Arrays; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; public class ShardDemos { @@ -79,7 +77,7 @@ public void writeReadBlockTest() { new ShardingCodec( new ShardingConfiguration( new int[]{2, 2}, - new Codec[]{new Compression.CompressionCodec(new RawCompression()), new IdentityCodec()}, + new Codec[]{new RawCompression(), new IdentityCodec()}, new Codec[]{new Crc32cChecksumCodec()}, IndexLocation.END) ) @@ -144,13 +142,11 @@ public void nameConfigurationGsonTest() { final N5Factory factory = new N5Factory(); final GsonBuilder gson = new GsonBuilder(); - - gson.registerTypeHierarchyAdapter() factory.gsonBuilder(gson); final N5Reader n5 = factory.openReader("src/test/resources/shardExamples/test.zarr/mid_sharded"); final JsonObject zarrJson = n5.getAttribute("/", "/", JsonObject.class); - zarrJson.remove("shard") + zarrJson.remove("shard"); } } From 553c85e3dfe2e3a662a48d30e97f2830ef777c96 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 9 Aug 2024 15:25:19 -0400 Subject: [PATCH 027/124] wip: BytesCodec update --- .../saalfeldlab/n5/codec/BytesCodec.java | 73 +++++++++++++++++-- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 8c2b9e02..a66657b6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -1,22 +1,56 @@ package org.janelia.saalfeldlab.n5.codec; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteOrder; -public class BytesCodec extends IdentityCodec { +import com.google.gson.JsonDeserializationContext; +import 
com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; + +public class BytesCodec implements Codec { private static final long serialVersionUID = 3523505403978222360L; - public static final String TYPE = "bytes"; + public static String TYPE = "bytes"; + + protected final ByteOrder byteOrder; - private final String endian; + protected transient final byte[] array; public BytesCodec() { - this("little"); + this(ByteOrder.LITTLE_ENDIAN); + } + + public BytesCodec(final ByteOrder byteOrder) { + + this(byteOrder, 256); } - public BytesCodec(final String endian) { + public BytesCodec(final ByteOrder byteOrder, final int N) { - this.endian = endian; + this.byteOrder = byteOrder; + this.array = new byte[N]; + } + + @Override + public InputStream decode(InputStream in) throws IOException { + + // TODO not applicable for array -> bytes + return in; + } + + @Override + public OutputStream encode(OutputStream out) throws IOException { + + // TODO not applicable for array -> bytes + return out; } @Override @@ -25,8 +59,31 @@ public String getType() { return TYPE; } - public String getEndian() { + public static final ByteOrderAdapter byteOrderAdapter = new ByteOrderAdapter(); + + public static class ByteOrderAdapter implements JsonDeserializer, JsonSerializer { + + @Override + public JsonElement serialize(ByteOrder src, java.lang.reflect.Type typeOfSrc, + JsonSerializationContext context) { + + if (src.equals(ByteOrder.LITTLE_ENDIAN)) + return new JsonPrimitive("little"); + else + return new JsonPrimitive("big"); + } + + @Override + public ByteOrder deserialize(JsonElement json, java.lang.reflect.Type typeOfT, + JsonDeserializationContext context) throws JsonParseException { + + if (json.getAsString().equals("little")) + return ByteOrder.LITTLE_ENDIAN; + if (json.getAsString().equals("big")) + 
return ByteOrder.BIG_ENDIAN; + + return null; + } - return endian; } } From 35a4d354ee54d1c22849f92c49fad50269b069e9 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 9 Aug 2024 15:21:45 -0400 Subject: [PATCH 028/124] test: remove outdated wip config parsing --- .../saalfeldlab/n5/shard/ShardDemos.java | 69 +------------------ 1 file changed, 1 insertion(+), 68 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 9ea523b1..d9a1a604 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,6 +1,5 @@ package org.janelia.saalfeldlab.n5.shard; -import java.lang.reflect.Type; import java.net.MalformedURLException; import java.nio.file.FileSystems; import java.nio.file.Path; @@ -10,7 +9,6 @@ import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; -import org.janelia.saalfeldlab.n5.N5Reader; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; @@ -21,12 +19,6 @@ import org.janelia.saalfeldlab.n5.universe.N5Factory; import org.junit.Test; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import com.google.gson.JsonSerializationContext; public class ShardDemos { @@ -77,7 +69,7 @@ public void writeReadBlockTest() { new ShardingCodec( new ShardingConfiguration( new int[]{2, 2}, - new Codec[]{new RawCompression(), new IdentityCodec()}, + new Codec[]{new Compression.CompressionCodec(new RawCompression()), new IdentityCodec()}, new Codec[]{new Crc32cChecksumCodec()}, IndexLocation.END) ) @@ -90,63 +82,4 @@ public void 
writeReadBlockTest() { writer.readBlock("shard", datasetAttributes, 0,0); } - private static class ZarrConfig { - final String name; - final T configuration; - - private ZarrConfig() { - name = ""; - configuration = null; - } - } - - private class GridConfig extends ZarrConfig {} - private class KeyEncodingConfig extends ZarrConfig {} - - private class ZarrChunk {} - - private class ZarrChunkAdapter implements com.google.gson.JsonSerializer, com.google.gson.JsonDeserializer { - final ZarrConfig grid; - final ZarrConfig keyEncoding; - - public ZarrChunkAdapter() { - grid = null; - keyEncoding = null; - } - public ZarrChunkAdapter(ZarrConfig grid, ZarrConfig key_encoding) { - - this.grid = grid; - this.keyEncoding = key_encoding; - } - - @Override public ZarrChunk deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { - - if (!json.isJsonObject()) return null; - - final JsonObject obj = json.getAsJsonObject(); - final JsonObject grid = obj.getAsJsonObject("chunk_grid"); - - return null; - } - - @Override public JsonElement serialize(ZarrChunk src, Type typeOfSrc, JsonSerializationContext context) { - - return null; - } - } - - - @Test - public void nameConfigurationGsonTest() { - - final N5Factory factory = new N5Factory(); - final GsonBuilder gson = new GsonBuilder(); - - factory.gsonBuilder(gson); - final N5Reader n5 = factory.openReader("src/test/resources/shardExamples/test.zarr/mid_sharded"); - - final JsonObject zarrJson = n5.getAttribute("/", "/", JsonObject.class); - zarrJson.remove("shard"); - } - } From d636b83133476e037bd9b466d4f6a0cc41196cc6 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Mon, 12 Aug 2024 10:24:33 -0400 Subject: [PATCH 029/124] feat: annotations for extensible serialization for codecs --- .../org/janelia/saalfeldlab/n5/GsonUtils.java | 5 +- .../saalfeldlab/n5/NameConfigAdapter.java | 233 ++++++++++++++++++ .../saalfeldlab/n5/codec/BytesCodec.java | 3 + 
.../janelia/saalfeldlab/n5/codec/Codec.java | 3 + .../saalfeldlab/n5/codec/IdentityCodec.java | 3 + .../n5/serialization/JsonArrayUtils.java | 20 ++ .../n5/serialization/N5Annotations.java | 18 ++ .../n5/serialization/NameConfig.java | 43 ++++ .../saalfeldlab/n5/codec/BytesTests.java | 53 ++++ .../saalfeldlab/n5/shard/ShardDemos.java | 2 +- 10 files changed, 379 insertions(+), 4 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/serialization/JsonArrayUtils.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/serialization/N5Annotations.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java index 7423b57e..c09d1e14 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java @@ -56,10 +56,9 @@ public interface GsonUtils { static Gson registerGson(final GsonBuilder gsonBuilder) { gsonBuilder.registerTypeAdapter(DataType.class, new DataType.JsonAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(ShardingConfiguration.class, new ShardingConfiguration.ShardingConfigurationAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(Codec.class, NameConfigAdapter.getJsonAdapter(Codec.class)); gsonBuilder.registerTypeHierarchyAdapter(Compression.class, CompressionAdapter.getJsonAdapter()); - gsonBuilder.registerTypeHierarchyAdapter(ShardingConfiguration.class, - new ShardingConfiguration.ShardingConfigurationAdapter()); - gsonBuilder.registerTypeHierarchyAdapter(Codec.class, new CodecAdapter()); gsonBuilder.disableHtmlEscaping(); return gsonBuilder.create(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java 
b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java new file mode 100644 index 00000000..3384fb0d --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java @@ -0,0 +1,233 @@ +/** + * Copyright (c) 2017, Stephan Saalfeld + * All rights reserved. + *

      + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + *

      + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + *

      + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package org.janelia.saalfeldlab.n5; + +import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; +import org.janelia.saalfeldlab.n5.serialization.N5Annotations; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; +import org.scijava.annotations.Index; +import org.scijava.annotations.IndexItem; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map.Entry; + +/** + * T adapter, auto-discovers annotated T implementations in the classpath. 
+ * + * @author Caleb Hulbert + */ +public class NameConfigAdapter implements JsonDeserializer, JsonSerializer { + + private static HashMap, NameConfigAdapter> adapters = new HashMap<>(); + + private static void registerAdapter(Class cls) { + + adapters.put(cls, new NameConfigAdapter(cls)); + update(adapters.get(cls)); + } + private final HashMap> constructors = new HashMap<>(); + + private final HashMap> parameters = new HashMap<>(); + private final HashMap> parameterNames = new HashMap<>(); + private static ArrayList getDeclaredFields(Class clazz) { + + final ArrayList fields = new ArrayList<>(); + fields.addAll(Arrays.asList(clazz.getDeclaredFields())); + for (clazz = clazz.getSuperclass(); clazz != null; clazz = clazz.getSuperclass()) + fields.addAll(Arrays.asList(clazz.getDeclaredFields())); + return fields; + } + + @SuppressWarnings("unchecked") + public static synchronized void update(final NameConfigAdapter adapter) { + + final ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + final Index annotationIndex = Index.load(NameConfig.Name.class, classLoader); + for (final IndexItem item : annotationIndex) { + Class clazz; + try { + clazz = (Class)Class.forName(item.className()); + final String name = clazz.getAnnotation(NameConfig.Name.class).value(); + final String prefix = adapter.type.getAnnotation(NameConfig.Prefix.class).value(); + final String type = prefix + "." 
+ name; + + final Constructor constructor = clazz.getDeclaredConstructor(); + + final HashMap parameters = new HashMap<>(); + final HashMap parameterNames = new HashMap<>(); + final ArrayList fields = getDeclaredFields(clazz); + for (final Field field : fields) { + final NameConfig.Parameter parameter = field.getAnnotation(NameConfig.Parameter.class); + if (parameter != null) { + + final String parameterName; + if (parameter.value().equals("")) + parameterName = field.getName(); + else + parameterName = parameter.value(); + + parameterNames.put(field.getName(), parameterName); + + parameters.put(field.getName(), field); + } + } + + adapter.constructors.put(type, constructor); + adapter.parameters.put(type, parameters); + adapter.parameterNames.put(type, parameterNames); + } catch (final ClassNotFoundException | NoSuchMethodException | ClassCastException + | UnsatisfiedLinkError e) { + System.err.println("T '" + item.className() + "' could not be registered"); + } + } + } + + private final Class type; + + public NameConfigAdapter(Class cls) { + this.type = cls; + } + + @Override + public JsonElement serialize( + final T object, + final Type typeOfSrc, + final JsonSerializationContext context) { + + final Class clazz = (Class)object.getClass(); + final String name = clazz.getAnnotation(NameConfig.Name.class).value(); + final String prefix = type.getAnnotation(NameConfig.Prefix.class).value(); + final String type = prefix + "." 
+ name; + + final JsonObject json = new JsonObject(); + json.addProperty("name", name); + final JsonObject configuration = new JsonObject(); + json.add("configuration", configuration); + + final HashMap parameterTypes = parameters.get(type); + final HashMap parameterNameMap = parameterNames.get(type); + try { + for (final Entry parameterType : parameterTypes.entrySet()) { + final String fieldName = parameterType.getKey(); + final Field field = clazz.getDeclaredField(fieldName); + final boolean isAccessible = field.isAccessible(); + field.setAccessible(true); + final Object value = field.get(object); + field.setAccessible(isAccessible); + final JsonElement serialized = context.serialize(value); + if (field.getAnnotation(N5Annotations.ReverseArray.class) != null) { + final JsonArray reversedArray = reverseJsonArray(serialized.getAsJsonArray()); + configuration.add(parameterNameMap.get(fieldName), reversedArray); + } else + configuration.add(parameterNameMap.get(fieldName), serialized); + + } + } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { + e.printStackTrace(System.err); + return null; + } + + return json; + } + + @Override + public T deserialize( + final JsonElement json, + final Type typeOfT, + final JsonDeserializationContext context) throws JsonParseException { + + final String prefix = type.getAnnotation(NameConfig.Prefix.class).value(); + + final JsonObject chunkGridJson = json.getAsJsonObject(); + final String name = chunkGridJson.getAsJsonPrimitive("name").getAsString(); + if (name == null) + return null; + final JsonObject configuration = chunkGridJson.getAsJsonObject("configuration"); + if (configuration == null) + return null; + + final String type = prefix + "." 
+ name; + + final Constructor constructor = constructors.get(type); + constructor.setAccessible(true); + final T chunkGrid; + try { + chunkGrid = constructor.newInstance(); + final HashMap parameterTypes = parameters.get(type); + final HashMap parameterNameMap = parameterNames.get(type); + for (final Entry parameterType : parameterTypes.entrySet()) { + final String fieldName = parameterType.getKey(); + final String paramName = parameterNameMap.get(fieldName); + final JsonElement paramJson = configuration.get(paramName); + if (paramJson != null) { + final Field field = parameterType.getValue(); + final Object parameter; + if (field.getAnnotation(N5Annotations.ReverseArray.class) != null) { + final JsonArray reversedArray = reverseJsonArray(paramJson); + parameter = context.deserialize(reversedArray, field.getType()); + } else + parameter = context.deserialize(paramJson, field.getType()); + ReflectionUtils.setFieldValue(chunkGrid, fieldName, parameter); + } + } + } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException + | SecurityException | NoSuchFieldException e) { + e.printStackTrace(System.err); + return null; + } + + return chunkGrid; + } + + private static JsonArray reverseJsonArray(JsonElement paramJson) { + + final JsonArray reversedJson = new JsonArray(paramJson.getAsJsonArray().size()); + for (int i = paramJson.getAsJsonArray().size() - 1; i >= 0; i--) { + reversedJson.add(paramJson.getAsJsonArray().get(i)); + } + return reversedJson; + } + + public static NameConfigAdapter getJsonAdapter(Class cls) { + + if (adapters.get(cls) == null) + registerAdapter(cls); + return (NameConfigAdapter) adapters.get(cls); + } +} \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index a66657b6..5f0bb4ab 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -12,13 +12,16 @@ import com.google.gson.JsonPrimitive; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; +@NameConfig.Name("bytes") public class BytesCodec implements Codec { private static final long serialVersionUID = 3523505403978222360L; public static String TYPE = "bytes"; + @NameConfig.Parameter("endian") protected final ByteOrder byteOrder; protected transient final byte[] array; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index 862a255e..2c76438c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -1,5 +1,7 @@ package org.janelia.saalfeldlab.n5.codec; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -12,6 +14,7 @@ * Modeled after Filters in * Zarr. 
*/ +@NameConfig.Prefix("codec") public interface Codec extends Serializable { /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 6ee5c64c..79c532b1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -1,9 +1,12 @@ package org.janelia.saalfeldlab.n5.codec; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +@NameConfig.Name(IdentityCodec.TYPE) public class IdentityCodec implements Codec { private static final long serialVersionUID = 8354269325800855621L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/serialization/JsonArrayUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/serialization/JsonArrayUtils.java new file mode 100644 index 00000000..b65fbb6c --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/serialization/JsonArrayUtils.java @@ -0,0 +1,20 @@ +package org.janelia.saalfeldlab.n5.serialization; + +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; + +public class JsonArrayUtils { + + public static void reverse(final JsonArray array) { + + JsonElement a; + final int max = array.size() - 1; + for (int i = (max - 1) / 2; i >= 0; --i) { + final int j = max - i; + a = array.get(i); + array.set(i, array.get(j)); + array.set(j, a); + } + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/serialization/N5Annotations.java b/src/main/java/org/janelia/saalfeldlab/n5/serialization/N5Annotations.java new file mode 100644 index 00000000..500f139f --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/serialization/N5Annotations.java @@ -0,0 +1,18 @@ +package org.janelia.saalfeldlab.n5.serialization; + +import java.io.Serializable; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import 
java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +public interface N5Annotations extends Serializable { + + @Inherited + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.FIELD) + @interface ReverseArray { + } +} + diff --git a/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java b/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java new file mode 100644 index 00000000..22965382 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java @@ -0,0 +1,43 @@ +package org.janelia.saalfeldlab.n5.serialization; + +import org.scijava.annotations.Indexable; + +import java.io.Serializable; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +public interface NameConfig extends Serializable { + + @Retention(RetentionPolicy.RUNTIME) + @Inherited + @Target(ElementType.TYPE) + @interface Prefix { + String value(); + } + + @Retention(RetentionPolicy.RUNTIME) + @Inherited + @Target(ElementType.TYPE) + @Indexable + @interface Name { + String value(); + } + + @Retention(RetentionPolicy.RUNTIME) + @Inherited + @Target(ElementType.FIELD) + @interface Parameter { + String value() default ""; + } + + default String getType() { + + final Name type = getClass().getAnnotation(Name.class); + return type == null ? 
null : type.value(); + + } +} + diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java new file mode 100644 index 00000000..8eeb8345 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -0,0 +1,53 @@ +package org.janelia.saalfeldlab.n5.codec; + +import com.google.gson.GsonBuilder; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.N5Writer; +import org.janelia.saalfeldlab.n5.NameConfigAdapter; +import org.janelia.saalfeldlab.n5.RawCompression; +import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.junit.Test; + +import java.nio.ByteOrder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class BytesTests { + + @Test + public void testSerialization() { + + final N5Factory factory = new N5Factory(); + factory.cacheAttributes(false); + final GsonBuilder gsonBuilder = new GsonBuilder(); + gsonBuilder.registerTypeHierarchyAdapter(Codec.class, NameConfigAdapter.getJsonAdapter(Codec.class)); + gsonBuilder.registerTypeAdapter(ByteOrder.class, BytesCodec.byteOrderAdapter); + factory.gsonBuilder(gsonBuilder); + + final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.zarr"); + final Codec bytes = reader.getAttribute("mid_sharded", "codecs[0]/configuration/codecs[0]", Codec.class); + assertTrue("as BytesCodec", bytes instanceof BytesCodec); + + final N5Writer writer = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); + + final DatasetAttributes datasetAttributes = new DatasetAttributes( + new long[]{8, 8}, + new int[]{4, 4}, + DataType.UINT8, + new RawCompression(), + new Codec[]{ + new IdentityCodec(), + new BytesCodec(ByteOrder.LITTLE_ENDIAN) + } + ); + writer.setAttribute("shard", "/", datasetAttributes); + final DatasetAttributes deserialized = 
writer.getAttribute("shard", "/", DatasetAttributes.class); + + assertEquals("2 codecs", 2, deserialized.getCodecs().length); + assertTrue("Identity", deserialized.getCodecs()[0] instanceof IdentityCodec); + assertTrue("Bytes", deserialized.getCodecs()[1] instanceof BytesCodec); + assertEquals("LittleEndian",ByteOrder.LITTLE_ENDIAN, ((BytesCodec)deserialized.getCodecs()[1]).byteOrder); + } +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index d9a1a604..32574d4b 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -69,7 +69,7 @@ public void writeReadBlockTest() { new ShardingCodec( new ShardingConfiguration( new int[]{2, 2}, - new Codec[]{new Compression.CompressionCodec(new RawCompression()), new IdentityCodec()}, + new Codec[]{new RawCompression(), new IdentityCodec()}, new Codec[]{new Crc32cChecksumCodec()}, IndexLocation.END) ) From 1d9c0c2414afb309ae1def918b36c761d136b61d Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 12 Aug 2024 11:37:49 -0400 Subject: [PATCH 030/124] refactor: rename former 'chunkGrid' variables --- .../janelia/saalfeldlab/n5/NameConfigAdapter.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java index 3384fb0d..85768d18 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java @@ -174,11 +174,11 @@ public T deserialize( final String prefix = type.getAnnotation(NameConfig.Prefix.class).value(); - final JsonObject chunkGridJson = json.getAsJsonObject(); - final String name = chunkGridJson.getAsJsonPrimitive("name").getAsString(); + final JsonObject objectJson = json.getAsJsonObject(); + final String 
name = objectJson.getAsJsonPrimitive("name").getAsString(); if (name == null) return null; - final JsonObject configuration = chunkGridJson.getAsJsonObject("configuration"); + final JsonObject configuration = objectJson.getAsJsonObject("configuration"); if (configuration == null) return null; @@ -186,9 +186,9 @@ public T deserialize( final Constructor constructor = constructors.get(type); constructor.setAccessible(true); - final T chunkGrid; + final T object; try { - chunkGrid = constructor.newInstance(); + object = constructor.newInstance(); final HashMap parameterTypes = parameters.get(type); final HashMap parameterNameMap = parameterNames.get(type); for (final Entry parameterType : parameterTypes.entrySet()) { @@ -203,7 +203,7 @@ public T deserialize( parameter = context.deserialize(reversedArray, field.getType()); } else parameter = context.deserialize(paramJson, field.getType()); - ReflectionUtils.setFieldValue(chunkGrid, fieldName, parameter); + ReflectionUtils.setFieldValue(object, fieldName, parameter); } } } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException @@ -212,7 +212,7 @@ public T deserialize( return null; } - return chunkGrid; + return object; } private static JsonArray reverseJsonArray(JsonElement paramJson) { From c7fb316082ab1ee3bc794ba8bb700b03fec469c0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 12 Aug 2024 13:18:34 -0400 Subject: [PATCH 031/124] test: start CodecSerialization test * make AsType, FixedScaleOffset, Identity codecs serializable --- .../saalfeldlab/n5/codec/AsTypeCodec.java | 53 ++++++++++---- .../n5/codec/FixedScaleOffsetCodec.java | 40 +++++------ .../saalfeldlab/n5/codec/IdentityCodec.java | 7 +- .../n5/serialization/CodecSerialization.java | 70 +++++++++++++++++++ 4 files changed, 133 insertions(+), 37 deletions(-) create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index 9e4ce008..3a437ffa 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -7,34 +7,37 @@ import java.util.function.BiConsumer; import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; - +@NameConfig.Name(AsTypeCodec.TYPE) +@NameConfig.Prefix("codec") public class AsTypeCodec implements Codec { private static final long serialVersionUID = 1031322606191894484L; public static final String TYPE = "astype"; - protected transient final int numBytes; - protected transient final int numEncodedBytes; + protected transient int numBytes; + protected transient int numEncodedBytes; + + protected transient BiConsumer encoder; + protected transient BiConsumer decoder; - protected transient final BiConsumer encoder; - protected transient final BiConsumer decoder; + @NameConfig.Parameter + protected final DataType dataType; - protected final DataType type; + @NameConfig.Parameter protected final DataType encodedType; + private AsTypeCodec() { - public AsTypeCodec( DataType type, DataType encodedType ) - { - this.type = type; - this.encodedType = encodedType; + this(null, null); + } - numBytes = bytes(type); - numEncodedBytes = bytes(encodedType); + public AsTypeCodec(DataType dataType, DataType encodedType) { - encoder = converter(type, encodedType); - decoder = converter(encodedType, type); + this.dataType = dataType; + this.encodedType = encodedType; } @Override @@ -43,15 +46,37 @@ public String getType() { return TYPE; } + public DataType getDataType() { + + return dataType; + } + + public DataType getEncodedDataType() { + + return encodedType; + } + @Override public InputStream decode(InputStream in) throws IOException { + numBytes = bytes(dataType); + numEncodedBytes = bytes(encodedType); + + encoder 
= converter(dataType, encodedType); + decoder = converter(encodedType, dataType); + return new FixedLengthConvertedInputStream(numEncodedBytes, numBytes, decoder, in); } @Override public OutputStream encode(OutputStream out) throws IOException { + numBytes = bytes(dataType); + numEncodedBytes = bytes(encodedType); + + encoder = converter(dataType, encodedType); + decoder = converter(encodedType, dataType); + return new FixedLengthConvertedOutputStream(numBytes, numEncodedBytes, encoder, out); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java index eba6e12e..55613b57 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java @@ -7,25 +7,35 @@ import java.util.function.BiConsumer; import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; +@NameConfig.Name(FixedScaleOffsetCodec.TYPE) public class FixedScaleOffsetCodec extends AsTypeCodec { private static final long serialVersionUID = 8024945290803548528L; public static transient final String TYPE = "fixedscaleoffset"; - private final double scale; - private final double offset; + @NameConfig.Parameter + protected final double scale; - private transient final ByteBuffer tmpEncoder; - private transient final ByteBuffer tmpDecoder; + @NameConfig.Parameter + protected final double offset; - public transient final BiConsumer encoder; - public transient final BiConsumer encoderPre; - public transient final BiConsumer encoderPost; - public transient final BiConsumer decoder; - public transient final BiConsumer decoderPre; - public transient final BiConsumer decoderPost; + private transient ByteBuffer tmpEncoder; + private transient ByteBuffer tmpDecoder; + + public transient BiConsumer encoder; + public transient BiConsumer encoderPre; + public transient 
BiConsumer encoderPost; + public transient BiConsumer decoder; + public transient BiConsumer decoderPre; + public transient BiConsumer decoderPost; + + private FixedScaleOffsetCodec() { + + this(1, 0, null, null); + } public FixedScaleOffsetCodec(final double scale, final double offset, DataType type, DataType encodedType) { @@ -79,16 +89,6 @@ public double getOffset() { return offset; } - public DataType getDataType() { - - return super.type; - } - - public DataType getEncodedDataType() { - - return encodedType; - } - @Override public String getType() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 79c532b1..41fd10f1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -1,17 +1,18 @@ package org.janelia.saalfeldlab.n5.codec; -import org.janelia.saalfeldlab.n5.serialization.NameConfig; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + @NameConfig.Name(IdentityCodec.TYPE) +@NameConfig.Prefix("codec") public class IdentityCodec implements Codec { private static final long serialVersionUID = 8354269325800855621L; - protected static final String TYPE = "id"; + public static final String TYPE = "id"; @Override public InputStream decode(InputStream in) throws IOException { diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java new file mode 100644 index 00000000..148618ee --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -0,0 +1,70 @@ +package org.janelia.saalfeldlab.n5.serialization; + +import static org.junit.Assert.assertEquals; + +import org.janelia.saalfeldlab.n5.DataType; +import 
org.janelia.saalfeldlab.n5.NameConfigAdapter; +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; +import org.janelia.saalfeldlab.n5.codec.IdentityCodec; +import org.junit.Before; +import org.junit.Test; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; + +public class CodecSerialization { + + private Gson gson; + + @Before + public void before() { + + final GsonBuilder builder = new GsonBuilder(); + builder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); + builder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); + builder.registerTypeAdapter(FixedScaleOffsetCodec.class, + NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); + gson = builder.create(); + } + + @Test + public void testSerializeIdentity() { + + final IdentityCodec id = new IdentityCodec(); + final JsonObject jsonId = gson.toJsonTree(id).getAsJsonObject(); + final JsonElement expected = gson.fromJson("{\"name\":\"id\", \"configuration\":{}}", JsonElement.class); + assertEquals("identity", expected, jsonId.getAsJsonObject()); + } + + @Test + public void testSerializeAsType() { + + final AsTypeCodec asTypeCodec = new AsTypeCodec(DataType.FLOAT64, DataType.INT16); + final JsonObject jsonAsType = gson.toJsonTree(asTypeCodec).getAsJsonObject(); + final JsonElement expected = gson.fromJson( + "{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}}", + JsonElement.class); + assertEquals("asType", expected, jsonAsType.getAsJsonObject()); + } + + @Test + public void testSerializeCodecArray() { + + final Codec[] codecs = new Codec[]{ + new IdentityCodec(), + new AsTypeCodec(DataType.FLOAT64, DataType.INT16) + }; + final JsonArray 
jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); + final JsonElement expected = gson.fromJson( + "[{\"name\":\"id\",\"configuration\":{}},{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}}]", + JsonElement.class); + + assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); + } + +} From 344aa462d0855019a5475eddcf46a288112d9c0a Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 12 Aug 2024 13:39:59 -0400 Subject: [PATCH 032/124] test: codec array with a compressor --- .../n5/serialization/CodecSerialization.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java index 148618ee..c5d09a1d 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -3,6 +3,7 @@ import static org.junit.Assert.assertEquals; import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.NameConfigAdapter; import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; import org.janelia.saalfeldlab.n5.codec.Codec; @@ -55,15 +56,25 @@ public void testSerializeAsType() { @Test public void testSerializeCodecArray() { - final Codec[] codecs = new Codec[]{ + Codec[] codecs = new Codec[]{ new IdentityCodec(), new AsTypeCodec(DataType.FLOAT64, DataType.INT16) }; - final JsonArray jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); - final JsonElement expected = gson.fromJson( + JsonArray jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); + JsonElement expected = gson.fromJson( "[{\"name\":\"id\",\"configuration\":{}},{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}}]", JsonElement.class); + 
assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); + + codecs = new Codec[]{ + new AsTypeCodec(DataType.FLOAT64, DataType.INT16), + new GzipCompression() + }; + jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); + expected = gson.fromJson( + "[{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}},{\"level\":-1,\"useZlib\":false}]", + JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); } From 6b1e9a188d71e87abd7a925e4fe358bd7d2717f0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 12 Aug 2024 16:38:52 -0400 Subject: [PATCH 033/124] test: deserialization behavior --- .../n5/serialization/CodecSerialization.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java index c5d09a1d..1fc6031c 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -25,12 +25,15 @@ public class CodecSerialization { @Before public void before() { - final GsonBuilder builder = new GsonBuilder(); - builder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); - builder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); - builder.registerTypeAdapter(FixedScaleOffsetCodec.class, + final GsonBuilder gsonBuilder = new GsonBuilder(); + gsonBuilder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); + gsonBuilder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); + gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); - gson = 
builder.create(); + // gsonBuilder.registerTypeAdapter(Codec.class, + // NameConfigAdapter.getJsonAdapter(Codec.class)); + + gson = gsonBuilder.create(); } @Test @@ -66,6 +69,8 @@ public void testSerializeCodecArray() { JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); + // final Codec[] codecsDeserialized = gson.fromJson(expected, Codec[].class); + // System.out.println(Arrays.toString(codecsDeserialized)); codecs = new Codec[]{ new AsTypeCodec(DataType.FLOAT64, DataType.INT16), From 664007b7cb5e9da951aa58901bd6a06a73a63fa5 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 14 Aug 2024 12:15:40 -0400 Subject: [PATCH 034/124] feat: more shard and codec work --- .../janelia/saalfeldlab/n5/CodecAdapter.java | 121 ------------------ .../saalfeldlab/n5/DatasetAttributes.java | 68 ++++++++++ .../saalfeldlab/n5/DefaultBlockReader.java | 4 +- .../n5/FileSystemKeyValueAccess.java | 4 +- .../org/janelia/saalfeldlab/n5/GsonUtils.java | 10 +- .../saalfeldlab/n5/GzipCompression.java | 6 + .../saalfeldlab/n5/KeyValueAccess.java | 2 +- .../org/janelia/saalfeldlab/n5/N5Writer.java | 24 +++- .../saalfeldlab/n5/NameConfigAdapter.java | 25 +++- .../saalfeldlab/n5/RawCompression.java | 1 + .../n5/ShardedDatasetAttributes.java | 15 +-- .../saalfeldlab/n5/codec/BytesCodec.java | 15 +-- .../codec/checksum/Crc32cChecksumCodec.java | 5 +- .../n5/serialization/NameConfig.java | 1 + .../saalfeldlab/n5/shard/AbstractShard.java | 2 +- .../saalfeldlab/n5/shard/ShardIndex.java | 8 +- .../saalfeldlab/n5/shard/ShardReader.java | 21 ++- .../saalfeldlab/n5/shard/ShardWriter.java | 4 +- .../saalfeldlab/n5/shard/ShardingCodec.java | 90 ++++++++----- .../n5/shard/ShardingConfiguration.java | 92 ------------- .../saalfeldlab/n5/shard/VirtualShard.java | 68 +++++++++- .../n5/serialization/CodecSerialization.java | 13 +- .../saalfeldlab/n5/shard/ShardDemos.java | 42 +++--- 23 files changed, 308 insertions(+), 333 deletions(-) delete mode 100644 
src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java deleted file mode 100644 index 785380ad..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/CodecAdapter.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Copyright (c) 2017, Stephan Saalfeld - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -package org.janelia.saalfeldlab.n5; - -import java.lang.reflect.Type; - -import org.janelia.saalfeldlab.n5.codec.BytesCodec; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.FixedScaleOffsetCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; - -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonNull; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class CodecAdapter implements JsonDeserializer, JsonSerializer { - - @Override - public JsonElement serialize( - final Codec codec, - final Type typeOfSrc, - final JsonSerializationContext context) { - - if (codec.getType().equals(FixedScaleOffsetCodec.TYPE)) { - final FixedScaleOffsetCodec c = (FixedScaleOffsetCodec)codec; - final JsonObject obj = new JsonObject(); - obj.addProperty("name", c.getType()); - obj.addProperty("scale", c.getScale()); - obj.addProperty("offset", c.getOffset()); - obj.addProperty("type", c.getType().toString().toLowerCase()); - obj.addProperty("encodedType", c.getEncodedDataType().toString().toLowerCase()); - return obj; - } - else if (codec.getType().equals(ShardingCodec.TYPE)) { - final ShardingCodec sharding = (ShardingCodec)codec; - final JsonObject obj = new JsonObject(); - obj.addProperty("name", sharding.getType()); - obj.add("configuration", context.serialize(sharding.getConfiguration())); - return obj; - } - else if (codec.getType().equals(BytesCodec.TYPE)) { - final BytesCodec bytes = (BytesCodec)codec; - final JsonObject obj = new JsonObject(); - obj.addProperty("type", bytes.getType()); - - final JsonObject config = new JsonObject(); - config.addProperty("endian", bytes.getType()); - obj.add("configuration", config); - - return 
obj; - } - - return JsonNull.INSTANCE; - } - - @Override - public Codec deserialize( - final JsonElement json, - final Type typeOfT, - final JsonDeserializationContext context) throws JsonParseException { - - if (json == null) - return null; - else if (!json.isJsonObject()) - return null; - - final JsonObject jsonObject = json.getAsJsonObject(); - if (jsonObject.has("type")) { - - final String type = jsonObject.get("type").getAsString(); - if (type.equals(FixedScaleOffsetCodec.TYPE)) { - - return new FixedScaleOffsetCodec( - jsonObject.get("scale").getAsDouble(), - jsonObject.get("offset").getAsDouble(), - DataType.valueOf(jsonObject.get("type").getAsString().toUpperCase()), - DataType.valueOf(jsonObject.get("encodedType").getAsString().toUpperCase())); - } - else if (type.equals(ShardingCodec.TYPE)) { - return new ShardingCodec( - context.deserialize(jsonObject.get("configuration"), ShardingConfiguration.class)); - } else if (type.equals(BytesCodec.TYPE)) { - - // TODO implement - return new BytesCodec(); - } - } - - return null; - } - -} \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 3205312f..95eec18a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -1,11 +1,20 @@ package org.janelia.saalfeldlab.n5; import java.io.Serializable; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.HashMap; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.ComposedCodec; +import 
org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** * Mandatory dataset attributes: @@ -172,4 +181,63 @@ static DatasetAttributes from( return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); } + + private static DatasetAttributesAdapter adapter = null; + public static DatasetAttributesAdapter getJsonAdapter() { + if (adapter == null) { + adapter = new DatasetAttributesAdapter(); + } + return adapter; + } + + public static class DatasetAttributesAdapter implements JsonSerializer, JsonDeserializer { + + @Override public DatasetAttributes deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { + + final JsonObject obj = json.getAsJsonObject(); + if (!obj.has(DIMENSIONS_KEY) || !obj.has(BLOCK_SIZE_KEY) || !obj.has(DATA_TYPE_KEY) || !obj.has(COMPRESSION_KEY)) + return null; + + final long[] dimensions = context.deserialize(obj.get(DIMENSIONS_KEY), long[].class); + final int[] blockSize = context.deserialize(obj.get(BLOCK_SIZE_KEY), int[].class); + final DataType dataType = context.deserialize(obj.get(DATA_TYPE_KEY), DataType.class); + final Compression compression = context.deserialize(obj.get(COMPRESSION_KEY), Compression.class); + final Codec[] codecs; + if (obj.has(CODEC_KEY)) { + codecs = context.deserialize(obj.get(CODEC_KEY), Codec[].class); + } else codecs = new Codec[0]; + + for (Codec codec : codecs) { + if (codec instanceof ShardingCodec) { + ShardingCodec shardingCodec = (ShardingCodec)codec; + return new ShardedDatasetAttributes( + dimensions, + shardingCodec.getBlockSize(), + blockSize, + shardingCodec.getIndexLocation(), + dataType, + compression, + codecs + ); + } + } + return new DatasetAttributes(dimensions, blockSize, dataType, compression); + } + + @Override public JsonElement serialize(DatasetAttributes src, Type typeOfSrc, JsonSerializationContext context) { + + final JsonObject obj = new JsonObject(); + obj.add(DIMENSIONS_KEY, context.serialize(src.dimensions)); + 
obj.add(BLOCK_SIZE_KEY, context.serialize(src.blockSize)); + obj.add(DATA_TYPE_KEY, context.serialize(src.dataType)); + obj.add(COMPRESSION_KEY, CompressionAdapter.getJsonAdapter().serialize(src.compression, src.compression.getClass(), context)); + + //TODO Caleb: Per the zarr v3 spec, codecs is necessary and cannot be an empty list, since it always needs at least + // one array -> bytes codec. Even in the case of no compressor, there should always be at least the + // `bytes` codec it seems. Consider how we want to handle this in N5 + obj.add(CODEC_KEY, context.serialize(src.codecs)); + + return obj; + } + } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 0f70baea..9f9b9772 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -30,7 +30,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** * Default implementation of {@link BlockReader}. 
@@ -149,7 +149,7 @@ public static > void readFromStream(final B dataBlock, dataBlock.readData(buffer); } - public static long getShardIndex(final ShardingConfiguration shardingConfiguration, final long[] gridPosition) { + public static long getShardIndex(final ShardingCodec shardingCodec, final long[] gridPosition) { // TODO implement return -1; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index 967cb6e2..bb2cf2db 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -218,10 +218,10 @@ public LockedFileChannel lockForWriting(final String normalPath) throws IOExcept } @Override - public LockedFileChannel lockForWriting(final String normalPath, final long startByte, final long endByte) + public LockedFileChannel lockForWriting(final String normalPath, final long startByte, final long size) throws IOException { - return new LockedFileChannel(normalPath, false, startByte, endByte); + return new LockedFileChannel(normalPath, false, startByte, size); } public LockedFileChannel lockForReading(final Path path) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java index c09d1e14..157c9bdd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java @@ -30,12 +30,13 @@ import java.io.Writer; import java.lang.reflect.Array; import java.lang.reflect.Type; +import java.nio.ByteOrder; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; import com.google.gson.Gson; import com.google.gson.GsonBuilder; @@ -45,6 
+46,7 @@ import com.google.gson.JsonPrimitive; import com.google.gson.JsonSyntaxException; import com.google.gson.reflect.TypeToken; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** * Utility class for working with JSON. @@ -56,9 +58,11 @@ public interface GsonUtils { static Gson registerGson(final GsonBuilder gsonBuilder) { gsonBuilder.registerTypeAdapter(DataType.class, new DataType.JsonAdapter()); - gsonBuilder.registerTypeHierarchyAdapter(ShardingConfiguration.class, new ShardingConfiguration.ShardingConfigurationAdapter()); - gsonBuilder.registerTypeHierarchyAdapter(Codec.class, NameConfigAdapter.getJsonAdapter(Codec.class)); gsonBuilder.registerTypeHierarchyAdapter(Compression.class, CompressionAdapter.getJsonAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(DatasetAttributes.class, DatasetAttributes.getJsonAdapter()); + gsonBuilder.registerTypeHierarchyAdapter(Codec.class, NameConfigAdapter.getJsonAdapter(Codec.class)); + gsonBuilder.registerTypeHierarchyAdapter(ByteOrder.class, BytesCodec.byteOrderAdapter); + gsonBuilder.registerTypeHierarchyAdapter(ShardingCodec.IndexLocation.class, ShardingCodec.indexLocationAdapter); gsonBuilder.disableHtmlEscaping(); return gsonBuilder.create(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java index 0b6d734d..ecaf2f0e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java @@ -37,13 +37,19 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; import org.apache.commons.compress.compressors.gzip.GzipParameters; import org.janelia.saalfeldlab.n5.Compression.CompressionType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; @CompressionType("gzip") +@NameConfig.Name("gzip") public class GzipCompression implements DefaultBlockReader, DefaultBlockWriter, Compression { private static final long 
serialVersionUID = 8630847239813334263L; @CompressionParameter + @NameConfig.Parameter + //TODO Caleb: How to handle serialization of parameter-less constructor. + // For N5, default is -1, for zarr, range is 0-9 and is required. + // How to map -1 to some default (1?) when serializing to zarr? private final int level; @CompressionParameter diff --git a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java index d199bf67..a4fa42b8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java @@ -180,7 +180,7 @@ public LockedChannel lockForReading(String normalPath, final long startByte, fin */ public LockedChannel lockForWriting(final String normalPath) throws IOException; - public LockedChannel lockForWriting(String normalPath, final long startByte, final long endByte) + public LockedChannel lockForWriting(String normalPath, final long startByte, final long size) throws IOException; /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index f4645a80..0c734e16 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -37,8 +37,7 @@ import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.shard.Shard; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; /** * A simple structured container API for hierarchies of chunked @@ -141,7 +140,7 @@ default void setDatasetAttributes( final String datasetPath, final DatasetAttributes datasetAttributes) throws N5Exception { - setAttributes(datasetPath, datasetAttributes.asMap()); + 
setAttribute(datasetPath, "/", datasetAttributes); } /** @@ -220,8 +219,7 @@ default void createDataset( final DataType dataType, final Compression compression) throws N5Exception { - final Codec[] codecs = new Codec[]{new ShardingCodec( - new ShardingConfiguration(blockSize, null, null, IndexLocation.END))}; + final Codec[] codecs = new Codec[]{new ShardingCodec(blockSize, null, null, IndexLocation.END)}; createDataset(datasetPath, new DatasetAttributes(dimensions, shardSize, dataType, compression, codecs)); } @@ -284,6 +282,22 @@ void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws N5Exception; + /** + * Write multiple data blocks, useful for request aggregation . + * + * @param datasetPath dataset path + * @param datasetAttributes the dataset attributes + * @param dataBlocks the data block + * @param the data block data type + * @throws N5Exception the exception + */ + default void writeBlocks( + final String datasetPath, + final DatasetAttributes datasetAttributes, + final DataBlock... dataBlocks) throws N5Exception { + //TODO Caleb: write this + } + /** * Writes a complete {@link Shard} to a dataset. * diff --git a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java index 85768d18..5081f821 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java @@ -131,6 +131,7 @@ public JsonElement serialize( final JsonSerializationContext context) { final Class clazz = (Class)object.getClass(); + final String name = clazz.getAnnotation(NameConfig.Name.class).value(); final String prefix = type.getAnnotation(NameConfig.Prefix.class).value(); final String type = prefix + "." 
+ name; @@ -138,7 +139,6 @@ public JsonElement serialize( final JsonObject json = new JsonObject(); json.addProperty("name", name); final JsonObject configuration = new JsonObject(); - json.add("configuration", configuration); final HashMap parameterTypes = parameters.get(type); final HashMap parameterNameMap = parameterNames.get(type); @@ -158,6 +158,8 @@ public JsonElement serialize( configuration.add(parameterNameMap.get(fieldName), serialized); } + if (!configuration.isEmpty()) + json.add("configuration", configuration); } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { e.printStackTrace(System.err); return null; @@ -176,14 +178,22 @@ public T deserialize( final JsonObject objectJson = json.getAsJsonObject(); final String name = objectJson.getAsJsonPrimitive("name").getAsString(); - if (name == null) - return null; - final JsonObject configuration = objectJson.getAsJsonObject("configuration"); - if (configuration == null) + if (name == null) { return null; + } final String type = prefix + "." + name; + final JsonObject configuration = objectJson.getAsJsonObject("configuration"); + /* It's ok to be null if all parameters are optional. 
+ * Otherwise, return*/ + if (configuration == null) { + for (Field field : parameters.get(type).values()) { + if (!field.getAnnotation(NameConfig.Parameter.class).optional()) + return null; + } + } + final Constructor constructor = constructors.get(type); constructor.setAccessible(true); final T object; @@ -195,8 +205,8 @@ public T deserialize( final String fieldName = parameterType.getKey(); final String paramName = parameterNameMap.get(fieldName); final JsonElement paramJson = configuration.get(paramName); + final Field field = parameterType.getValue(); if (paramJson != null) { - final Field field = parameterType.getValue(); final Object parameter; if (field.getAnnotation(N5Annotations.ReverseArray.class) != null) { final JsonArray reversedArray = reverseJsonArray(paramJson); @@ -204,6 +214,9 @@ public T deserialize( } else parameter = context.deserialize(paramJson, field.getType()); ReflectionUtils.setFieldValue(object, fieldName, parameter); + } else if (!field.getAnnotation(NameConfig.Parameter.class).optional()) { + /* if param is null, and not optional, return null */ + return null; } } } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException diff --git a/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java index 7d1327b0..ebd58b38 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/RawCompression.java @@ -30,6 +30,7 @@ import java.io.OutputStream; import org.janelia.saalfeldlab.n5.Compression.CompressionType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; @CompressionType("raw") public class RawCompression implements DefaultBlockReader, DefaultBlockWriter, Compression { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 2825eff9..60244334 
100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -4,8 +4,7 @@ import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; public class ShardedDatasetAttributes extends DatasetAttributes { @@ -24,7 +23,7 @@ public ShardedDatasetAttributes( final Compression compression, final Codec[] codecs) { - super(dimensions, blockSize, dataType, compression, codecs); + super(dimensions, shardSize, dataType, compression, codecs); this.shardSize = shardSize; this.indexLocation = shardIndexLocation; @@ -106,11 +105,11 @@ public long getNumBlocks() { public static int[] getBlockSize(Codec[] codecs) { - //TODO Caleb: Move this? - return Arrays.stream(codecs) - .filter(ShardingCodec::isShardingCodec) - .map(x -> ((ShardingCodec)x).getConfiguration()) - .map(ShardingConfiguration::getBlockSize).findFirst().orElse(null); + for (Codec codec : codecs) + if (codec instanceof ShardingCodec) + return ((ShardingCodec)codec).getBlockSize(); + + return null; } public IndexLocation getIndexLocation() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 5f0bb4ab..a16d0ec3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -14,18 +14,16 @@ import com.google.gson.JsonSerializer; import org.janelia.saalfeldlab.n5.serialization.NameConfig; -@NameConfig.Name("bytes") +@NameConfig.Name(value = BytesCodec.TYPE) public class BytesCodec implements Codec { private static final long serialVersionUID = 3523505403978222360L; - public static String TYPE = 
"bytes"; + public static final String TYPE = "bytes"; - @NameConfig.Parameter("endian") + @NameConfig.Parameter(value = "endian", optional = true) protected final ByteOrder byteOrder; - protected transient final byte[] array; - public BytesCodec() { this(ByteOrder.LITTLE_ENDIAN); @@ -33,13 +31,8 @@ public BytesCodec() { public BytesCodec(final ByteOrder byteOrder) { - this(byteOrder, 256); - } - - public BytesCodec(final ByteOrder byteOrder, final int N) { - this.byteOrder = byteOrder; - this.array = new byte[N]; + } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java index 0b85ed5a..f7c03608 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/Crc32cChecksumCodec.java @@ -1,13 +1,16 @@ package org.janelia.saalfeldlab.n5.codec.checksum; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + import java.nio.ByteBuffer; import java.util.zip.CRC32; +@NameConfig.Name(Crc32cChecksumCodec.TYPE) public class Crc32cChecksumCodec extends ChecksumCodec { private static final long serialVersionUID = 7424151868725442500L; - public static transient final String TYPE = "crc32c"; + public static final String TYPE = "crc32c"; public Crc32cChecksumCodec() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java b/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java index 22965382..2ccb122e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/serialization/NameConfig.java @@ -31,6 +31,7 @@ public interface NameConfig extends Serializable { @Target(ElementType.FIELD) @interface Parameter { String value() default ""; + boolean optional() default false; } default String getType() { diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index f642075a..fc30eaa6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -7,7 +7,7 @@ public abstract class AbstractShard implements Shard { protected final ShardedDatasetAttributes datasetAttributes; - protected final ShardIndex index; + protected ShardIndex index; private final long[] gridPosition; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 1d0da8a4..31f51049 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -20,7 +20,7 @@ import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; public class ShardIndex extends LongArrayDataBlock { @@ -83,8 +83,7 @@ private int getNumBytesIndex(long... 
gridPosition) { public static ShardIndex read(final KeyValueAccess keyValueAccess, final String key, final ShardedDatasetAttributes datasetAttributes) throws IOException { - return read(keyValueAccess, key, datasetAttributes.getShardBlockGridSize(), - datasetAttributes.getIndexLocation()); + return read(keyValueAccess, key, datasetAttributes.getShardBlockGridSize(), datasetAttributes.getIndexLocation()); } public static ShardIndex read( @@ -144,8 +143,7 @@ public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttribu return byteBounds(indexShape, datasetAttributes.getIndexLocation(), objectSize); } - public static IndexByteBounds byteBounds(final int[] indexShape, final IndexLocation indexLocation, - final long objectSize) { + public static IndexByteBounds byteBounds(final int[] indexShape, final IndexLocation indexLocation, final long objectSize) { final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index 0ab5d276..4584d1db 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -1,10 +1,5 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.IOException; -import java.io.InputStream; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; - import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DefaultBlockReader; @@ -15,7 +10,12 @@ import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.IdentityCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; + +import java.io.IOException; +import 
java.io.InputStream; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; public class ShardReader { @@ -79,11 +79,10 @@ private static ShardedDatasetAttributes buildTestAttributes() { final Codec[] codecs = new Codec[]{ new IdentityCodec(), new ShardingCodec( - new ShardingConfiguration( - new int[]{2, 2}, - new Codec[]{new RawCompression(), new IdentityCodec()}, - new Codec[]{new Crc32cChecksumCodec()}, - IndexLocation.END) + new int[]{2, 2}, + new Codec[]{new RawCompression(), new IdentityCodec()}, + new Codec[]{new Crc32cChecksumCodec()}, + IndexLocation.END ) }; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index d1ade67f..1c18e564 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -54,7 +54,7 @@ public void write(final OutputStream out) throws IOException { // TODO need codecs // prepareForWriting(); - // if (datasetAttributes.getShardingConfiguration().areIndexesAtStart()) { + // if (datasetAttributes.getShardingConfiguration().getIndexLocation()) { // writeIndexes(out); // writeBlocks(out); // } else { @@ -63,7 +63,7 @@ public void write(final OutputStream out) throws IOException { // } prepareForWritingDataBlock(); - if (datasetAttributes.getIndexLocation() == ShardingConfiguration.IndexLocation.START) { + if (datasetAttributes.getIndexLocation() == ShardingCodec.IndexLocation.START) { writeIndexBlock(out); writeBlocks(out); } else { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index aa0f6684..7297aab5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -1,32 +1,54 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.IOException; 
-import java.io.InputStream; -import java.io.OutputStream; -import java.lang.reflect.Type; - -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; - import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; import com.google.gson.JsonElement; -import com.google.gson.JsonObject; import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.reflect.Type; + +@NameConfig.Name(ShardingCodec.TYPE) public class ShardingCodec implements Codec { private static final long serialVersionUID = -5879797314954717810L; public static final String TYPE = "sharding_indexed"; - private final ShardingConfiguration configuration; + public final static String CHUNK_SHAPE_KEY = "chunk_shape"; + public static final String INDEX_LOCATION_KEY = "index_location"; + public static final String CODECS_KEY = "codecs"; + public static final String INDEX_CODECS_KEY = "index_codecs"; + + public enum IndexLocation { + START, END + } + + @NameConfig.Parameter(CHUNK_SHAPE_KEY) + private final int[] blockSize; + + @NameConfig.Parameter(CODECS_KEY) + private final Codec[] codecs; - public ShardingCodec(ShardingConfiguration configuration) { + @NameConfig.Parameter(INDEX_CODECS_KEY) + private final Codec[] indexCodecs; - this.configuration = configuration; + @NameConfig.Parameter(INDEX_LOCATION_KEY) + private final IndexLocation indexLocation; + + private ShardingCodec() { + + blockSize = null; + codecs = null; + indexCodecs = null; + indexLocation = null; } public ShardingCodec( @@ -35,12 +57,20 @@ public ShardingCodec( final Codec[] indexCodecs, final IndexLocation indexLocation) { 
- this.configuration = new ShardingConfiguration(blockSize, codecs, indexCodecs, indexLocation); + this.blockSize = blockSize; + this.codecs = codecs; + this.indexCodecs = indexCodecs; + this.indexLocation = indexLocation; + } + + public int[] getBlockSize() { + + return blockSize; } - public ShardingConfiguration getConfiguration() { + public IndexLocation getIndexLocation() { - return configuration; + return indexLocation; } @Override @@ -64,33 +94,27 @@ public static boolean isShardingCodec(final Codec codec) { return codec instanceof ShardingCodec; } - // public static void TypeAd - public static class ShardingCodecAdapter implements JsonDeserializer, JsonSerializer { + @Override + public String getType() { - @Override - public JsonElement serialize(ShardingCodec src, Type typeOfSrc, JsonSerializationContext context) { + return TYPE; + } - final JsonObject jsonObj = new JsonObject(); + public static IndexLocationAdapter indexLocationAdapter = new IndexLocationAdapter(); - jsonObj.addProperty("name", ShardingCodec.TYPE); - // context.serialize(typeOfSrc); + public static class IndexLocationAdapter implements JsonSerializer, JsonDeserializer { - return jsonObj; - } + @Override public IndexLocation deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { - @Override - public ShardingCodec deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) - throws JsonParseException { + if (!json.isJsonPrimitive()) return null; - return null; + return IndexLocation.valueOf(json.getAsString().toUpperCase()); } - } - - @Override - public String getType() { + @Override public JsonElement serialize(IndexLocation src, Type typeOfSrc, JsonSerializationContext context) { - return TYPE; + return new JsonPrimitive(src.name().toLowerCase()); + } } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java 
deleted file mode 100644 index ce520700..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingConfiguration.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.janelia.saalfeldlab.n5.shard; - -import java.lang.reflect.Type; -import java.util.Arrays; - -import org.janelia.saalfeldlab.n5.codec.Codec; - -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonNull; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class ShardingConfiguration { - - public static final String CHUNK_SHAPE_KEY = "chunk_shape"; - public static final String INDEX_LOCATION_KEY = "index_location"; - public static final String CODECS_KEY = "codecs"; - public static final String INDEX_CODECS_KEY = "index_codecs"; - - public enum IndexLocation { - START, END - } - - protected int[] blockSize; - protected Codec[] codecs; - protected Codec[] indexCodecs; - protected IndexLocation indexLocation; - - public ShardingConfiguration( - final int[] blockSize, - final Codec[] codecs, - final Codec[] indexCodecs, - final IndexLocation indexLocation) { - - this.blockSize = blockSize; - this.codecs = codecs; - this.indexCodecs = indexCodecs; - this.indexLocation = indexLocation; - } - - public int[] getBlockSize() { - - return blockSize; - } - - public boolean areIndexesAtStart() { - - return indexLocation == IndexLocation.START; - } - - public static class ShardingConfigurationAdapter - implements JsonDeserializer, JsonSerializer { - - @Override - public JsonElement serialize(ShardingConfiguration src, Type typeOfSrc, JsonSerializationContext context) { - - if( anyShardingCodecs(src.codecs) || anyShardingCodecs(src.indexCodecs)) - return JsonNull.INSTANCE; - - final JsonObject jsonObj = new JsonObject(); - jsonObj.add(CHUNK_SHAPE_KEY, 
context.serialize(src.blockSize)); - jsonObj.add(INDEX_LOCATION_KEY, context.serialize(src.indexLocation.toString())); - jsonObj.add(CODECS_KEY, context.serialize(src.codecs)); - jsonObj.add(INDEX_CODECS_KEY, context.serialize(src.indexCodecs)); - - return jsonObj; - } - - @Override - public ShardingConfiguration deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) - throws JsonParseException { - - return null; - } - - public boolean anyShardingCodecs(final Codec[] codecs) { - - if (codecs == null) - return false; - - return Arrays.stream(codecs).anyMatch(c -> { - return (c instanceof ShardingCodec); - }); - } - - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 699c45bf..d1dce0e9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -1,7 +1,9 @@ package org.janelia.saalfeldlab.n5.shard; import java.io.IOException; +import java.io.OutputStream; import java.io.UncheckedIOException; +import java.nio.file.NoSuchFileException; import java.util.Arrays; import org.janelia.saalfeldlab.n5.DataBlock; @@ -63,18 +65,21 @@ public void writeBlock(final DataBlock block) { throw new N5IOException("Attempted to write block in the wrong shard."); final ShardIndex idx = getIndex(); - final long startByte = idx.getOffset(relativePosition); - final long endByte = startByte + idx.getNumBytes(relativePosition); + final long startByte = idx.getOffset(relativePosition) == Shard.EMPTY_INDEX_NBYTES ? 0 : idx.getOffset(relativePosition); + final long size = idx.getNumBytes(relativePosition) == Shard.EMPTY_INDEX_NBYTES ? Long.MAX_VALUE : idx.getNumBytes(relativePosition); // TODO this assumes that the block exists in the shard and // that the available space is sufficient. 
Should generalize - try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, endByte)) { + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, size)) { // TODO codecs - datasetAttributes.getCompression().getWriter().write(block, lockedChannel.newOutputStream()); + final CountingOutputStream out = new CountingOutputStream(lockedChannel.newOutputStream()); + datasetAttributes.getCompression().getWriter().write(block, out); // TODO update index when we know how many bytes were written - + idx.set(startByte, out.getNumBytes(), relativePosition); + out.write(index.toByteBuffer().array()); + out.realClose(); } catch (final IOException | UncheckedIOException e) { throw new N5IOException("Failed to read block from " + path, e); } @@ -95,7 +100,8 @@ private static int numBlockElements(DatasetAttributes datasetAttributes) { public ShardIndex createIndex() { // Empty index of the correct size - return new ShardIndex(datasetAttributes.getShardBlockGridSize()); + index = new ShardIndex(datasetAttributes.getShardBlockGridSize()); + return index; } @Override @@ -104,9 +110,57 @@ public ShardIndex getIndex() { try { final ShardIndex result = ShardIndex.read(keyValueAccess, path, datasetAttributes); return result == null ? 
createIndex() : result; - } catch (final IOException e) { + } catch (final NoSuchFileException e) { + return createIndex(); + } catch (IOException e) { throw new N5IOException("Failed to read index at " + path, e); } } + + static class CountingOutputStream extends OutputStream { + private final OutputStream out; + private long numBytes; + + public CountingOutputStream(OutputStream out) { + this.out = out; + this.numBytes = 0; + } + + @Override + public void write(int b) throws IOException { + out.write(b); + numBytes++; + } + + @Override + public void write(byte[] b) throws IOException { + out.write(b); + numBytes += b.length; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + numBytes += len; + } + + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public void close() throws IOException { + + } + + private void realClose() throws IOException { + out.close(); + } + + public long getNumBytes() { + return numBytes; + } + } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java index 1fc6031c..94a07c7f 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -18,6 +18,8 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; +import java.util.Arrays; + public class CodecSerialization { private Gson gson; @@ -28,10 +30,9 @@ public void before() { final GsonBuilder gsonBuilder = new GsonBuilder(); gsonBuilder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); gsonBuilder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); - gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, - 
NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); - // gsonBuilder.registerTypeAdapter(Codec.class, - // NameConfigAdapter.getJsonAdapter(Codec.class)); + gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); + gsonBuilder.registerTypeAdapter(Codec.class, + NameConfigAdapter.getJsonAdapter(Codec.class)); gson = gsonBuilder.create(); } @@ -69,8 +70,8 @@ public void testSerializeCodecArray() { JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); - // final Codec[] codecsDeserialized = gson.fromJson(expected, Codec[].class); - // System.out.println(Arrays.toString(codecsDeserialized)); + final Codec[] codecsDeserialized = gson.fromJson(expected, Codec[].class); + System.out.println(Arrays.toString(codecsDeserialized)); codecs = new Codec[]{ new AsTypeCodec(DataType.FLOAT64, DataType.INT16), diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 32574d4b..8814231a 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,24 +1,25 @@ package org.janelia.saalfeldlab.n5.shard; -import java.net.MalformedURLException; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; - +import com.google.gson.GsonBuilder; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; +import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.IdentityCodec; import 
org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingConfiguration.IndexLocation; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.universe.N5Factory; import org.junit.Test; +import java.net.MalformedURLException; +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; public class ShardDemos { @@ -27,7 +28,6 @@ public static void main(String[] args) throws MalformedURLException { final Path p = Paths.get("src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0"); System.out.println(p); - final String key = p.toString(); final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes(new long[]{6, 4}, new int[]{6, 4}, new int[]{3, 2}, IndexLocation.END, DataType.UINT8, new RawCompression(), null); @@ -55,7 +55,13 @@ public static void main(String[] args) throws MalformedURLException { @Test public void writeReadBlockTest() { - final N5Writer writer = N5Factory.createWriter("src/test/resources/shardExamples/test.n5"); + final N5Factory factory = new N5Factory(); + final GsonBuilder gsonBuilder = new GsonBuilder(); + gsonBuilder.setPrettyPrinting(); + factory.gsonBuilder(gsonBuilder); + factory.cacheAttributes(false); + + final N5Writer writer = factory.openWriter("src/test/resources/shardExamples/test.n5"); final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( new long[]{8, 8}, @@ -67,19 +73,23 @@ public void writeReadBlockTest() { new Codec[]{ new IdentityCodec(), new ShardingCodec( - new ShardingConfiguration( - new int[]{2, 2}, - new Codec[]{new RawCompression(), new IdentityCodec()}, - new Codec[]{new Crc32cChecksumCodec()}, - IndexLocation.END) + new int[]{2, 2}, + new Codec[]{new GzipCompression(4), new IdentityCodec()}, + new Codec[]{new Crc32cChecksumCodec()}, + IndexLocation.END ) } ); writer.createDataset("shard", datasetAttributes); - final 
DataBlock dataBlock = datasetAttributes.getDataType().createDataBlock(datasetAttributes.getBlockSize(), new long[]{0, 0}, 2 * 2); + final DataBlock dataBlock = datasetAttributes.getDataType().createDataBlock(datasetAttributes.getBlockSize(), new long[]{0, 0}, 2 * 2); + byte[] data = (byte[])dataBlock.getData(); + for (int i = 0; i < data.length; i++) { + data[i] = (byte)i; + } + writer.deleteBlock("shard", 0,0 ); writer.writeBlock("shard", datasetAttributes, dataBlock); - writer.readBlock("shard", datasetAttributes, 0,0); + writer.readBlock("shard", datasetAttributes, 0, 0); } } From 21ab72c34b63a8ddf66ef162b829521d4d7e1412 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 14 Aug 2024 13:07:47 -0400 Subject: [PATCH 035/124] test: minor updates --- .../saalfeldlab/n5/codec/BytesTests.java | 11 ++++---- .../n5/serialization/CodecSerialization.java | 25 +++++++++++++------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index 8eeb8345..e8bf64b0 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -1,6 +1,10 @@ package org.janelia.saalfeldlab.n5.codec; -import com.google.gson.GsonBuilder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.nio.ByteOrder; + import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.N5Writer; @@ -9,10 +13,7 @@ import org.janelia.saalfeldlab.n5.universe.N5Factory; import org.junit.Test; -import java.nio.ByteOrder; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import com.google.gson.GsonBuilder; public class BytesTests { diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java 
b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java index 94a07c7f..f12150b6 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -1,6 +1,7 @@ package org.janelia.saalfeldlab.n5.serialization; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.GzipCompression; @@ -18,8 +19,6 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; -import java.util.Arrays; - public class CodecSerialization { private Gson gson; @@ -30,9 +29,12 @@ public void before() { final GsonBuilder gsonBuilder = new GsonBuilder(); gsonBuilder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); gsonBuilder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); - gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); - gsonBuilder.registerTypeAdapter(Codec.class, - NameConfigAdapter.getJsonAdapter(Codec.class)); + gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, + NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); + gsonBuilder.registerTypeAdapter(GzipCompression.class, + NameConfigAdapter.getJsonAdapter(GzipCompression.class)); + gsonBuilder.registerTypeAdapter(Codec.class, + NameConfigAdapter.getJsonAdapter(Codec.class)); gson = gsonBuilder.create(); } @@ -70,8 +72,10 @@ public void testSerializeCodecArray() { JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); - final Codec[] codecsDeserialized = gson.fromJson(expected, Codec[].class); - System.out.println(Arrays.toString(codecsDeserialized)); + Codec[] codecsDeserialized = gson.fromJson(expected, Codec[].class); + assertEquals("codecs length not 
2", 2, codecsDeserialized.length); + assertTrue("first codec not identity", codecsDeserialized[0] instanceof IdentityCodec); + assertTrue("second codec not asType", codecsDeserialized[1] instanceof AsTypeCodec); codecs = new Codec[]{ new AsTypeCodec(DataType.FLOAT64, DataType.INT16), @@ -79,9 +83,14 @@ public void testSerializeCodecArray() { }; jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); expected = gson.fromJson( - "[{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}},{\"level\":-1,\"useZlib\":false}]", + "[{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}},{\"name\":\"gzip\",\"configuration\":{\"level\":-1,\"use_z_lib\":false}}]", JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); + + codecsDeserialized = gson.fromJson(expected, Codec[].class); + assertEquals("codecs length not 2", 2, codecsDeserialized.length); + assertTrue("first codec not asType", codecsDeserialized[0] instanceof AsTypeCodec); + assertTrue("second codec not gzip", codecsDeserialized[1] instanceof GzipCompression); } } From 314b1996df72b528fcc7798ddaecd9e9c78ebb1b Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 14 Aug 2024 13:08:05 -0400 Subject: [PATCH 036/124] perf: BlockReader have read call static method --- .../java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 9f9b9772..3d8319d2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -47,13 +47,9 @@ public default > void read( final B dataBlock, final InputStream in) throws IOException { - final ByteBuffer buffer = dataBlock.toByteBuffer(); - // do not try with this input stream because 
subsequent block reads may happen if the stream points to a shard final InputStream inflater = getInputStream(in); - final DataInputStream dis = new DataInputStream(inflater); - dis.readFully(buffer.array()); - dataBlock.readData(buffer); + readFromStream(dataBlock, inflater); } /** From 5ad00fd58d55a3c03e53a5bdf3c2eed30d2c14c9 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 16 Aug 2024 16:58:21 -0400 Subject: [PATCH 037/124] feat: WIP initial read/write blocks through composed codecs implemention analogous to zarr array->bytes/ bytes->bytes codecs. --- .../saalfeldlab/n5/Bzip2Compression.java | 2 + .../n5/CachedGsonKeyValueN5Reader.java | 3 +- .../janelia/saalfeldlab/n5/Compression.java | 2 +- .../saalfeldlab/n5/DatasetAttributes.java | 84 +++++++------- .../saalfeldlab/n5/DefaultBlockReader.java | 70 ++---------- .../saalfeldlab/n5/DefaultBlockWriter.java | 79 +++---------- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 2 +- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 3 +- .../janelia/saalfeldlab/n5/GsonN5Reader.java | 30 ++--- .../saalfeldlab/n5/Lz4Compression.java | 2 + .../saalfeldlab/n5/ShortArrayDataBlock.java | 12 ++ .../janelia/saalfeldlab/n5/XzCompression.java | 2 + .../saalfeldlab/n5/codec/AsTypeCodec.java | 13 +-- .../saalfeldlab/n5/codec/BytesCodec.java | 104 ++++++++++++++++-- .../janelia/saalfeldlab/n5/codec/Codec.java | 74 ++++++++++--- .../saalfeldlab/n5/codec/ComposedCodec.java | 16 +-- .../n5/codec/DeterministicSizeCodec.java | 2 +- .../saalfeldlab/n5/codec/IdentityCodec.java | 2 +- .../n5/codec/checksum/ChecksumCodec.java | 20 ++-- .../saalfeldlab/n5/shard/ShardingCodec.java | 2 +- .../saalfeldlab/n5/AbstractN5Test.java | 93 ++++++++++------ .../saalfeldlab/n5/codec/AsTypeTests.java | 6 +- .../saalfeldlab/n5/shard/ShardDemos.java | 6 +- 23 files changed, 339 insertions(+), 290 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java index 
0c40d01a..49a333f3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Bzip2Compression.java @@ -32,8 +32,10 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.janelia.saalfeldlab.n5.Compression.CompressionType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; @CompressionType("bzip2") +@NameConfig.Name("bzip2") public class Bzip2Compression implements DefaultBlockReader, DefaultBlockWriter, Compression { private static final long serialVersionUID = -4873117458390529118L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java index 324d242e..d2e8418e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java @@ -27,6 +27,7 @@ import java.lang.reflect.Type; +import com.google.gson.reflect.TypeToken; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.cache.N5JsonCache; import org.janelia.saalfeldlab.n5.cache.N5JsonCacheableContainer; @@ -82,7 +83,7 @@ default DatasetAttributes normalGetDatasetAttributes(final String pathName) thro final String normalPath = N5URI.normalizeGroupPath(pathName); final JsonElement attributes = GsonKeyValueN5Reader.super.getAttributes(normalPath); - return createDatasetAttributes(attributes); + return getGson().fromJson(attributes, DatasetAttributes.class); } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index ba78cecf..eac6a0ac 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -43,7 +43,7 @@ * * @author 
Stephan Saalfeld */ -public interface Compression extends Serializable, Codec { +public interface Compression extends Serializable, Codec.BytesToBytes { /** * Annotation for runtime discovery of compression schemes. diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 95eec18a..ff420fea 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -12,10 +12,12 @@ import com.google.gson.JsonParseException; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.ComposedCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; +import javax.xml.crypto.Data; + /** * Mandatory dataset attributes: * @@ -62,13 +64,19 @@ public DatasetAttributes( final int[] blockSize, final DataType dataType, final Compression compression, - final Codec[] codecs ) { + final Codec[] codecs) { this.dimensions = dimensions; this.blockSize = blockSize; this.dataType = dataType; - this.codecs = codecs; this.compression = compression; + if (codecs == null && !(compression instanceof RawCompression)) { + this.codecs = new Codec[]{new BytesCodec(), compression}; + } else if (codecs == null) { + this.codecs = new Codec[]{new BytesCodec()}; + } else { + this.codecs = codecs; + } } public DatasetAttributes( @@ -110,22 +118,6 @@ public Codec[] getCodecs() { return codecs; } - public Codec collectCodecs() { - - if (codecs == null || codecs.length == 0) - return compression; - else if (codecs.length == 1) - return new ComposedCodec(codecs[0], compression); - else { - final Codec[] codecsAndCompresor = new Codec[codecs.length + 1]; - for (int i = 0; i < codecs.length; i++) - codecsAndCompresor[i] = codecs[i]; - - codecsAndCompresor[codecs.length] 
= compression; - return new ComposedCodec(codecsAndCompresor); - } - } - public HashMap asMap() { final HashMap map = new HashMap<>(); @@ -160,28 +152,29 @@ static DatasetAttributes from( /* version 0 */ if (compression == null) { - switch (compressionVersion0Name) { - case "raw": - compression = new RawCompression(); - break; - case "gzip": - compression = new GzipCompression(); - break; - case "bzip2": - compression = new Bzip2Compression(); - break; - case "lz4": - compression = new Lz4Compression(); - break; - case "xz": - compression = new XzCompression(); - break; - } + compression = getCompressionVersion0(compressionVersion0Name); } return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); } + private static Compression getCompressionVersion0(final String compressionVersion0Name) { + + switch (compressionVersion0Name) { + case "raw": + return new RawCompression(); + case "gzip": + return new GzipCompression(); + case "bzip2": + return new Bzip2Compression(); + case "lz4": + return new Lz4Compression(); + case "xz": + return new XzCompression(); + } + return null; + } + private static DatasetAttributesAdapter adapter = null; public static DatasetAttributesAdapter getJsonAdapter() { if (adapter == null) { @@ -201,11 +194,20 @@ public static class DatasetAttributesAdapter implements JsonSerializer bytes codec. Even in the case of no compressor, there should always be at least the - // `bytes` codec it seems. 
Consider how we want to handle this in N5 obj.add(CODEC_KEY, context.serialize(src.codecs)); return obj; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 3d8319d2..bb3f2639 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -30,6 +30,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; +import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** @@ -70,70 +71,19 @@ public static DataBlock readBlock( final DatasetAttributes datasetAttributes, final long[] gridPosition) throws IOException { - final DataInputStream dis = new DataInputStream(in); - final short mode = dis.readShort(); - final int numElements; - final DataBlock dataBlock; - if (mode != 2) { - final int nDim = dis.readShort(); - final int[] blockSize = new int[nDim]; - for (int d = 0; d < nDim; ++d) - blockSize[d] = dis.readInt(); - if (mode == 0) { - numElements = DataBlock.getNumElements(blockSize); - } else { - numElements = dis.readInt(); + Codec.DataBlockInputStream dataBlockStream = null; + InputStream stream = in; + for (Codec codec : datasetAttributes.getCodecs()) { + if (codec instanceof Codec.ArrayToBytes) { + stream = dataBlockStream = ((Codec.ArrayToBytes)codec).decode(datasetAttributes, gridPosition, stream); } - dataBlock = datasetAttributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); - } else { - numElements = dis.readInt(); - dataBlock = datasetAttributes.getDataType().createDataBlock(null, gridPosition, numElements); - } - - final BlockReader reader = datasetAttributes.getCompression().getReader(); - reader.read(dataBlock, in); - return dataBlock; - } - - /** - * Reads a {@link DataBlock} from an {@link InputStream}. 
- * - * @param in - * the input stream - * @param datasetAttributes - * the dataset attributes - * @param gridPosition - * the grid position - * @return the block - * @throws IOException - * the exception - */ - public static DataBlock readBlockWithCodecs( - final InputStream in, - final DatasetAttributes datasetAttributes, - final long[] gridPosition) throws IOException { - - final DataInputStream dis = new DataInputStream(in); - final short mode = dis.readShort(); - final int numElements; - final DataBlock dataBlock; - if (mode != 2) { - final int nDim = dis.readShort(); - final int[] blockSize = new int[nDim]; - for (int d = 0; d < nDim; ++d) - blockSize[d] = dis.readInt(); - if (mode == 0) { - numElements = DataBlock.getNumElements(blockSize); - } else { - numElements = dis.readInt(); + if (codec instanceof Codec.BytesToBytes) { + stream = ((Codec.BytesToBytes)codec).decode(stream); } - dataBlock = datasetAttributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); - } else { - numElements = dis.readInt(); - dataBlock = datasetAttributes.getDataType().createDataBlock(null, gridPosition, numElements); } - readFromStream(dataBlock, datasetAttributes.collectCodecs().decode(in)); + final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); + readFromStream(dataBlock, stream); return dataBlock; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index c5d985f9..6b0e988d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -25,7 +25,7 @@ */ package org.janelia.saalfeldlab.n5; -import java.io.DataOutputStream; +import org.janelia.saalfeldlab.n5.codec.Codec; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -70,75 +70,22 @@ public static void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock 
dataBlock) throws IOException { - final DataOutputStream dos = new DataOutputStream(out); - final int mode; - if (datasetAttributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) - mode = 2; - else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) - mode = 0; - else - mode = 1; - dos.writeShort(mode); - - if (mode != 2) { - dos.writeShort(datasetAttributes.getNumDimensions()); - for (final int size : dataBlock.getSize()) - dos.writeInt(size); - } - - if (mode != 0) - dos.writeInt(dataBlock.getNumElements()); - - dos.flush(); - - final BlockWriter writer = datasetAttributes.getCompression().getWriter(); - writer.write(dataBlock, out); - } - - /** - * Writes a {@link DataBlock} into an {@link OutputStream}. - * - * @param - * the type of data - * @param out - * the output stream - * @param datasetAttributes - * the dataset attributes - * @param dataBlock - * the data block the block data type - * @throws IOException - * the exception - */ - public static void writeBlockWithCodecs( - final OutputStream out, - final DatasetAttributes datasetAttributes, - final DataBlock dataBlock) throws IOException { - - final DataOutputStream dos = new DataOutputStream(out); - - final int mode; - if (datasetAttributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) - mode = 2; - else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) - mode = 0; - else - mode = 1; - dos.writeShort(mode); - - if (mode != 2) { - dos.writeShort(datasetAttributes.getNumDimensions()); - for (final int size : dataBlock.getSize()) - dos.writeInt(size); + OutputStream stream = out; + final Codec[] codecs = datasetAttributes.getCodecs(); + for (Codec codec : codecs) { + if (codec instanceof Codec.BytesToBytes) + stream = ((Codec.BytesToBytes)codec).encode(stream); + else if (codec instanceof Codec.ArrayToBytes) + stream = ((Codec.ArrayToBytes)codec).encode(datasetAttributes, dataBlock, stream); } - if (mode != 0) - 
dos.writeInt(dataBlock.getNumElements()); + writeFromStream(dataBlock, stream); + stream.flush(); - try (final OutputStream encodedStream = datasetAttributes.collectCodecs().encode(out)) { - writeFromStream(dataBlock, encodedStream); - out.flush(); - } + //FIXME Caleb: The stream must be closed BUT it shouldn't be `writeBlock`'s responsibility. + // Whoever opens the stream should close it + stream.close(); } public static void writeFromStream(final DataBlock dataBlock, final OutputStream out) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 003820b1..e6a3429f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -113,7 +113,7 @@ default DataBlock readBlock( final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), gridPosition); try (final LockedChannel lockedChannel = getKeyValueAccess().lockForReading(path)) { - return DefaultBlockReader.readBlockWithCodecs(lockedChannel.newInputStream(), datasetAttributes, gridPosition); + return DefaultBlockReader.readBlock(lockedChannel.newInputStream(), datasetAttributes, gridPosition); } catch (final N5Exception.N5NoSuchKeyException e) { return null; } catch (final IOException | UncheckedIOException e) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 69e77fc2..2545853e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -230,8 +230,7 @@ default void writeBlock( final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { - 
DefaultBlockWriter.writeBlockWithCodecs(lock.newOutputStream(), datasetAttributes, dataBlock); - // DefaultBlockWriter.writeBlock(lock.newOutputStream(), datasetAttributes, dataBlock); + DefaultBlockWriter.writeBlock(lock.newOutputStream(), datasetAttributes, dataBlock); } catch (final IOException | UncheckedIOException e) { throw new N5IOException( "Failed to write block " + Arrays.toString(dataBlock.getGridPosition()) + " into dataset " + path, diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java index c611a9e7..0ba185e3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonN5Reader.java @@ -28,6 +28,8 @@ import java.lang.reflect.Type; import java.util.Map; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonParseException; import org.janelia.saalfeldlab.n5.codec.Codec; import com.google.gson.Gson; @@ -60,31 +62,15 @@ default DatasetAttributes getDatasetAttributes(final String pathName) throws N5E default DatasetAttributes createDatasetAttributes(final JsonElement attributes) { - try { - final long[] dimensions = GsonUtils.readAttribute(attributes, DatasetAttributes.DIMENSIONS_KEY, long[].class, getGson()); - if (dimensions == null) { - return null; - } - - final DataType dataType = GsonUtils.readAttribute(attributes, DatasetAttributes.DATA_TYPE_KEY, DataType.class, getGson()); - if (dataType == null) { - return null; - } + final JsonDeserializationContext context = new JsonDeserializationContext() { - final int[] blockSize = GsonUtils.readAttribute(attributes, DatasetAttributes.BLOCK_SIZE_KEY, int[].class, getGson()); - final Compression compression = GsonUtils.readAttribute(attributes, DatasetAttributes.COMPRESSION_KEY, Compression.class, getGson()); - final Codec[] codecs = GsonUtils.readAttribute(attributes, DatasetAttributes.CODEC_KEY, Codec[].class, getGson()); + @Override public T 
deserialize(JsonElement json, Type typeOfT) throws JsonParseException { - /* version 0 */ - final String compressionVersion0Name = compression == null - ? GsonUtils.readAttribute(attributes, DatasetAttributes.compressionTypeKey, String.class, getGson()) - : null; + return getGson().fromJson(json, typeOfT); + } + }; - return DatasetAttributes.from(dimensions, dataType, blockSize, compression, compressionVersion0Name, codecs); - } catch (JsonSyntaxException | NumberFormatException | ClassCastException e) { - /* We cannot create a dataset, so return null. */ - return null; - } + return DatasetAttributes.getJsonAdapter().deserialize(attributes, DatasetAttributes.class, context); } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java index 0ba88e12..654ca4b5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Lz4Compression.java @@ -33,8 +33,10 @@ import net.jpountz.lz4.LZ4BlockInputStream; import net.jpountz.lz4.LZ4BlockOutputStream; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; @CompressionType("lz4") +@NameConfig.Name("lz4") public class Lz4Compression implements DefaultBlockReader, DefaultBlockWriter, Compression { private static final long serialVersionUID = -9071316415067427256L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java index 2dbf6b17..68aa4b35 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java @@ -25,6 +25,9 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; public class ShortArrayDataBlock extends AbstractDataBlock { @@ -48,6 +51,15 @@ public void readData(final ByteBuffer 
buffer) { buffer.asShortBuffer().get(data); } + + public void readData(final InputStream stream) throws IOException { + + final DataInputStream dis = new DataInputStream(stream); + for (int i = 0; i < data.length; i++) { + data[i] = dis.readShort(); + } + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java index ed73b7d5..d2c1ce3d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/XzCompression.java @@ -32,8 +32,10 @@ import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; import org.janelia.saalfeldlab.n5.Compression.CompressionType; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; @CompressionType("xz") +@NameConfig.Name("xz") public class XzCompression implements DefaultBlockReader, DefaultBlockWriter, Compression { private static final long serialVersionUID = -7272153943564743774L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index 3a437ffa..c3f8e69b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -10,8 +10,7 @@ import org.janelia.saalfeldlab.n5.serialization.NameConfig; @NameConfig.Name(AsTypeCodec.TYPE) -@NameConfig.Prefix("codec") -public class AsTypeCodec implements Codec { +public class AsTypeCodec implements Codec.BytesToBytes { private static final long serialVersionUID = 1031322606191894484L; @@ -141,15 +140,15 @@ else if (to == DataType.FLOAT64) else if (to == DataType.INT16) return AsTypeCodec::INT_TO_SHORT; if (to == DataType.INT8) - return AsTypeCodec::DOUBLE_TO_BYTE; + return AsTypeCodec::INT_TO_BYTE; else if (to == DataType.INT16) - return 
AsTypeCodec::DOUBLE_TO_SHORT; + return AsTypeCodec::INT_TO_SHORT; else if (to == DataType.INT32) - return AsTypeCodec::DOUBLE_TO_INT; + return AsTypeCodec::IDENTITY; else if (to == DataType.INT64) - return AsTypeCodec::DOUBLE_TO_LONG; + return AsTypeCodec::INT_TO_LONG; else if (to == DataType.FLOAT32) - return AsTypeCodec::DOUBLE_TO_FLOAT; + return AsTypeCodec::INT_TO_FLOAT; else if (to == DataType.INT64) return AsTypeCodec::INT_TO_LONG; else if (to == DataType.FLOAT32) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index a16d0ec3..3fa2c844 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -1,8 +1,11 @@ package org.janelia.saalfeldlab.n5.codec; +import java.io.DataInputStream; +import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import com.google.gson.JsonDeserializationContext; @@ -12,10 +15,14 @@ import com.google.gson.JsonPrimitive; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import org.apache.commons.io.output.ProxyOutputStream; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.serialization.NameConfig; @NameConfig.Name(value = BytesCodec.TYPE) -public class BytesCodec implements Codec { +public class BytesCodec implements Codec.ArrayToBytes { private static final long serialVersionUID = 3523505403978222360L; @@ -35,18 +42,93 @@ public BytesCodec(final ByteOrder byteOrder) { } - @Override - public InputStream decode(InputStream in) throws IOException { - - // TODO not applicable for array -> bytes - return in; + @Override public DataBlockInputStream decode(final DatasetAttributes 
attributes, final long[] gridPosition, InputStream in) throws IOException { + + return new DataBlockInputStream(in) { + + private short mode = -1; + private int[] blockSize = null; + private int numElements = -1; + + private boolean start = true; + + @Override protected void beforeRead(int n) throws IOException { + + if (start) { + readHeader(); + start = false; + } + } + + @Override + public DataBlock allocateDataBlock() throws IOException { + if (start) { + readHeader(); + start = false; + } + if (mode != 2) { + return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); + } else { + return attributes.getDataType().createDataBlock(null, gridPosition, numElements); + } + } + + private void readHeader() throws IOException { + final DataInputStream dis = new DataInputStream(in); + mode = dis.readShort(); + if (mode != 2) { + final int nDim = dis.readShort(); + blockSize = new int[nDim]; + for (int d = 0; d < nDim; ++d) + blockSize[d] = dis.readInt(); + if (mode == 0) { + numElements = DataBlock.getNumElements(blockSize); + } else { + numElements = dis.readInt(); + } + + } else { + numElements = dis.readInt(); + } + } + }; } - @Override - public OutputStream encode(OutputStream out) throws IOException { - - // TODO not applicable for array -> bytes - return out; + @Override public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) throws IOException { + + return new ProxyOutputStream(out) { + + boolean start = true; + + @Override protected void beforeWrite(int n) throws IOException { + if (start) { + writeHeader(); + start = false; + } + } + + private void writeHeader() throws IOException { + final DataOutputStream dos = new DataOutputStream(out); + + final int mode; + if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) + mode = 2; + else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) + mode = 0; + else + mode = 1; + 
dos.writeShort(mode); + + if (mode != 2) { + dos.writeShort(attributes.getNumDimensions()); + for (final int size : dataBlock.getSize()) + dos.writeInt(size); + } + + if (mode != 0) + dos.writeInt(dataBlock.getNumElements()); + } + }; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index 2c76438c..fdd3a037 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -1,7 +1,11 @@ package org.janelia.saalfeldlab.n5.codec; +import org.apache.commons.io.input.ProxyInputStream; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.serialization.NameConfig; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -17,24 +21,58 @@ @NameConfig.Prefix("codec") public interface Codec extends Serializable { - /** - * Decode an {@link InputStream}. - * - * @param in - * input stream - * @return the decoded input stream - */ - public InputStream decode(InputStream in) throws IOException; - - /** - * Encode an {@link OutputStream}. - * - * @param out - * the output stream - * @return the encoded output stream - */ - public OutputStream encode(OutputStream out) throws IOException; + public interface BytesToBytes extends Codec { - public String getType(); + /** + * Decode an {@link InputStream}. + * + * @param in + * input stream + * @return the decoded input stream + */ + public InputStream decode(final InputStream in) throws IOException; + + /** + * Encode an {@link OutputStream}. + * + * @param out + * the output stream + * @return the encoded output stream + */ + public OutputStream encode(final OutputStream out) throws IOException; + } + + interface ArrayToBytes extends Codec { + + /** + * Decode an {@link InputStream}. 
+ * + * @param in + * input stream + * @return the DataBlock corresponding to the input stream + */ + public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, final InputStream in) throws IOException; + + /** + * Encode a {@link DataBlock}. + * + * @param datablock the datablock to encode + */ + public OutputStream encode(final DatasetAttributes attributes, final DataBlock datablock, final OutputStream out) throws IOException; + + } + + public abstract class DataBlockInputStream extends ProxyInputStream { + + protected DataBlockInputStream(InputStream in) { + + super(in); + } + + public abstract DataBlock allocateDataBlock() throws IOException; + } + + public String getType(); } + diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java index 3b07ad2b..de720bbc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java @@ -7,17 +7,17 @@ /** * A {@link Codec} that is composition of a collection of codecs. */ -public class ComposedCodec implements Codec { +public class ComposedCodec implements Codec.BytesToBytes { //TODO Caleb: Remove? private static final long serialVersionUID = 5068349140842235924L; protected static final String TYPE = "composed"; - private final Codec[] filters; + private final Codec[] codecs; - public ComposedCodec(final Codec... filters) { + public ComposedCodec(final Codec... 
codec) { - this.filters = filters; + this.codecs = codec; } @Override @@ -31,8 +31,8 @@ public InputStream decode(InputStream in) throws IOException { // note that decoding is in reverse order InputStream decoded = in; - for (int i = filters.length - 1; i >= 0; i--) - decoded = filters[i].decode(decoded); + for (int i = codecs.length - 1; i >= 0; i--){} +// decoded = codecs[i].decode(decoded); return decoded; } @@ -41,8 +41,8 @@ public InputStream decode(InputStream in) throws IOException { public OutputStream encode(OutputStream out) throws IOException { OutputStream encoded = out; - for (int i = 0; i < filters.length; i++) - encoded = filters[i].encode(encoded); + for (int i = 0; i < codecs.length; i++){} +// encoded = codecs[i].encode(encoded); return encoded; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java index 9ac0a1fe..b0288adf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java @@ -4,7 +4,7 @@ * A {@link Codec} that can deterministically determine the size of encoded data from the size of the raw data and vice versa from the data length alone (i.e. encoding is data * independent). 
*/ -public interface DeterministicSizeCodec extends Codec { +public interface DeterministicSizeCodec extends Codec.BytesToBytes { public abstract long encodedSize(long size); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index 41fd10f1..b308d31b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -8,7 +8,7 @@ @NameConfig.Name(IdentityCodec.TYPE) @NameConfig.Prefix("codec") -public class IdentityCodec implements Codec { +public class IdentityCodec implements Codec.BytesToBytes { private static final long serialVersionUID = 8354269325800855621L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java index 84555dd3..5cf83cf0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java @@ -37,16 +37,6 @@ public int numChecksumBytes() { return numChecksumBytes; } - @Override - public CheckedInputStream decode(final InputStream in) throws IOException { - - // TODO get the correct expected checksum - // TODO write a test with nested checksum codecs - - // has to know the number of it needs to read? - return new CheckedInputStream(in, getChecksum()); - } - @Override public CheckedOutputStream encode(final OutputStream out) throws IOException { @@ -61,6 +51,16 @@ public void encode(final OutputStream out, ByteBuffer buffer) throws IOException writeChecksum(out); } + @Override + public CheckedInputStream decode(final InputStream in) throws IOException { + + // TODO get the correct expected checksum + // TODO write a test with nested checksum codecs + + // has to know the number of it needs to read? 
+ return new CheckedInputStream(in, getChecksum()); + } + public ByteBuffer decodeAndValidate(final InputStream in, int numBytes) throws IOException, ChecksumException { final CheckedInputStream cin = decode(in); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index 7297aab5..dc5392d4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -16,7 +16,7 @@ import java.lang.reflect.Type; @NameConfig.Name(ShardingCodec.TYPE) -public class ShardingCodec implements Codec { +public class ShardingCodec implements Codec.BytesToBytes { //TODO Caleb: should be ArrayToBytes private static final long serialVersionUID = -5879797314954717810L; diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index 6e2e9e8f..076ae246 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -25,13 +25,21 @@ */ package org.janelia.saalfeldlab.n5; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThrows; -import static org.junit.Assert.assertTrue; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonNull; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.gson.reflect.TypeToken; +import org.janelia.saalfeldlab.n5.N5Exception.N5ClassCastException; +import org.janelia.saalfeldlab.n5.N5Reader.Version; +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import 
org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; import java.io.IOException; import java.net.URISyntaxException; @@ -48,19 +56,13 @@ import java.util.concurrent.Executors; import java.util.function.Predicate; -import org.janelia.saalfeldlab.n5.N5Exception.N5ClassCastException; -import org.janelia.saalfeldlab.n5.N5Reader.Version; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import com.google.gson.GsonBuilder; -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonNull; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; -import com.google.gson.reflect.TypeToken; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; /** * Abstract base class for testing N5 functionality. 
@@ -117,6 +119,7 @@ protected final N5Writer createTempN5Writer(String location, GsonBuilder gson) { @After public void removeTempWriters() { + synchronized (tempWriters) { for (final N5Writer writer : tempWriters) { try { @@ -208,7 +211,7 @@ public void testSetAttributeDoesntCreateGroup() { } @Test - public void testCreateDataset() { + public void testCreateDataset() { final DatasetAttributes info; try (N5Writer writer = createTempN5Writer()) { @@ -246,6 +249,33 @@ public void testWriteReadByteBlock() { } } + @Test + public void testWriteReadByteBlockMultipleCompressors() { + + try (final N5Writer n5 = createTempN5Writer()) { + final Codec[] codecs = { + new BytesCodec(), + new AsTypeCodec(DataType.INT32, DataType.INT8), + new AsTypeCodec(DataType.INT64, DataType.INT32), + }; + final long[] longBlock1 = new long[]{1,2,3,4,5,6,7,8}; + final long[] dimensions1 = new long[]{2,2,2}; + final int[] blockSize1 = new int[]{2,2,2}; + n5.createDataset(datasetName, dimensions1, blockSize1, DataType.INT8, new RawCompression(), codecs); +// n5.createDataset(datasetName, dimensions, blockSize, DataType.INT64, new RawCompression(), codecs); + final DatasetAttributes attributes = n5.getDatasetAttributes(datasetName); +// final LongArrayDataBlock dataBlock = new LongArrayDataBlock(blockSize, new long[]{0, 0, 0}, longBlock); + final LongArrayDataBlock dataBlock = new LongArrayDataBlock(blockSize1, new long[]{0, 0, 0}, longBlock1); + n5.writeBlock(datasetName, attributes, dataBlock); + + final DatasetAttributes fakeAttributes = new DatasetAttributes(dimensions1, blockSize1, DataType.INT64, new RawCompression(), codecs); + final DataBlock loadedDataBlock = n5.readBlock(datasetName, fakeAttributes, 0, 0, 0); + assertArrayEquals(byteBlock, (byte[])loadedDataBlock.getData()); + assertTrue(n5.remove(datasetName)); + + } + } + @Test public void testWriteReadStringBlock() { @@ -465,7 +495,7 @@ public void testOverwriteBlock() { } @Test - public void testAttributeParsingPrimitive() { + 
public void testAttributeParsingPrimitive() { try (final N5Writer n5 = createTempN5Writer()) { @@ -541,7 +571,7 @@ public void testAttributeParsingPrimitive() { } @Test - public void testAttributes() { + public void testAttributes() { try (final N5Writer n5 = createTempN5Writer()) { assertNull(n5.getAttribute(groupName, "test", String.class)); @@ -607,7 +637,6 @@ public void testAttributes() { } } - @Test public void testNullAttributes() throws URISyntaxException, IOException { @@ -831,7 +860,7 @@ public void testUri() throws IOException, URISyntaxException { } @Test - public void testRemoveGroup() { + public void testRemoveGroup() { try (final N5Writer n5 = createTempN5Writer()) { n5.createDataset(datasetName, dimensions, blockSize, DataType.UINT64, new RawCompression()); @@ -982,7 +1011,7 @@ public void testDeepList() throws ExecutionException, InterruptedException { } @Test - public void testExists() { + public void testExists() { final String groupName2 = groupName + "-2"; final String datasetName2 = datasetName + "-2"; @@ -1003,7 +1032,7 @@ public void testExists() { } @Test - public void testListAttributes() { + public void testListAttributes() { try (N5Writer n5 = createTempN5Writer()) { final String groupName2 = groupName + "-2"; @@ -1106,7 +1135,7 @@ public void testReaderCreation() throws IOException, URISyntaxException { writer.setAttribute("/", N5Reader.VERSION_KEY, invalidVersion); assertThrows("Incompatible version throws error", N5Exception.class, () -> { try (final N5Reader ignored = createN5Reader(location)) { - /*Only try with resource to ensure `close()` is called.*/ + /*Only try with resource to ensure `close()` is called.*/ } }); } finally { @@ -1123,7 +1152,7 @@ public void testReaderCreation() throws IOException, URISyntaxException { } @Test - public void testDelete() { + public void testDelete() { try (N5Writer n5 = createTempN5Writer()) { final String datasetName = AbstractN5Test.datasetName + "-test-delete"; @@ -1209,7 +1238,7 @@ protected 
static void runTests(final N5Writer writer, final ArrayList Date: Fri, 23 Aug 2024 10:02:50 -0400 Subject: [PATCH 038/124] fix: isDataset caching --- .../org/janelia/saalfeldlab/n5/DatasetAttributes.java | 3 +-- .../java/org/janelia/saalfeldlab/n5/AbstractN5Test.java | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index ff420fea..3eb2b8d5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -16,8 +16,6 @@ import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; -import javax.xml.crypto.Data; - /** * Mandatory dataset attributes: * @@ -187,6 +185,7 @@ public static class DatasetAttributesAdapter implements JsonSerializer loadedDataBlock = n5.readBlock(datasetName, fakeAttributes, 0, 0, 0); - assertArrayEquals(byteBlock, (byte[])loadedDataBlock.getData()); + assertArrayEquals(longBlock1, (long[])loadedDataBlock.getData()); assertTrue(n5.remove(datasetName)); } From 9155ec5e4faa710568424c98d774771c387ffa64 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 26 Aug 2024 10:52:31 -0400 Subject: [PATCH 039/124] test: fix FixedScaleOffsetTests --- .../org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java | 4 ++-- .../janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java | 4 ++++ .../janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 60244334..b74002f8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -23,7 +23,7 @@ public 
ShardedDatasetAttributes( final Compression compression, final Codec[] codecs) { - super(dimensions, shardSize, dataType, compression, codecs); + super(dimensions, blockSize, dataType, compression, codecs); this.shardSize = shardSize; this.indexLocation = shardIndexLocation; @@ -105,7 +105,7 @@ public long getNumBlocks() { public static int[] getBlockSize(Codec[] codecs) { - for (Codec codec : codecs) + for (final Codec codec : codecs) if (codec instanceof ShardingCodec) return ((ShardingCodec)codec).getBlockSize(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java index 55613b57..e6c83163 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetCodec.java @@ -98,12 +98,16 @@ public String getType() { @Override public InputStream decode(InputStream in) throws IOException { + numBytes = bytes(dataType); + numEncodedBytes = bytes(encodedType); return new FixedLengthConvertedInputStream(numEncodedBytes, numBytes, this.decoder, in); } @Override public OutputStream encode(OutputStream out) throws IOException { + numBytes = bytes(dataType); + numEncodedBytes = bytes(encodedType); return new FixedLengthConvertedOutputStream(numBytes, numEncodedBytes, this.encoder, out); } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java index 6de67690..135a7f2b 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedScaleOffsetTests.java @@ -35,7 +35,7 @@ public void testDouble2Byte() throws IOException { public void testLong2Short() throws IOException { final int N = 16; - final ByteBuffer encodedLongs = ByteBuffer.allocate(Double.BYTES * N); + final ByteBuffer encodedLongs = 
ByteBuffer.allocate(Long.BYTES * N); final ByteBuffer encodedShorts = ByteBuffer.allocate(Short.BYTES * N); final long scale = 2; From e8cfefd382a007a5ce7865cb4106e2c7f227fc00 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 26 Aug 2024 11:18:46 -0400 Subject: [PATCH 040/124] fix: LockedFileChannel locking --- .../org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index bb2cf2db..03dfde81 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -75,7 +75,7 @@ protected class LockedFileChannel implements LockedChannel { protected LockedFileChannel(final String path, final boolean readOnly) throws IOException { - this(fileSystem.getPath(path), readOnly, 0, 0); + this(fileSystem.getPath(path), readOnly, 0, Long.MAX_VALUE); } protected LockedFileChannel(final String path, final boolean readOnly, final long startByte, final long size) throws IOException { @@ -85,7 +85,7 @@ protected LockedFileChannel(final String path, final boolean readOnly, final lon protected LockedFileChannel(final Path path, final boolean readOnly) throws IOException { - this(path, readOnly, 0, 0); + this(path, readOnly, 0, Long.MAX_VALUE); } protected LockedFileChannel(final Path path, final boolean readOnly, final long startByte, final long size) From 17ef0fef07ad0e25992ba5b088c179e52a2efeb0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 26 Aug 2024 11:45:19 -0400 Subject: [PATCH 041/124] fix: LockedFileChannel truncation logic --- .../org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java 
b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index 03dfde81..54aacf80 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -93,10 +93,10 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long final long start = startByte < 0 ? 0L : startByte; - final long len = size < 0 ? 0L : size; + final long len = size < 0 ? Long.MAX_VALUE : size; //TODO Caleb: How does this handle if manually overwriting the entire file? (e.g. len > file size) - truncate = (start == 0 && len == 0); + truncate = (start == 0 && len == Long.MAX_VALUE); final OpenOption[] options; if (readOnly) { From 2e18288dbf647e93e39b110fefaf9c685a0b5a16 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 26 Aug 2024 14:56:26 -0400 Subject: [PATCH 042/124] fix: NameConfigAdapter avoid NPE * configuration may be null when all parameters are optional --- .../java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java index 5081f821..2cb72463 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java @@ -188,7 +188,7 @@ public T deserialize( /* It's ok to be null if all parameters are optional. 
* Otherwise, return*/ if (configuration == null) { - for (Field field : parameters.get(type).values()) { + for (final Field field : parameters.get(type).values()) { if (!field.getAnnotation(NameConfig.Parameter.class).optional()) return null; } @@ -204,7 +204,7 @@ public T deserialize( for (final Entry parameterType : parameterTypes.entrySet()) { final String fieldName = parameterType.getKey(); final String paramName = parameterNameMap.get(fieldName); - final JsonElement paramJson = configuration.get(paramName); + final JsonElement paramJson = configuration == null ? null : configuration.get(paramName); final Field field = parameterType.getValue(); if (paramJson != null) { final Object parameter; From 71802905336096f9db27cfe3556993479a5986fd Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 26 Aug 2024 14:57:35 -0400 Subject: [PATCH 043/124] pref: normalGetDatasetAttributes should call createDatasetAttributes * so zarr only needs to override createDatasetAttributes --- .../org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java index d2e8418e..324d242e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/CachedGsonKeyValueN5Reader.java @@ -27,7 +27,6 @@ import java.lang.reflect.Type; -import com.google.gson.reflect.TypeToken; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.cache.N5JsonCache; import org.janelia.saalfeldlab.n5.cache.N5JsonCacheableContainer; @@ -83,7 +82,7 @@ default DatasetAttributes normalGetDatasetAttributes(final String pathName) thro final String normalPath = N5URI.normalizeGroupPath(pathName); final JsonElement attributes = GsonKeyValueN5Reader.super.getAttributes(normalPath); - return 
getGson().fromJson(attributes, DatasetAttributes.class); + return createDatasetAttributes(attributes); } @Override From 764d05ad63305447dc0f757a46f4175400af03c5 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 3 Sep 2024 16:48:17 -0400 Subject: [PATCH 044/124] feat: DataBlock methods to read/write directly from DataInput/Output --- .../saalfeldlab/n5/AbstractDataBlock.java | 21 +++++++++++++++++++ .../saalfeldlab/n5/ByteArrayDataBlock.java | 8 +++++++ .../org/janelia/saalfeldlab/n5/DataBlock.java | 7 +++++++ .../saalfeldlab/n5/DoubleArrayDataBlock.java | 9 ++++++++ .../saalfeldlab/n5/FloatArrayDataBlock.java | 9 ++++++++ .../saalfeldlab/n5/IntArrayDataBlock.java | 19 ++++++++++++++++- .../saalfeldlab/n5/LongArrayDataBlock.java | 17 +++++++++++++++ .../saalfeldlab/n5/ShortArrayDataBlock.java | 13 +++++------- 8 files changed, 94 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/AbstractDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/AbstractDataBlock.java index f1cbc352..59208fcf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/AbstractDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/AbstractDataBlock.java @@ -25,6 +25,11 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; + /** * Abstract base class for {@link DataBlock} implementations. 
* @@ -63,4 +68,20 @@ public T getData() { return data; } + + @Override + public void readData(final DataInput input) throws IOException { + + final ByteBuffer buffer = toByteBuffer(); + input.readFully(buffer.array()); + readData(buffer); + } + + @Override + public void writeData(final DataOutput output) throws IOException { + + final ByteBuffer buffer = toByteBuffer(); + output.write(buffer.array()); + } + } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ByteArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/ByteArrayDataBlock.java index 5717ad2e..4610811d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ByteArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ByteArrayDataBlock.java @@ -25,6 +25,8 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.IOException; import java.nio.ByteBuffer; public class ByteArrayDataBlock extends AbstractDataBlock { @@ -47,6 +49,12 @@ public void readData(final ByteBuffer buffer) { buffer.get(getData()); } + @Override + public void readData(final DataInput inputStream) throws IOException { + + inputStream.readFully(data); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/DataBlock.java index 3d9dc92a..5ccdbbaf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DataBlock.java @@ -25,6 +25,9 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.nio.ByteBuffer; /** @@ -94,6 +97,10 @@ public interface DataBlock { */ public void readData(final ByteBuffer buffer); + public void readData(final DataInput inputStream) throws IOException; + + public void writeData(final DataOutput output) throws IOException; + /** * Returns the number of elements in this {@link DataBlock}. 
This number is * not necessarily equal {@link #getNumElements(int[]) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java index 426c7944..8cbb1511 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java @@ -25,6 +25,8 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.IOException; import java.nio.ByteBuffer; public class DoubleArrayDataBlock extends AbstractDataBlock { @@ -48,6 +50,13 @@ public void readData(final ByteBuffer buffer) { buffer.asDoubleBuffer().get(data); } + @Override + public void readData(final DataInput inputStream) throws IOException { + + for (int i = 0; i < data.length; i++) + data[i] = inputStream.readDouble(); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java index b8d30999..aa97ce3f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java @@ -25,6 +25,8 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.IOException; import java.nio.ByteBuffer; public class FloatArrayDataBlock extends AbstractDataBlock { @@ -48,6 +50,13 @@ public void readData(final ByteBuffer buffer) { buffer.asFloatBuffer().get(data); } + @Override + public void readData(final DataInput inputStream) throws IOException { + + for (int i = 0; i < data.length; i++) + data[i] = inputStream.readFloat(); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/IntArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/IntArrayDataBlock.java index 98c5577d..4d338332 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/IntArrayDataBlock.java 
+++ b/src/main/java/org/janelia/saalfeldlab/n5/IntArrayDataBlock.java @@ -25,10 +25,13 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.nio.ByteBuffer; -public class IntArrayDataBlock extends AbstractDataBlock { +public class IntArrayDataBlock extends AbstractDataBlock { public IntArrayDataBlock(final int[] size, final long[] gridPosition, final int[] data) { super(size, gridPosition, data); @@ -48,6 +51,20 @@ public void readData(final ByteBuffer buffer) { buffer.asIntBuffer().get(data); } + @Override + public void readData(final DataInput input) throws IOException { + + for (int i = 0; i < data.length; i++) + data[i] = input.readInt(); + } + + @Override + public void writeData(final DataOutput output) throws IOException { + + for (int i = 0; i < data.length; i++) + output.writeInt(data[i]); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/LongArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/LongArrayDataBlock.java index d3f3fc9c..be435c4f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/LongArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/LongArrayDataBlock.java @@ -25,6 +25,9 @@ */ package org.janelia.saalfeldlab.n5; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.nio.ByteBuffer; public class LongArrayDataBlock extends AbstractDataBlock { @@ -48,6 +51,20 @@ public void readData(final ByteBuffer buffer) { buffer.asLongBuffer().get(data); } + @Override + public void readData(final DataInput inputStream) throws IOException { + + for (int i = 0; i < data.length; i++) + data[i] = inputStream.readLong(); + } + + @Override + public void writeData(final DataOutput output) throws IOException { + + for (int i = 0; i < data.length; i++) + output.writeLong(data[i]); + } + @Override public int getNumElements() { diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java index 68aa4b35..34c5a883 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java @@ -25,9 +25,8 @@ */ package org.janelia.saalfeldlab.n5; -import java.io.DataInputStream; +import java.io.DataInput; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; public class ShortArrayDataBlock extends AbstractDataBlock { @@ -51,13 +50,11 @@ public void readData(final ByteBuffer buffer) { buffer.asShortBuffer().get(data); } + @Override + public void readData(final DataInput dataInput) throws IOException { - public void readData(final InputStream stream) throws IOException { - - final DataInputStream dis = new DataInputStream(stream); - for (int i = 0; i < data.length; i++) { - data[i] = dis.readShort(); - } + for (int i = 0; i < data.length; i++) + data[i] = dataInput.readShort(); } @Override From dbd7b158930734cb85af2ecf5bbad40eddd60b62 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 3 Sep 2024 20:13:59 -0400 Subject: [PATCH 045/124] refactor: create N5BytesCodec, BytesCodec is simple zarr approach --- .../saalfeldlab/n5/DatasetAttributes.java | 49 ++++-- .../saalfeldlab/n5/codec/BytesCodec.java | 114 +++++-------- .../saalfeldlab/n5/codec/N5BytesCodec.java | 153 ++++++++++++++++++ .../saalfeldlab/n5/AbstractN5Test.java | 2 +- .../saalfeldlab/n5/shard/ShardDemos.java | 6 +- 5 files changed, 231 insertions(+), 93 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 3eb2b8d5..d12feee1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -5,6 +5,10 @@ import java.util.Arrays; import java.util.HashMap; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec; + import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; import com.google.gson.JsonElement; @@ -12,9 +16,6 @@ import com.google.gson.JsonParseException; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** * Mandatory dataset attributes: @@ -55,7 +56,8 @@ public class DatasetAttributes implements Serializable { private final int[] blockSize; private final DataType dataType; private final Compression compression; - private final Codec[] codecs; + private final Codec.ArrayToBytes arrayToBytesCodec; + private final Codec[] byteByteCodecs; public DatasetAttributes( final long[] dimensions, @@ -69,11 +71,19 @@ public DatasetAttributes( this.dataType = dataType; this.compression = compression; if (codecs == null && !(compression instanceof RawCompression)) { - this.codecs = new Codec[]{new BytesCodec(), compression}; + byteByteCodecs = new Codec[]{compression}; + arrayToBytesCodec = new N5BytesCodec(); } else if (codecs == null) { - this.codecs = new Codec[]{new BytesCodec()}; + byteByteCodecs = new Codec[]{}; + arrayToBytesCodec = new N5BytesCodec(); } else { - this.codecs = codecs; + if (!(codecs[0] instanceof Codec.ArrayToBytes)) + throw new N5Exception("Expected first element of codecs to be ArrayToBytes, but was: " + codecs[0]); + + arrayToBytesCodec = (Codec.ArrayToBytes)codecs[0]; + byteByteCodecs = new Codec[codecs.length - 1]; + for (int i = 0; i < byteByteCodecs.length; i++) + byteByteCodecs[i] = codecs[i + 1]; } } @@ -111,9 +121,14 @@ public 
DataType getDataType() { return dataType; } + public Codec.ArrayToBytes getArrayToBytesCodec() { + + return arrayToBytesCodec; + } + public Codec[] getCodecs() { - return codecs; + return byteByteCodecs; } public HashMap asMap() { @@ -123,7 +138,7 @@ public HashMap asMap() { map.put(BLOCK_SIZE_KEY, blockSize); map.put(DATA_TYPE_KEY, dataType); map.put(COMPRESSION_KEY, compression); - map.put(CODEC_KEY, codecs); // TODO : consider not adding to map when null + map.put(CODEC_KEY, concatenateCodecs()); // TODO : consider not adding to map when null return map; } @@ -173,6 +188,16 @@ private static Compression getCompressionVersion0(final String compressionVersio return null; } + private Codec[] concatenateCodecs() { + + final Codec[] allCodecs = new Codec[byteByteCodecs.length + 1]; + allCodecs[0] = arrayToBytesCodec; + for (int i = 0; i < byteByteCodecs.length; i++) + allCodecs[i + 1] = byteByteCodecs[i]; + + return allCodecs; + } + private static DatasetAttributesAdapter adapter = null; public static DatasetAttributesAdapter getJsonAdapter() { if (adapter == null) { @@ -208,9 +233,9 @@ public static class DatasetAttributesAdapter implements JsonSerializer allocateDataBlock() throws IOException { - if (start) { - readHeader(); - start = false; - } - if (mode != 2) { - return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); - } else { - return attributes.getDataType().createDataBlock(null, gridPosition, numElements); - } + final int[] blockSize = attributes.getBlockSize(); + final int numElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); + return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); } - private void readHeader() throws IOException { - final DataInputStream dis = new DataInputStream(in); - mode = dis.readShort(); - if (mode != 2) { - final int nDim = dis.readShort(); - blockSize = new int[nDim]; - for (int d = 0; d < nDim; ++d) - blockSize[d] = dis.readInt(); - if (mode == 0) { - 
numElements = DataBlock.getNumElements(blockSize); - } else { - numElements = dis.readInt(); - } - - } else { - numElements = dis.readInt(); - } + @Override + public DataInput getDataInput() { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataInputStream(super.in); + else + return new LittleEndianDataInputStream(super.in); } }; - } - @Override public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) throws IOException { + } - return new ProxyOutputStream(out) { + @Override + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, + final OutputStream out) + throws IOException { - boolean start = true; + return new DataBlockOutputStream(out) { - @Override protected void beforeWrite(int n) throws IOException { - if (start) { - writeHeader(); - start = false; - } - } + @Override + public void beforeWrite(OutputStream rawOut) throws IOException {} - private void writeHeader() throws IOException { - final DataOutputStream dos = new DataOutputStream(out); + @Override + public DataOutput getDataOutput() { - final int mode; - if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) - mode = 2; - else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) - mode = 0; + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataOutputStream(out); else - mode = 1; - dos.writeShort(mode); - - if (mode != 2) { - dos.writeShort(attributes.getNumDimensions()); - for (final int size : dataBlock.getSize()) - dos.writeInt(size); - } - - if (mode != 0) - dos.writeInt(dataBlock.getNumElements()); + return new LittleEndianDataOutputStream(out); } }; } @@ -164,4 +123,5 @@ public ByteOrder deserialize(JsonElement json, java.lang.reflect.Type typeOfT, } } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java new file mode 100644 
index 00000000..fb57a4c6 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java @@ -0,0 +1,153 @@ +package org.janelia.saalfeldlab.n5.codec; + +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteOrder; + +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + +import com.google.common.io.LittleEndianDataInputStream; +import com.google.common.io.LittleEndianDataOutputStream; + +@NameConfig.Name(value = N5BytesCodec.TYPE) +public class N5BytesCodec implements Codec.ArrayToBytes { + + private static final long serialVersionUID = 3523505403978222360L; + + public static final String TYPE = "n5bytes"; + + @NameConfig.Parameter(value = "endian", optional = true) + protected final ByteOrder byteOrder; + + public N5BytesCodec() { + + this(ByteOrder.BIG_ENDIAN); + } + + public N5BytesCodec(final ByteOrder byteOrder) { + + this.byteOrder = byteOrder; + } + + @Override public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, InputStream in) throws IOException { + + return new DataBlockInputStream(in) { + + private short mode = -1; + private int[] blockSize = null; + private int numElements = -1; + + private boolean start = true; + + @Override protected void beforeRead(int n) throws IOException { + + if (start) { + readHeader(); + start = false; + } + } + + @Override + public DataBlock allocateDataBlock() throws IOException { + if (start) { + readHeader(); + start = false; + } + if (mode != 2) { + return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); + } else { + return attributes.getDataType().createDataBlock(null, gridPosition, numElements); + } + } 
+ + private void readHeader() throws IOException { + final DataInputStream dis = new DataInputStream(in); + mode = dis.readShort(); + if (mode != 2) { + final int nDim = dis.readShort(); + blockSize = new int[nDim]; + for (int d = 0; d < nDim; ++d) + blockSize[d] = dis.readInt(); + if (mode == 0) { + numElements = DataBlock.getNumElements(blockSize); + } else { + numElements = dis.readInt(); + } + + } else { + numElements = dis.readInt(); + } + } + + @Override + public DataInput getDataInput() { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataInputStream(super.in); + else + return new LittleEndianDataInputStream(super.in); + } + }; + } + + @Override + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, + final OutputStream out) + throws IOException { + + return new DataBlockOutputStream(out) { + + @Override + public void beforeWrite(OutputStream rawOut) throws IOException { + + writeHeader(rawOut); + } + + private void writeHeader(OutputStream out) throws IOException { + final DataOutputStream dos = new DataOutputStream(out); + + final int mode; + if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) + mode = 2; + else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) + mode = 0; + else + mode = 1; + dos.writeShort(mode); + + if (mode != 2) { + dos.writeShort(attributes.getNumDimensions()); + for (final int size : dataBlock.getSize()) + dos.writeInt(size); + } + + if (mode != 0) + dos.writeInt(dataBlock.getNumElements()); + } + + @Override + public DataOutput getDataOutput() { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataOutputStream(out); + else + return new LittleEndianDataOutputStream(out); + } + }; + } + + @Override + public String getType() { + + return TYPE; + } + +} diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index 
0d0d0123..4372ec2f 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -256,7 +256,7 @@ public void testWriteReadByteBlockMultipleCodecs() { * maybe is not the behavior we actually want*/ try (final N5Writer n5 = createTempN5Writer()) { final Codec[] codecs = { - new BytesCodec(), + new N5BytesCodec(), new AsTypeCodec(DataType.INT32, DataType.INT8), new AsTypeCodec(DataType.INT64, DataType.INT32), }; diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 3479efe5..50981dc6 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -8,7 +8,7 @@ import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.IdentityCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; @@ -72,10 +72,10 @@ public void writeReadBlockTest() { DataType.UINT8, new RawCompression(), new Codec[]{ - new BytesCodec(), + new N5BytesCodec(), new ShardingCodec( new int[]{2, 2}, - new Codec[]{new BytesCodec(), new GzipCompression(4)}, + new Codec[]{new N5BytesCodec(), new GzipCompression(4)}, new Codec[]{new Crc32cChecksumCodec()}, IndexLocation.END ) From 786e4d5ea38f7a739f3dc827bfb8e22d6c5639c9 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 09:17:14 -0400 Subject: [PATCH 046/124] chore(pom): depend on guava --- pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pom.xml b/pom.xml index be2913ba..9ea763f0 100644 --- a/pom.xml +++ b/pom.xml @@ -169,6 +169,10 @@ org.apache.commons commons-compress + 
+ com.google.guava + guava + From 2a7327a306aaa4906ba4eba7496b124dfe6502b6 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 10:38:48 -0400 Subject: [PATCH 047/124] wip: undo ArrayToBytes codec changes --- .../saalfeldlab/n5/codec/BytesCodec.java | 43 ++++++------------- .../saalfeldlab/n5/codec/N5BytesCodec.java | 42 ++++++------------ 2 files changed, 26 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 4d19ec86..42304a9e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -1,21 +1,16 @@ package org.janelia.saalfeldlab.n5.codec; -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteOrder; import java.util.Arrays; +import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.serialization.NameConfig; -import com.google.common.io.LittleEndianDataInputStream; -import com.google.common.io.LittleEndianDataOutputStream; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; import com.google.gson.JsonElement; @@ -50,43 +45,31 @@ public DataBlockInputStream decode(final DatasetAttributes attributes, final lon return new DataBlockInputStream(in) { + private int[] blockSize = attributes.getBlockSize(); + private int numElements = Arrays.stream(blockSize).reduce(1, (x, y) -> { + return x * y; + }); + @Override - public DataBlock allocateDataBlock() throws IOException { - final int[] blockSize = attributes.getBlockSize(); - final int numElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); - return 
attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); - } + protected void beforeRead(int n) throws IOException {} @Override - public DataInput getDataInput() { + public DataBlock allocateDataBlock() throws IOException { - if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) - return new DataInputStream(super.in); - else - return new LittleEndianDataInputStream(super.in); + return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); } - }; + }; } @Override - public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, - final OutputStream out) + public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) throws IOException { - return new DataBlockOutputStream(out) { - - @Override - public void beforeWrite(OutputStream rawOut) throws IOException {} + return new ProxyOutputStream(out) { @Override - public DataOutput getDataOutput() { - - if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) - return new DataOutputStream(out); - else - return new LittleEndianDataOutputStream(out); - } + protected void beforeWrite(int n) throws IOException {} }; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java index fb57a4c6..813d3523 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java @@ -1,22 +1,18 @@ package org.janelia.saalfeldlab.n5.codec; -import java.io.DataInput; import java.io.DataInputStream; -import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteOrder; +import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; 
import org.janelia.saalfeldlab.n5.serialization.NameConfig; -import com.google.common.io.LittleEndianDataInputStream; -import com.google.common.io.LittleEndianDataOutputStream; - @NameConfig.Name(value = N5BytesCodec.TYPE) public class N5BytesCodec implements Codec.ArrayToBytes { @@ -37,6 +33,7 @@ public N5BytesCodec(final ByteOrder byteOrder) { this.byteOrder = byteOrder; } + @Override public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, InputStream in) throws IOException { return new DataBlockInputStream(in) { @@ -86,32 +83,28 @@ private void readHeader() throws IOException { numElements = dis.readInt(); } } - - @Override - public DataInput getDataInput() { - - if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) - return new DataInputStream(super.in); - else - return new LittleEndianDataInputStream(super.in); - } }; } + @Override - public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, - final OutputStream out) + public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) throws IOException { - return new DataBlockOutputStream(out) { + return new ProxyOutputStream(out) { + + boolean start = true; @Override - public void beforeWrite(OutputStream rawOut) throws IOException { + protected void beforeWrite(int n) throws IOException { - writeHeader(rawOut); + if (start) { + writeHeader(); + start = false; + } } - private void writeHeader(OutputStream out) throws IOException { + private void writeHeader() throws IOException { final DataOutputStream dos = new DataOutputStream(out); final int mode; @@ -132,15 +125,6 @@ else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSiz if (mode != 0) dos.writeInt(dataBlock.getNumElements()); } - - @Override - public DataOutput getDataOutput() { - - if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) - return new DataOutputStream(out); - else - return new 
LittleEndianDataOutputStream(out); - } }; } From c24605d807cfafde2b8a3ca99c238cf70848184a Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 10:47:11 -0400 Subject: [PATCH 048/124] fix(wip): DefaultBlockReader/Writer can get ArrayToBytesCodec directly --- .../janelia/saalfeldlab/n5/DefaultBlockReader.java | 11 ++++------- .../janelia/saalfeldlab/n5/DefaultBlockWriter.java | 12 +++++------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index bb3f2639..6890cbc2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -73,13 +73,10 @@ public static DataBlock readBlock( Codec.DataBlockInputStream dataBlockStream = null; InputStream stream = in; - for (Codec codec : datasetAttributes.getCodecs()) { - if (codec instanceof Codec.ArrayToBytes) { - stream = dataBlockStream = ((Codec.ArrayToBytes)codec).decode(datasetAttributes, gridPosition, stream); - } - if (codec instanceof Codec.BytesToBytes) { - stream = ((Codec.BytesToBytes)codec).decode(stream); - } + stream = dataBlockStream = datasetAttributes.getArrayToBytesCodec().decode(datasetAttributes, gridPosition, + stream); + for (final Codec codec : datasetAttributes.getCodecs()) { + stream = ((Codec.BytesToBytes)codec).decode(stream); } final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index 6b0e988d..df584f83 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -25,11 +25,12 @@ */ package org.janelia.saalfeldlab.n5; -import org.janelia.saalfeldlab.n5.codec.Codec; import java.io.IOException; import 
java.io.OutputStream; import java.nio.ByteBuffer; +import org.janelia.saalfeldlab.n5.codec.Codec; + /** * Default implementation of {@link BlockWriter}. * @@ -70,14 +71,11 @@ public static void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws IOException { - OutputStream stream = out; final Codec[] codecs = datasetAttributes.getCodecs(); - for (Codec codec : codecs) { - if (codec instanceof Codec.BytesToBytes) - stream = ((Codec.BytesToBytes)codec).encode(stream); - else if (codec instanceof Codec.ArrayToBytes) - stream = ((Codec.ArrayToBytes)codec).encode(datasetAttributes, dataBlock, stream); + stream = datasetAttributes.getArrayToBytesCodec().encode(datasetAttributes, dataBlock, stream); + for (final Codec codec : codecs) { + stream = ((Codec.BytesToBytes)codec).encode(stream); } writeFromStream(dataBlock, stream); From 5f8fa624e29df82867b774644e583932cbe1bf5a Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 11:48:35 -0400 Subject: [PATCH 049/124] test/fix: BytesTests now works with refactor --- .../janelia/saalfeldlab/n5/codec/BytesTests.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index e8bf64b0..fa407163 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -29,7 +29,7 @@ public void testSerialization() { final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.zarr"); final Codec bytes = reader.getAttribute("mid_sharded", "codecs[0]/configuration/codecs[0]", Codec.class); - assertTrue("as BytesCodec", bytes instanceof BytesCodec); + assertTrue("as BytesCodec", bytes instanceof N5BytesCodec); final N5Writer writer = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); @@ -39,16 +39,17 @@ public void 
testSerialization() { DataType.UINT8, new RawCompression(), new Codec[]{ - new IdentityCodec(), - new BytesCodec(ByteOrder.LITTLE_ENDIAN) + new N5BytesCodec(ByteOrder.LITTLE_ENDIAN), + new IdentityCodec() } ); writer.setAttribute("shard", "/", datasetAttributes); final DatasetAttributes deserialized = writer.getAttribute("shard", "/", DatasetAttributes.class); - assertEquals("2 codecs", 2, deserialized.getCodecs().length); + assertEquals("1 codecs", 1, deserialized.getCodecs().length); assertTrue("Identity", deserialized.getCodecs()[0] instanceof IdentityCodec); - assertTrue("Bytes", deserialized.getCodecs()[1] instanceof BytesCodec); - assertEquals("LittleEndian",ByteOrder.LITTLE_ENDIAN, ((BytesCodec)deserialized.getCodecs()[1]).byteOrder); + assertTrue("Bytes", deserialized.getArrayToBytesCodec() instanceof N5BytesCodec); + assertEquals("LittleEndian", ByteOrder.LITTLE_ENDIAN, + ((N5BytesCodec)deserialized.getArrayToBytesCodec()).byteOrder); } } From 784a4a2ca6228d8293d970fffb56da2642345971 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 13:37:18 -0400 Subject: [PATCH 050/124] wip: toward supporting endianness --- .../saalfeldlab/n5/DefaultBlockReader.java | 2 +- .../saalfeldlab/n5/DefaultBlockWriter.java | 8 +++-- .../saalfeldlab/n5/codec/BytesCodec.java | 30 +++++++++++++++++-- .../janelia/saalfeldlab/n5/codec/Codec.java | 29 +++++++++++++----- .../saalfeldlab/n5/codec/N5BytesCodec.java | 29 ++++++++++++++++-- 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 6890cbc2..5df1fc01 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -80,7 +80,7 @@ public static DataBlock readBlock( } final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); - readFromStream(dataBlock, stream); + 
dataBlock.readData(dataBlockStream.getDataInput(stream)); return dataBlock; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index df584f83..4c9a18e9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -30,6 +30,8 @@ import java.nio.ByteBuffer; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.Codec.ArrayToBytes; +import org.janelia.saalfeldlab.n5.codec.Codec.DataBlockOutputStream; /** * Default implementation of {@link BlockWriter}. @@ -73,12 +75,14 @@ public static void writeBlock( OutputStream stream = out; final Codec[] codecs = datasetAttributes.getCodecs(); - stream = datasetAttributes.getArrayToBytesCodec().encode(datasetAttributes, dataBlock, stream); + final ArrayToBytes arrayToBytes = datasetAttributes.getArrayToBytesCodec(); + final DataBlockOutputStream dataBlockOutput; + stream = dataBlockOutput = arrayToBytes.encode(datasetAttributes, dataBlock, stream); for (final Codec codec : codecs) { stream = ((Codec.BytesToBytes)codec).encode(stream); } - writeFromStream(dataBlock, stream); + dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); stream.flush(); //FIXME Caleb: The stream must be closed BUT it shouldn't be `writeBlock`'s responsibility. 
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 42304a9e..8cb8e116 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -1,16 +1,21 @@ package org.janelia.saalfeldlab.n5.codec; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteOrder; import java.util.Arrays; -import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.serialization.NameConfig; +import com.google.common.io.LittleEndianDataInputStream; +import com.google.common.io.LittleEndianDataOutputStream; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; import com.google.gson.JsonElement; @@ -59,17 +64,36 @@ public DataBlock allocateDataBlock() throws IOException { return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); } + @Override + public DataInput getDataInput(final InputStream inputStream) { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataInputStream(inputStream); + else + return new LittleEndianDataInputStream(inputStream); + } + }; } @Override - public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, + final OutputStream out) throws IOException { - return new ProxyOutputStream(out) { + return new DataBlockOutputStream(out) { @Override protected void beforeWrite(int n) throws IOException {} + + @Override + public DataOutput getDataOutput(OutputStream outputStream) { + + 
if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataOutputStream(outputStream); + else + return new LittleEndianDataOutputStream(outputStream); + } }; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index fdd3a037..a8fdd999 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -1,16 +1,18 @@ package org.janelia.saalfeldlab.n5.codec; -import org.apache.commons.io.input.ProxyInputStream; -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; -import org.janelia.saalfeldlab.n5.serialization.NameConfig; - -import java.io.FilterInputStream; +import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; +import org.apache.commons.io.input.ProxyInputStream; +import org.apache.commons.io.output.ProxyOutputStream; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.serialization.NameConfig; + /** * Interface representing a filter can encode a {@link OutputStream}s when writing data, and decode * the {@link InputStream}s when reading data. 
@@ -58,7 +60,8 @@ interface ArrayToBytes extends Codec { * * @param datablock the datablock to encode */ - public OutputStream encode(final DatasetAttributes attributes, final DataBlock datablock, final OutputStream out) throws IOException; + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock datablock, + final OutputStream out) throws IOException; } @@ -71,6 +74,18 @@ protected DataBlockInputStream(InputStream in) { } public abstract DataBlock allocateDataBlock() throws IOException; + + public abstract DataInput getDataInput(final InputStream inputStream); + } + + public abstract class DataBlockOutputStream extends ProxyOutputStream { + + protected DataBlockOutputStream(final OutputStream out) { + + super(out); + } + + public abstract DataOutput getDataOutput(final OutputStream outputStream); } public String getType(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java index 813d3523..bdc71225 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java @@ -1,18 +1,22 @@ package org.janelia.saalfeldlab.n5.codec; +import java.io.DataInput; import java.io.DataInputStream; +import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteOrder; -import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.serialization.NameConfig; +import com.google.common.io.LittleEndianDataInputStream; +import com.google.common.io.LittleEndianDataOutputStream; + @NameConfig.Name(value = N5BytesCodec.TYPE) public class N5BytesCodec implements Codec.ArrayToBytes { @@ -83,15 +87,25 @@ private void 
readHeader() throws IOException { numElements = dis.readInt(); } } + + @Override + public DataInput getDataInput(final InputStream inputStream) { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataInputStream(inputStream); + else + return new LittleEndianDataInputStream(inputStream); + } }; } @Override - public OutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, final OutputStream out) + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, + final OutputStream out) throws IOException { - return new ProxyOutputStream(out) { + return new DataBlockOutputStream(out) { boolean start = true; @@ -125,6 +139,15 @@ else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSiz if (mode != 0) dos.writeInt(dataBlock.getNumElements()); } + + @Override + public DataOutput getDataOutput(final OutputStream outputStream) { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataOutputStream(outputStream); + else + return new LittleEndianDataOutputStream(outputStream); + } }; } From 87456a42e11072cdd62d65d6c7ede21e7fb452f2 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 13:37:37 -0400 Subject: [PATCH 051/124] wip: DatasetAttributes allow empty codecs --- src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index d12feee1..f853b20c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -73,7 +73,7 @@ public DatasetAttributes( if (codecs == null && !(compression instanceof RawCompression)) { byteByteCodecs = new Codec[]{compression}; arrayToBytesCodec = new N5BytesCodec(); - } else if (codecs == null) { + } else if (codecs == null || codecs.length == 0) 
{ byteByteCodecs = new Codec[]{}; arrayToBytesCodec = new N5BytesCodec(); } else { From 7ef4297a1d32e7082b12ad497e15c1f1530b5b67 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 14:40:46 -0400 Subject: [PATCH 052/124] style: import order --- .../saalfeldlab/n5/AbstractN5Test.java | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index 4372ec2f..939b9925 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -25,21 +25,13 @@ */ package org.janelia.saalfeldlab.n5; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonNull; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; -import com.google.gson.reflect.TypeToken; -import org.janelia.saalfeldlab.n5.N5Exception.N5ClassCastException; -import org.janelia.saalfeldlab.n5.N5Reader.Version; -import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import java.io.IOException; import java.net.URISyntaxException; @@ -56,13 +48,22 @@ import java.util.concurrent.Executors; import java.util.function.Predicate; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static 
org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThrows; -import static org.junit.Assert.assertTrue; +import org.janelia.saalfeldlab.n5.N5Exception.N5ClassCastException; +import org.janelia.saalfeldlab.n5.N5Reader.Version; +import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonNull; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.gson.reflect.TypeToken; /** * Abstract base class for testing N5 functionality. From b80fef09617c76347bd8d28dedabe57c847b3b11 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 4 Sep 2024 15:08:50 -0400 Subject: [PATCH 053/124] fix: BytesTest N5BytesCodec has name "n5bytes" --- .../shardExamples/test.zarr/mid_sharded/attributes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json b/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json index a80cb9d9..920dff92 100644 --- a/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json +++ b/src/test/resources/shardExamples/test.zarr/mid_sharded/attributes.json @@ -32,7 +32,7 @@ ], "codecs": [ { - "name": "bytes", + "name": "n5bytes", "configuration": { "endian": "little" } From ae1f347c9d0713aadebea000912af2792a90b8aa Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Mon, 16 Sep 2024 15:05:24 -0400 Subject: [PATCH 054/124] feat: wip shard/codec support Basic working examples with file based shards writing new shards, and reading existing shards (with index at the end) --- .../janelia/saalfeldlab/n5/Compression.java | 2 +- 
.../saalfeldlab/n5/DatasetAttributes.java | 86 +++++++++------- .../saalfeldlab/n5/DefaultBlockReader.java | 19 ++-- .../saalfeldlab/n5/DefaultBlockWriter.java | 23 ++--- .../n5/FileSystemKeyValueAccess.java | 12 ++- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 7 +- .../saalfeldlab/n5/GzipCompression.java | 4 +- .../saalfeldlab/n5/KeyValueAccess.java | 2 +- .../saalfeldlab/n5/NameConfigAdapter.java | 4 +- .../n5/ShardedDatasetAttributes.java | 97 +++++++++++++++---- .../saalfeldlab/n5/codec/AsTypeCodec.java | 2 +- .../saalfeldlab/n5/codec/BytesCodec.java | 2 +- .../janelia/saalfeldlab/n5/codec/Codec.java | 13 ++- .../saalfeldlab/n5/codec/ComposedCodec.java | 50 ---------- .../n5/codec/DeterministicSizeCodec.java | 2 +- .../saalfeldlab/n5/codec/IdentityCodec.java | 3 +- .../{N5BytesCodec.java => N5BlockCodec.java} | 8 +- .../n5/codec/checksum/ChecksumCodec.java | 24 +++-- .../janelia/saalfeldlab/n5/shard/Shard.java | 20 +--- .../saalfeldlab/n5/shard/ShardIndex.java | 66 +++++++++---- .../saalfeldlab/n5/shard/ShardReader.java | 24 +++-- .../saalfeldlab/n5/shard/ShardWriter.java | 41 ++++---- .../saalfeldlab/n5/shard/ShardingCodec.java | 44 +++++---- .../saalfeldlab/n5/shard/VirtualShard.java | 71 +++++++------- .../saalfeldlab/n5/AbstractN5Test.java | 4 +- .../saalfeldlab/n5/codec/AsTypeTests.java | 6 +- .../saalfeldlab/n5/codec/BytesTests.java | 8 +- .../n5/serialization/CodecSerialization.java | 21 ++-- .../saalfeldlab/n5/shard/ShardDemos.java | 65 ++++++++----- 29 files changed, 409 insertions(+), 321 deletions(-) delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java rename src/main/java/org/janelia/saalfeldlab/n5/codec/{N5BytesCodec.java => N5BlockCodec.java} (95%) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index eac6a0ac..ac0c49b5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -43,7 +43,7 @@ * * @author Stephan Saalfeld */ -public interface Compression extends Serializable, Codec.BytesToBytes { +public interface Compression extends Serializable, Codec.BytesCodec { /** * Annotation for runtime discovery of compression schemes. diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index f853b20c..1ada1708 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -6,7 +6,9 @@ import java.util.HashMap; import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import com.google.gson.JsonDeserializationContext; @@ -56,8 +58,8 @@ public class DatasetAttributes implements Serializable { private final int[] blockSize; private final DataType dataType; private final Compression compression; - private final Codec.ArrayToBytes arrayToBytesCodec; - private final Codec[] byteByteCodecs; + private final ArrayCodec arrayCodec; + private final BytesCodec[] byteCodecs; public DatasetAttributes( final long[] dimensions, @@ -69,22 +71,37 @@ public DatasetAttributes( this.dimensions = dimensions; this.blockSize = blockSize; this.dataType = dataType; - this.compression = compression; if (codecs == null && !(compression instanceof RawCompression)) { - byteByteCodecs = new Codec[]{compression}; - arrayToBytesCodec = new N5BytesCodec(); + byteCodecs = new BytesCodec[]{compression}; + arrayCodec = new N5BlockCodec(); } else if (codecs == null || codecs.length == 0) { - byteByteCodecs = new Codec[]{}; - arrayToBytesCodec = new N5BytesCodec(); + byteCodecs = 
new BytesCodec[]{}; + arrayCodec = new N5BlockCodec(); } else { - if (!(codecs[0] instanceof Codec.ArrayToBytes)) - throw new N5Exception("Expected first element of codecs to be ArrayToBytes, but was: " + codecs[0]); + if (!(codecs[0] instanceof ArrayCodec)) + throw new N5Exception("Expected first element of codecs to be ArrayCodec, but was: " + codecs[0]); - arrayToBytesCodec = (Codec.ArrayToBytes)codecs[0]; - byteByteCodecs = new Codec[codecs.length - 1]; - for (int i = 0; i < byteByteCodecs.length; i++) - byteByteCodecs[i] = codecs[i + 1]; + arrayCodec = (ArrayCodec)codecs[0]; + byteCodecs = new BytesCodec[codecs.length - 1]; + for (int i = 0; i < byteCodecs.length; i++) + byteCodecs[i] = (BytesCodec)codecs[i + 1]; } + + //TODO Caleb: Do we want to do this? + this.compression = Arrays.stream(byteCodecs) + .filter(codec -> codec instanceof Compression) + .map(codec -> (Compression)codec) + .findFirst() + .orElse(compression == null ? new RawCompression() : compression); + + } + + public DatasetAttributes( + final long[] dimensions, + final int[] blockSize, + final DataType dataType, + final Codec[] codecs) { + this(dimensions, blockSize, dataType, null, codecs); } public DatasetAttributes( @@ -121,14 +138,14 @@ public DataType getDataType() { return dataType; } - public Codec.ArrayToBytes getArrayToBytesCodec() { + public ArrayCodec getArrayCodec() { - return arrayToBytesCodec; + return arrayCodec; } - public Codec[] getCodecs() { + public BytesCodec[] getCodecs() { - return byteByteCodecs; + return byteCodecs; } public HashMap asMap() { @@ -190,10 +207,10 @@ private static Compression getCompressionVersion0(final String compressionVersio private Codec[] concatenateCodecs() { - final Codec[] allCodecs = new Codec[byteByteCodecs.length + 1]; - allCodecs[0] = arrayToBytesCodec; - for (int i = 0; i < byteByteCodecs.length; i++) - allCodecs[i + 1] = byteByteCodecs[i]; + final Codec[] allCodecs = new Codec[byteCodecs.length + 1]; + allCodecs[0] = arrayCodec; + for 
(int i = 0; i < byteCodecs.length; i++) + allCodecs[i + 1] = byteCodecs[i]; return allCodecs; } @@ -233,19 +250,18 @@ public static class DatasetAttributesAdapter implements JsonSerializer blocksPerShard[i] * blockSize[i]); + return new ShardedDatasetAttributes( + dimensions, + shardSize, + blockSize, + dataType, + shardingCodec + ); } return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index 5df1fc01..f881aea9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -30,7 +30,9 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.DataBlockInputStream; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** @@ -71,16 +73,19 @@ public static DataBlock readBlock( final DatasetAttributes datasetAttributes, final long[] gridPosition) throws IOException { - Codec.DataBlockInputStream dataBlockStream = null; - InputStream stream = in; - stream = dataBlockStream = datasetAttributes.getArrayToBytesCodec().decode(datasetAttributes, gridPosition, - stream); - for (final Codec codec : datasetAttributes.getCodecs()) { - stream = ((Codec.BytesToBytes)codec).decode(stream); + final BytesCodec[] codecs = datasetAttributes.getCodecs(); + final ArrayCodec arrayCodec = datasetAttributes.getArrayCodec(); + final DataBlockInputStream dataBlockStream = arrayCodec.decode(datasetAttributes, gridPosition, in); + + InputStream stream = dataBlockStream; + for (final BytesCodec codec : codecs) { + stream = codec.decode(stream); } final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); 
dataBlock.readData(dataBlockStream.getDataInput(stream)); + stream.close(); + return dataBlock; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index 4c9a18e9..fd7450ef 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -29,8 +29,8 @@ import java.io.OutputStream; import java.nio.ByteBuffer; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.Codec.ArrayToBytes; +import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec.DataBlockOutputStream; /** @@ -73,20 +73,15 @@ public static void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws IOException { - OutputStream stream = out; - final Codec[] codecs = datasetAttributes.getCodecs(); - final ArrayToBytes arrayToBytes = datasetAttributes.getArrayToBytesCodec(); - final DataBlockOutputStream dataBlockOutput; - stream = dataBlockOutput = arrayToBytes.encode(datasetAttributes, dataBlock, stream); - for (final Codec codec : codecs) { - stream = ((Codec.BytesToBytes)codec).encode(stream); - } + final BytesCodec[] codecs = datasetAttributes.getCodecs(); + final ArrayCodec arrayCodec = datasetAttributes.getArrayCodec(); + final DataBlockOutputStream dataBlockOutput = arrayCodec.encode(datasetAttributes, dataBlock, out); - dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); - stream.flush(); + OutputStream stream = dataBlockOutput; + for (final BytesCodec codec : codecs) + stream = codec.encode(stream); - //FIXME Caleb: The stream must be closed BUT it shouldn't be `writeBlock`'s responsibility. 
- // Whoever opens the stream should close it + dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); stream.close(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index 54aacf80..ffed4c1e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -25,6 +25,8 @@ */ package org.janelia.saalfeldlab.n5; +import org.apache.commons.io.input.BoundedInputStream; + import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -73,6 +75,8 @@ protected class LockedFileChannel implements LockedChannel { protected final boolean truncate; + protected long len; + protected LockedFileChannel(final String path, final boolean readOnly) throws IOException { this(fileSystem.getPath(path), readOnly, 0, Long.MAX_VALUE); @@ -93,7 +97,7 @@ protected LockedFileChannel(final Path path, final boolean readOnly, final long final long start = startByte < 0 ? 0L : startByte; - final long len = size < 0 ? Long.MAX_VALUE : size; + len = size < 0 ? Long.MAX_VALUE : size; //TODO Caleb: How does this handle if manually overwriting the entire file? (e.g. 
len > file size) truncate = (start == 0 && len == Long.MAX_VALUE); @@ -159,7 +163,7 @@ public Writer newWriter() throws IOException { @Override public InputStream newInputStream() throws IOException { - return Channels.newInputStream(channel); + return new BoundedInputStream(Channels.newInputStream(channel), len); } @Override @@ -201,11 +205,11 @@ public LockedFileChannel lockForReading(final String normalPath) throws IOExcept } @Override - public LockedFileChannel lockForReading(final String normalPath, final long startByte, final long endByte) + public LockedFileChannel lockForReading(final String normalPath, final long startByte, final long size) throws IOException { try { - return new LockedFileChannel(normalPath, true, startByte, endByte); + return new LockedFileChannel(normalPath, true, startByte, size); } catch (final NoSuchFileException e) { throw new N5Exception.N5NoSuchKeyException("No such file", e); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 2545853e..60ce9299 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -26,6 +26,7 @@ package org.janelia.saalfeldlab.n5; import java.io.IOException; +import java.io.OutputStream; import java.io.UncheckedIOException; import java.util.Arrays; import java.util.List; @@ -222,7 +223,7 @@ default void writeBlock( if (datasetAttributes instanceof ShardedDatasetAttributes) { ShardedDatasetAttributes shardDatasetAttrs = (ShardedDatasetAttributes)datasetAttributes; final long[] shardPos = shardDatasetAttrs.getShardPositionForBlock(dataBlock.getGridPosition()); - final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); + final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), shardPos); final VirtualShard shard = new 
VirtualShard<>(shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); shard.writeBlock(dataBlock); return; @@ -230,7 +231,9 @@ default void writeBlock( final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { - DefaultBlockWriter.writeBlock(lock.newOutputStream(), datasetAttributes, dataBlock); + try ( final OutputStream out = lock.newOutputStream()) { + DefaultBlockWriter.writeBlock(out, datasetAttributes, dataBlock); + } } catch (final IOException | UncheckedIOException e) { throw new N5IOException( "Failed to write block " + Arrays.toString(dataBlock.getGridPosition()) + " into dataset " + path, diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java index ecaf2f0e..3091ad28 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java @@ -48,11 +48,13 @@ public class GzipCompression implements DefaultBlockReader, DefaultBlockWriter, @CompressionParameter @NameConfig.Parameter //TODO Caleb: How to handle serialization of parameter-less constructor. - // For N5, default is -1, for zarr, range is 0-9 and is required. + // For N5 the default is -1. + // For zarr the range is 0-9 and is required. // How to map -1 to some default (1?) when serializing to zarr? 
private final int level; @CompressionParameter + @NameConfig.Parameter private final boolean useZlib; private final transient GzipParameters parameters = new GzipParameters(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java index a4fa42b8..138bb73c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/KeyValueAccess.java @@ -157,7 +157,7 @@ public default String compose(final URI uri, final String... components) { */ public LockedChannel lockForReading(final String normalPath) throws IOException; - public LockedChannel lockForReading(String normalPath, final long startByte, final long endByte) + public LockedChannel lockForReading(String normalPath, final long startByte, final long size) throws IOException; /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java index 2cb72463..30e45d80 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/NameConfigAdapter.java @@ -58,7 +58,7 @@ public class NameConfigAdapter implements JsonDeserializer, JsonSerializer private static void registerAdapter(Class cls) { - adapters.put(cls, new NameConfigAdapter(cls)); + adapters.put(cls, new NameConfigAdapter<>(cls)); update(adapters.get(cls)); } private final HashMap> constructors = new HashMap<>(); @@ -77,6 +77,7 @@ private static ArrayList getDeclaredFields(Class clazz) { @SuppressWarnings("unchecked") public static synchronized void update(final NameConfigAdapter adapter) { + final String prefix = adapter.type.getAnnotation(NameConfig.Prefix.class).value(); final ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); final Index annotationIndex = Index.load(NameConfig.Name.class, classLoader); for (final IndexItem item : annotationIndex) { @@ -84,7 
+85,6 @@ public static synchronized void update(final NameConfigAdapter adapter) { try { clazz = (Class)Class.forName(item.className()); final String name = clazz.getAnnotation(NameConfig.Name.class).value(); - final String prefix = adapter.type.getAnnotation(NameConfig.Prefix.class).value(); final String type = prefix + "." + name; final Constructor constructor = clazz.getDeclaredConstructor(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index b74002f8..c72bad29 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -3,6 +3,10 @@ import java.util.Arrays; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; +import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.shard.ShardIndex; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; @@ -12,22 +16,51 @@ public class ShardedDatasetAttributes extends DatasetAttributes { private final int[] shardSize; - private final IndexLocation indexLocation; + private final ShardingCodec shardingCodec; - public ShardedDatasetAttributes( + + public ShardedDatasetAttributes ( final long[] dimensions, - final int[] shardSize, - final int[] blockSize, - final IndexLocation shardIndexLocation, + final int[] shardSize, //in pixels + final int[] blockSize, //in pixels final DataType dataType, - final Compression compression, - final Codec[] codecs) { + final Codec[] blocksCodecs, + final DeterministicSizeCodec[] indexCodecs, + final IndexLocation indexLocation + ) { + super(dimensions, blockSize, dataType, null, blocksCodecs); + this.shardSize = shardSize; + this.shardingCodec = new ShardingCodec( + 
blockSize, + blocksCodecs, + indexCodecs, + indexLocation + ); + } - super(dimensions, blockSize, dataType, compression, codecs); + public ShardedDatasetAttributes( + final long[] dimensions, + final int[] shardSize, //in pixels + final int[] blockSize, //in pixels + final DataType dataType, + final ShardingCodec codec) { + super(dimensions, blockSize, dataType, null, codec.getCodecs()); this.shardSize = shardSize; - this.indexLocation = shardIndexLocation; + this.shardingCodec = codec; + } - // TODO figure out codecs + public ShardingCodec getShardingCodec() { + return shardingCodec; + } + + @Override public ArrayCodec getArrayCodec() { + + return shardingCodec.getArrayCodec(); + } + + @Override public BytesCodec[] getCodecs() { + + return shardingCodec.getCodecs(); } public int[] getShardSize() { @@ -36,9 +69,9 @@ public int[] getShardSize() { } /** - * Returns the number of blocks a shard contains along all dimensions. + * Returns the number of shards per dimension for the dataset. * - * @return the size of the block grid of a shard + * @return the size of the shard grid of a dataset */ public int[] getShardBlockGridSize() { @@ -51,6 +84,22 @@ public int[] getShardBlockGridSize() { return shardBlockGridSize; } + /** + * Returns the number of blocks per dimension for a shard. + * + * @return the size of the block grid of a shard + */ + public int[] getBlocksPerShard() { + + final int nd = getNumDimensions(); + final int[] blocksPerShard = new int[nd]; + final int[] blockSize = getBlockSize(); + for (int i = 0; i < nd; i++) + blocksPerShard[i] = getShardSize()[i] / blockSize[i]; + + return blocksPerShard; + } + /** * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. * @@ -61,10 +110,10 @@ public int[] getShardBlockGridSize() { public long[] getShardPositionForBlock(final long... 
blockGridPosition) { // TODO have this return a shard - final int[] shardBlockDimensions = getShardBlockGridSize(); + final int[] blocksPerShard = getBlocksPerShard(); final long[] shardGridPosition = new long[blockGridPosition.length]; for (int i = 0; i < shardGridPosition.length; i++) { - shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / shardBlockDimensions[i]); + shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / blocksPerShard[i]); } return shardGridPosition; @@ -75,7 +124,7 @@ public long[] getShardPositionForBlock(final long... blockGridPosition) { * * @return the shard position */ - public int[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + public long[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { final long[] shardPos = getShardPositionForBlock(blockPosition); if (!Arrays.equals(shardPosition, shardPos)) @@ -85,7 +134,7 @@ public int[] getBlockPositionInShard(final long[] shardPosition, final long[] bl final int[] blkSize = getBlockSize(); final int[] blkGridSize = getShardBlockGridSize(); - final int[] blockShardPos = new int[shardSize.length]; + final long[] blockShardPos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { final long shardP = shardPos[i] * shardSize[i]; final long blockP = blockPosition[i] * blkSize[i]; @@ -100,7 +149,7 @@ public int[] getBlockPositionInShard(final long[] shardPosition, final long[] bl */ public long getNumBlocks() { - return Arrays.stream(getShardBlockGridSize()).reduce(1, (x, y) -> x * y); + return Arrays.stream(getBlocksPerShard()).reduce(1, (x, y) -> x * y); } public static int[] getBlockSize(Codec[] codecs) { @@ -114,6 +163,18 @@ public static int[] getBlockSize(Codec[] codecs) { public IndexLocation getIndexLocation() { - return indexLocation; + return getShardingCodec().getIndexLocation(); + } + + public ShardIndex createIndex() { + return new ShardIndex(getBlocksPerShard(), 
getShardingCodec().getIndexCodecs()); + } + + public DatasetAttributes getIndexAttributes() { + return createShardIndexAttributes(getShardingCodec().getIndexCodecs()); + } + + private static DatasetAttributes createShardIndexAttributes(Codec[] indexCodecs) { + return new DatasetAttributes(null, null, null, null, indexCodecs); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java index c3f8e69b..e8883c75 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/AsTypeCodec.java @@ -10,7 +10,7 @@ import org.janelia.saalfeldlab.n5.serialization.NameConfig; @NameConfig.Name(AsTypeCodec.TYPE) -public class AsTypeCodec implements Codec.BytesToBytes { +public class AsTypeCodec implements Codec.BytesCodec { private static final long serialVersionUID = 1031322606191894484L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 8cb8e116..76b015cf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -25,7 +25,7 @@ import com.google.gson.JsonSerializer; @NameConfig.Name(value = BytesCodec.TYPE) -public class BytesCodec implements Codec.ArrayToBytes { +public class BytesCodec implements Codec.ArrayCodec { private static final long serialVersionUID = 3282569607795127005L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index a8fdd999..a78df016 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -23,7 +23,7 @@ @NameConfig.Prefix("codec") public interface Codec extends Serializable { - public interface BytesToBytes extends Codec { + public interface BytesCodec extends 
Codec { /** * Decode an {@link InputStream}. @@ -44,7 +44,7 @@ public interface BytesToBytes extends Codec { public OutputStream encode(final OutputStream out) throws IOException; } - interface ArrayToBytes extends Codec { + interface ArrayCodec extends DeterministicSizeCodec { /** * Decode an {@link InputStream}. @@ -63,6 +63,15 @@ interface ArrayToBytes extends Codec { public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock datablock, final OutputStream out) throws IOException; + @Override default long encodedSize(long size) { + + return size; + } + + @Override default long decodedSize(long size) { + + return size; + } } public abstract class DataBlockInputStream extends ProxyInputStream { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java deleted file mode 100644 index de720bbc..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/ComposedCodec.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.janelia.saalfeldlab.n5.codec; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -/** - * A {@link Codec} that is composition of a collection of codecs. - */ -public class ComposedCodec implements Codec.BytesToBytes { //TODO Caleb: Remove? - - private static final long serialVersionUID = 5068349140842235924L; - - protected static final String TYPE = "composed"; - - private final Codec[] codecs; - - public ComposedCodec(final Codec... 
codec) { - - this.codecs = codec; - } - - @Override - public String getType() { - - return TYPE; - } - - @Override - public InputStream decode(InputStream in) throws IOException { - - // note that decoding is in reverse order - InputStream decoded = in; - for (int i = codecs.length - 1; i >= 0; i--){} -// decoded = codecs[i].decode(decoded); - - return decoded; - } - - @Override - public OutputStream encode(OutputStream out) throws IOException { - - OutputStream encoded = out; - for (int i = 0; i < codecs.length; i++){} -// encoded = codecs[i].encode(encoded); - - return encoded; - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java index b0288adf..9ac0a1fe 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/DeterministicSizeCodec.java @@ -4,7 +4,7 @@ * A {@link Codec} that can deterministically determine the size of encoded data from the size of the raw data and vice versa from the data length alone (i.e. encoding is data * independent). 
*/ -public interface DeterministicSizeCodec extends Codec.BytesToBytes { +public interface DeterministicSizeCodec extends Codec { public abstract long encodedSize(long size); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java index b308d31b..93a384dd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/IdentityCodec.java @@ -7,8 +7,7 @@ import org.janelia.saalfeldlab.n5.serialization.NameConfig; @NameConfig.Name(IdentityCodec.TYPE) -@NameConfig.Prefix("codec") -public class IdentityCodec implements Codec.BytesToBytes { +public class IdentityCodec implements Codec.BytesCodec { private static final long serialVersionUID = 8354269325800855621L; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java similarity index 95% rename from src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java rename to src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java index bdc71225..c4866fa1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java @@ -17,8 +17,8 @@ import com.google.common.io.LittleEndianDataInputStream; import com.google.common.io.LittleEndianDataOutputStream; -@NameConfig.Name(value = N5BytesCodec.TYPE) -public class N5BytesCodec implements Codec.ArrayToBytes { +@NameConfig.Name(value = N5BlockCodec.TYPE) +public class N5BlockCodec implements Codec.ArrayCodec { private static final long serialVersionUID = 3523505403978222360L; @@ -27,12 +27,12 @@ public class N5BytesCodec implements Codec.ArrayToBytes { @NameConfig.Parameter(value = "endian", optional = true) protected final ByteOrder byteOrder; - public N5BytesCodec() { + public N5BlockCodec() { this(ByteOrder.BIG_ENDIAN); } - public 
N5BytesCodec(final ByteOrder byteOrder) { + public N5BlockCodec(final ByteOrder byteOrder) { this.byteOrder = byteOrder; } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java index 5cf83cf0..7d7a58fb 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/checksum/ChecksumCodec.java @@ -8,12 +8,14 @@ import java.util.zip.CheckedOutputStream; import java.util.zip.Checksum; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; /** * A {@link Codec} that appends a checksum to data when encoding and can validate against that checksum when decoding. */ -public abstract class ChecksumCodec implements DeterministicSizeCodec { +public abstract class ChecksumCodec implements BytesCodec, DeterministicSizeCodec { private static final long serialVersionUID = 3141427377277375077L; @@ -41,14 +43,18 @@ public int numChecksumBytes() { public CheckedOutputStream encode(final OutputStream out) throws IOException { // when do we validate? 
- return new CheckedOutputStream(out, getChecksum()); - } - - public void encode(final OutputStream out, ByteBuffer buffer) throws IOException { - - final CheckedOutputStream cout = new CheckedOutputStream(out, getChecksum()); - cout.write(buffer.array()); - writeChecksum(out); + return new CheckedOutputStream(out, getChecksum()) { + + private boolean closed = false; + @Override public void close() throws IOException { + + if (!closed) { + writeChecksum(out); + closed = true; + out.close(); + } + } + }; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 43a90ca6..d6d009dd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -60,22 +60,8 @@ default int[] getBlockGridSize() { */ default long[] getBlockPosition(long... blockPosition) { - final long[] shardPos = getShard(blockPosition); - if (!Arrays.equals(getGridPosition(), shardPos)) - return null; - - final int[] shardSize = getSize(); - final int[] blkSize = getBlockSize(); - final int[] blkGridSize = getBlockGridSize(); - - final long[] blockShardPos = new long[shardSize.length]; - for (int i = 0; i < shardSize.length; i++) { - final long shardP = shardPos[i] * shardSize[i]; - final long blockP = blockPosition[i] * blkSize[i]; - blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); - } - - return blockShardPos; + final long[] shardPos = getDatasetAttributes().getShardPositionForBlock(blockPosition); + return getDatasetAttributes().getBlockPositionInShard(shardPos, blockPosition); } /** @@ -111,7 +97,7 @@ public static Shard createEmpty(final ShardedDatasetAttributes attributes final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); - final ShardIndex shardIndex = new ShardIndex(attributes.getShardBlockGridSize(), emptyIndex); + final ShardIndex shardIndex = new 
ShardIndex(attributes.getBlocksPerShard(), emptyIndex); return new InMemoryShard(attributes, shardPosition, shardIndex); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 31f51049..36ad5cd7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -20,6 +20,8 @@ import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; public class ShardIndex extends LongArrayDataBlock { @@ -30,14 +32,20 @@ public class ShardIndex extends LongArrayDataBlock { private static final long[] DUMMY_GRID_POSITION = null; - public ShardIndex(int[] shardBlockGridSize, long[] data) { + private long byteOffset = -1; + + private final DeterministicSizeCodec[] codecs; + + public ShardIndex(int[] shardBlockGridSize, long[] data, final DeterministicSizeCodec... codecs) { super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, data); + this.codecs = codecs; } - public ShardIndex(int[] shardBlockGridSize) { + public ShardIndex(int[] shardBlockGridSize, final DeterministicSizeCodec... codecs) { super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, emptyIndexData(shardBlockGridSize)); + this.codecs = codecs; } public boolean exists(long... gridPosition) { @@ -77,28 +85,46 @@ private int getOffsetIndex(long... gridPosition) { private int getNumBytesIndex(long... 
gridPosition) { - return getOffsetIndex() + 1; + return getOffsetIndex(gridPosition) + 1; } - public static ShardIndex read(final KeyValueAccess keyValueAccess, final String key, - final ShardedDatasetAttributes datasetAttributes) throws IOException { + public static ShardIndex read( + final KeyValueAccess keyValueAccess, + final String key, + final ShardedDatasetAttributes datasetAttributes + ) throws IOException { - return read(keyValueAccess, key, datasetAttributes.getShardBlockGridSize(), datasetAttributes.getIndexLocation()); + final IndexLocation indexLocation = datasetAttributes.getIndexLocation(); + return read(keyValueAccess, key, datasetAttributes.createIndex(), indexLocation); + } + + public long numBytes() { + + final int numEntries = Arrays.stream(getSize()).reduce(1, (x, y) -> x * y); + final int numBytesFromBlocks = numEntries * BYTES_PER_LONG; + long totalNumBytes = numBytesFromBlocks; + for (Codec codec : codecs) { + if (codec instanceof DeterministicSizeCodec) { + totalNumBytes = ((DeterministicSizeCodec)codec).encodedSize(totalNumBytes); + } + } + return totalNumBytes; } public static ShardIndex read( final KeyValueAccess keyValueAccess, final String key, - final int[] shardBlockGridSize, - final IndexLocation indexLocation) throws IOException { + final ShardIndex idx, + final IndexLocation indexLocation + ) throws IOException { - final ShardIndex idx = new ShardIndex(shardBlockGridSize); - final IndexByteBounds byteBounds = byteBounds(idx.getSize(), indexLocation, keyValueAccess.size(key)); + final IndexByteBounds byteBounds = byteBounds(idx.numBytes(), indexLocation, keyValueAccess.size(key)); + idx.byteOffset = byteBounds.start; try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { final byte[] bytes = new byte[idx.getNumElements() * ShardIndex.BYTES_PER_LONG]; lockedChannel.newInputStream().read(bytes); - idx.readData(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); // TODO 
generalize byte order + idx.readData(ByteBuffer.wrap(bytes).order(ByteOrder.BIG_ENDIAN)); // TODO generalize byte order return idx; } catch (final N5Exception.N5NoSuchKeyException e) { @@ -114,7 +140,7 @@ public static void write(ShardIndex index, final int[] shardBlockGridSize, final IndexLocation indexLocation) throws IOException { - final IndexByteBounds byteBounds = byteBounds(index.getSize(), indexLocation, keyValueAccess.size(key)); + final IndexByteBounds byteBounds = byteBounds(index.numBytes(), indexLocation, keyValueAccess.size(key)); try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, byteBounds.start, byteBounds.end)) { final OutputStream os = lockedChannel.newOutputStream(); @@ -139,21 +165,23 @@ public static DatasetAttributes indexDatasetAttributes(final int[] indexBlockSiz public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttributes, final long objectSize) { - final int[] indexShape = prepend(2, datasetAttributes.getShardBlockGridSize()); - return byteBounds(indexShape, datasetAttributes.getIndexLocation(), objectSize); + final long indexSize = datasetAttributes.createIndex().numBytes(); + return byteBounds(indexSize, datasetAttributes.getIndexLocation(), objectSize); } - public static IndexByteBounds byteBounds(final int[] indexShape, final IndexLocation indexLocation, final long objectSize) { - - final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); + public static IndexByteBounds byteBounds(final long indexSize, final IndexLocation indexLocation, final long objectSize) { if (indexLocation == IndexLocation.START) { return new IndexByteBounds(0L, indexSize); } else { - return new IndexByteBounds(objectSize - (BYTES_PER_LONG * indexSize), objectSize - 1); + return new IndexByteBounds(objectSize - indexSize, objectSize - 1); } } + public long getByteOffset() { + return byteOffset; + } + private static class IndexByteBounds { private final long start; @@ -170,7 +198,7 @@ public 
static ShardIndex read(FileChannel channel, ShardedDatasetAttributes data // TODO need codecs // TODO FileChannel is too specific - generalize - final int[] indexShape = prepend(2, datasetAttributes.getShardBlockGridSize()); + final int[] indexShape = prepend(2, datasetAttributes.getBlocksPerShard()); final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); final int indexBytes = BYTES_PER_LONG * indexSize; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index 4584d1db..edab4ec0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -5,10 +5,11 @@ import org.janelia.saalfeldlab.n5.DefaultBlockReader; import org.janelia.saalfeldlab.n5.N5FSReader; import org.janelia.saalfeldlab.n5.N5Reader; -import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.codec.IdentityCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; @@ -50,7 +51,7 @@ public DataBlock readBlock( private long getIndexIndex(long... 
shardPosition) { - final int[] indexDimensions = datasetAttributes.getShardBlockGridSize(); + final int[] indexDimensions = datasetAttributes.getBlocksPerShard(); long idx = 0; for (int i = 0; i < indexDimensions.length; i++) { idx += shardPosition[i] * indexDimensions[i]; @@ -76,17 +77,14 @@ public static void main(String[] args) { private static ShardedDatasetAttributes buildTestAttributes() { - final Codec[] codecs = new Codec[]{ - new IdentityCodec(), - new ShardingCodec( - new int[]{2, 2}, - new Codec[]{new RawCompression(), new IdentityCodec()}, - new Codec[]{new Crc32cChecksumCodec()}, - IndexLocation.END - ) - }; - - return new ShardedDatasetAttributes(new long[]{4, 4}, new int[]{2, 2}, new int[]{2, 2}, IndexLocation.END, DataType.INT32, new RawCompression(), codecs); + return new ShardedDatasetAttributes( + new long[]{4, 4}, + new int[]{2, 2}, + new int[]{2, 2}, + DataType.INT32, + new Codec[]{new N5BlockCodec(), new IdentityCodec()}, + new DeterministicSizeCodec[]{new Crc32cChecksumCodec()}, + IndexLocation.END); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 1c18e564..4a288878 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -1,5 +1,9 @@ package org.janelia.saalfeldlab.n5.shard; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DefaultBlockWriter; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; + import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -9,10 +13,6 @@ import java.util.Arrays; import java.util.List; -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DefaultBlockWriter; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; - public class ShardWriter { private static final int BYTES_PER_LONG = 8; @@ -77,21 +77,22 
@@ private void prepareForWritingDataBlock() throws IOException { // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); - indexData = new ShardIndex(new int[]{blocks.size()}); + indexData = datasetAttributes.createIndex(); blockBytes = new ArrayList<>(); long cumulativeBytes = 0; final long[] shardPosition = new long[1]; for (int i = 0; i < blocks.size(); i++) { - final ByteArrayOutputStream blockOut = new ByteArrayOutputStream(); - DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); - System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); + try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { + DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); - shardPosition[0] = i; - indexData.set(cumulativeBytes, blockOut.size(), shardPosition); - cumulativeBytes += blockOut.size(); + shardPosition[0] = i; + indexData.set(cumulativeBytes, blockOut.size(), shardPosition); + cumulativeBytes += blockOut.size(); - blockBytes.add(blockOut.toByteArray()); + blockBytes.add(blockOut.toByteArray()); + } } System.out.println(Arrays.toString(indexData.getData())); @@ -105,15 +106,17 @@ private void prepareForWriting() throws IOException { long cumulativeBytes = 0; for (int i = 0; i < blocks.size(); i++) { - final ByteArrayOutputStream blockOut = new ByteArrayOutputStream(); - DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); - System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); + try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { + + DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); - blockIndexes.putLong(cumulativeBytes); - 
blockSizes.putLong(blockOut.size()); - cumulativeBytes += blockOut.size(); + blockIndexes.putLong(cumulativeBytes); + blockSizes.putLong(blockOut.size()); + cumulativeBytes += blockOut.size(); - blockBytes.add(blockOut.toByteArray()); + blockBytes.add(blockOut.toByteArray()); + } } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index dc5392d4..af06ec0e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -7,7 +7,10 @@ import com.google.gson.JsonPrimitive; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.serialization.NameConfig; import java.io.IOException; @@ -16,7 +19,7 @@ import java.lang.reflect.Type; @NameConfig.Name(ShardingCodec.TYPE) -public class ShardingCodec implements Codec.BytesToBytes { //TODO Caleb: should be ArrayToBytes +public class ShardingCodec implements Codec.ArrayCodec { private static final long serialVersionUID = -5879797314954717810L; @@ -28,7 +31,7 @@ public class ShardingCodec implements Codec.BytesToBytes { //TODO Caleb: should public static final String INDEX_CODECS_KEY = "index_codecs"; public enum IndexLocation { - START, END + START, END; } @NameConfig.Parameter(CHUNK_SHAPE_KEY) @@ -38,7 +41,7 @@ public enum IndexLocation { private final Codec[] codecs; @NameConfig.Parameter(INDEX_CODECS_KEY) - private final Codec[] indexCodecs; + private final DeterministicSizeCodec[] indexCodecs; @NameConfig.Parameter(INDEX_LOCATION_KEY) private final IndexLocation indexLocation; @@ -54,7 +57,7 @@ private ShardingCodec() { public ShardingCodec( final int[] blockSize, 
final Codec[] codecs, - final Codec[] indexCodecs, + final DeterministicSizeCodec[] indexCodecs, final IndexLocation indexLocation) { this.blockSize = blockSize; @@ -73,25 +76,31 @@ public IndexLocation getIndexLocation() { return indexLocation; } - @Override - public InputStream decode(InputStream in) throws IOException { + public ArrayCodec getArrayCodec() { - // TODO Auto-generated method stub - // This method actually makes no sense for a sharding codec - return in; + return (Codec.ArrayCodec)codecs[0]; } - @Override - public OutputStream encode(OutputStream out) throws IOException { + public BytesCodec[] getCodecs() { + + final BytesCodec[] bytesCodecs = new BytesCodec[codecs.length - 1]; + System.arraycopy(codecs, 1, bytesCodecs, 0, bytesCodecs.length); + return bytesCodecs; + } + + public DeterministicSizeCodec[] getIndexCodecs() { + + return indexCodecs; + } + + @Override public DataBlockInputStream decode(DatasetAttributes attributes, long[] gridPosition, InputStream in) throws IOException { - // TODO Auto-generated method stub - // This method actually makes no sense for a sharding codec - return out; + return getArrayCodec().decode(attributes, gridPosition, in); } - public static boolean isShardingCodec(final Codec codec) { + @Override public DataBlockOutputStream encode(DatasetAttributes attributes, DataBlock datablock, OutputStream out) throws IOException { - return codec instanceof ShardingCodec; + return getArrayCodec().encode(attributes, datablock, out); } @Override @@ -106,7 +115,8 @@ public static class IndexLocationAdapter implements JsonSerializer extends AbstractShard { - private KeyValueAccess keyValueAccess; - private String path; + final private KeyValueAccess keyValueAccess; + final private String path; public VirtualShard(final ShardedDatasetAttributes datasetAttributes, long[] gridPosition, final KeyValueAccess keyValueAccess, final String path) { @@ -38,18 +40,15 @@ public DataBlock getBlock(long... 
blockGridPosition) { final ShardIndex idx = getIndex(); final long startByte = idx.getOffset(relativePosition); - final long endByte = startByte + idx.getNumBytes(relativePosition); - try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, endByte)) { - // TODO add codecs, generalize to use any BlockReader - final DataBlock dataBlock = (DataBlock)datasetAttributes.getDataType().createDataBlock( - datasetAttributes.getBlockSize(), - blockGridPosition, - numBlockElements(datasetAttributes)); - - DefaultBlockReader.readFromStream(dataBlock, lockedChannel.newInputStream()); - return dataBlock; + if (startByte == Shard.EMPTY_INDEX_NBYTES ) + return null; + final long size = idx.getNumBytes(relativePosition); + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, size)) { + try ( final InputStream channelIn = lockedChannel.newInputStream()) { + return (DataBlock)DefaultBlockReader.readBlock(channelIn, datasetAttributes, blockGridPosition); + } } catch (final N5Exception.N5NoSuchKeyException e) { return null; } catch (final IOException | UncheckedIOException e) { @@ -65,21 +64,31 @@ public void writeBlock(final DataBlock block) { throw new N5IOException("Attempted to write block in the wrong shard."); final ShardIndex idx = getIndex(); - final long startByte = idx.getOffset(relativePosition) == Shard.EMPTY_INDEX_NBYTES ? 0 : idx.getOffset(relativePosition); - final long size = idx.getNumBytes(relativePosition) == Shard.EMPTY_INDEX_NBYTES ? Long.MAX_VALUE : idx.getNumBytes(relativePosition); - // TODO this assumes that the block exists in the shard and - // that the available space is sufficient. 
Should generalize - try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, size)) { - // TODO codecs - final CountingOutputStream out = new CountingOutputStream(lockedChannel.newOutputStream()); - datasetAttributes.getCompression().getWriter().write(block, out); + //TODO Caleb: reusing the offset of a prior block write is only safe when writing the same amount, or less, data. + // This is not generally guaranteed, since we compress the data. + // Either need to known the compressed size before writing, append only, or only overwrite when not compressing + final long getBlockOffset = idx.getOffset(relativePosition); + final long startByte; + if (getBlockOffset == Shard.EMPTY_INDEX_NBYTES) { + final long indexByteOffset = idx.getByteOffset(); + startByte = indexByteOffset == -1 ? 0 : idx.getByteOffset(); + } else { + startByte = getBlockOffset; + } + final long size = Long.MAX_VALUE - startByte; - // TODO update index when we know how many bytes were written - idx.set(startByte, out.getNumBytes(), relativePosition); - out.write(index.toByteBuffer().array()); - out.realClose(); + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, size)) { + try ( final OutputStream channelOut = lockedChannel.newOutputStream()) { + try (final CountingOutputStream out = new CountingOutputStream(channelOut)) { + DefaultBlockWriter.writeBlock(out, datasetAttributes, block); + + /* Update and write the index to the shard*/ + idx.set(startByte, out.getNumBytes(), relativePosition); + DefaultBlockWriter.writeBlock(out, datasetAttributes.getIndexAttributes(), idx); + } + } } catch (final IOException | UncheckedIOException e) { throw new N5IOException("Failed to read block from " + path, e); } @@ -100,21 +109,21 @@ private static int numBlockElements(DatasetAttributes datasetAttributes) { public ShardIndex createIndex() { // Empty index of the correct size - index = new ShardIndex(datasetAttributes.getShardBlockGridSize()); 
- return index; + return datasetAttributes.createIndex(); } @Override public ShardIndex getIndex() { try { - final ShardIndex result = ShardIndex.read(keyValueAccess, path, datasetAttributes); - return result == null ? createIndex() : result; + final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, datasetAttributes); + index = readIndex == null ? createIndex() : readIndex; } catch (final NoSuchFileException e) { - return createIndex(); + index = createIndex(); } catch (IOException e) { throw new N5IOException("Failed to read index at " + path, e); } + return index; } @@ -155,10 +164,6 @@ public void close() throws IOException { } - private void realClose() throws IOException { - out.close(); - } - public long getNumBytes() { return numBytes; } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index 939b9925..d4d3591b 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -52,7 +52,7 @@ import org.janelia.saalfeldlab.n5.N5Reader.Version; import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -257,7 +257,7 @@ public void testWriteReadByteBlockMultipleCodecs() { * maybe is not the behavior we actually want*/ try (final N5Writer n5 = createTempN5Writer()) { final Codec[] codecs = { - new N5BytesCodec(), + new N5BlockCodec(), new AsTypeCodec(DataType.INT32, DataType.INT8), new AsTypeCodec(DataType.INT64, DataType.INT32), }; diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java index ee36a51d..59aa3298 100644 --- 
a/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/AsTypeTests.java @@ -47,20 +47,20 @@ public void testDouble2Byte() throws IOException { testEncodingAndDecoding(new AsTypeCodec(DataType.INT8, DataType.FLOAT64), decodedDoubles, encodedBytes); } - public static void testEncodingAndDecoding(Codec.BytesToBytes codec, byte[] encodedBytes, byte[] decodedBytes) throws IOException { + public static void testEncodingAndDecoding(Codec.BytesCodec codec, byte[] encodedBytes, byte[] decodedBytes) throws IOException { testEncoding(codec, encodedBytes, decodedBytes); testDecoding(codec, decodedBytes, encodedBytes); } - public static void testDecoding(final Codec.BytesToBytes codec, final byte[] expected, final byte[] input) throws IOException { + public static void testDecoding(final Codec.BytesCodec codec, final byte[] expected, final byte[] input) throws IOException { final InputStream result = codec.decode(new ByteArrayInputStream(input)); for (int i = 0; i < expected.length; i++) assertEquals(expected[i], (byte)result.read()); } - public static void testEncoding(final Codec.BytesToBytes codec, final byte[] expected, final byte[] data) throws IOException { + public static void testEncoding(final Codec.BytesCodec codec, final byte[] expected, final byte[] data) throws IOException { final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(expected.length); final OutputStream encodedStream = codec.encode(outputStream); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index fa407163..f911ec6d 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -29,7 +29,7 @@ public void testSerialization() { final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.zarr"); final Codec bytes = 
reader.getAttribute("mid_sharded", "codecs[0]/configuration/codecs[0]", Codec.class); - assertTrue("as BytesCodec", bytes instanceof N5BytesCodec); + assertTrue("as BytesCodec", bytes instanceof N5BlockCodec); final N5Writer writer = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); @@ -39,7 +39,7 @@ public void testSerialization() { DataType.UINT8, new RawCompression(), new Codec[]{ - new N5BytesCodec(ByteOrder.LITTLE_ENDIAN), + new N5BlockCodec(ByteOrder.LITTLE_ENDIAN), new IdentityCodec() } ); @@ -48,8 +48,8 @@ public void testSerialization() { assertEquals("1 codecs", 1, deserialized.getCodecs().length); assertTrue("Identity", deserialized.getCodecs()[0] instanceof IdentityCodec); - assertTrue("Bytes", deserialized.getArrayToBytesCodec() instanceof N5BytesCodec); + assertTrue("Bytes", deserialized.getArrayCodec() instanceof N5BlockCodec); assertEquals("LittleEndian", ByteOrder.LITTLE_ENDIAN, - ((N5BytesCodec)deserialized.getArrayToBytesCodec()).byteOrder); + ((N5BlockCodec)deserialized.getArrayCodec()).byteOrder); } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java index f12150b6..0610c7c5 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/serialization/CodecSerialization.java @@ -1,9 +1,11 @@ package org.janelia.saalfeldlab.n5.serialization; +import static org.janelia.saalfeldlab.n5.NameConfigAdapter.getJsonAdapter; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.GsonUtils; import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.NameConfigAdapter; import org.janelia.saalfeldlab.n5.codec.AsTypeCodec; @@ -26,16 +28,9 @@ public class CodecSerialization { @Before public void before() { - final 
GsonBuilder gsonBuilder = new GsonBuilder(); - gsonBuilder.registerTypeAdapter(IdentityCodec.class, NameConfigAdapter.getJsonAdapter(IdentityCodec.class)); - gsonBuilder.registerTypeAdapter(AsTypeCodec.class, NameConfigAdapter.getJsonAdapter(AsTypeCodec.class)); - gsonBuilder.registerTypeAdapter(FixedScaleOffsetCodec.class, - NameConfigAdapter.getJsonAdapter(FixedScaleOffsetCodec.class)); - gsonBuilder.registerTypeAdapter(GzipCompression.class, - NameConfigAdapter.getJsonAdapter(GzipCompression.class)); - gsonBuilder.registerTypeAdapter(Codec.class, - NameConfigAdapter.getJsonAdapter(Codec.class)); + final GsonBuilder gsonBuilder = new GsonBuilder(); + GsonUtils.registerGson(gsonBuilder); gson = gsonBuilder.create(); } @@ -44,7 +39,7 @@ public void testSerializeIdentity() { final IdentityCodec id = new IdentityCodec(); final JsonObject jsonId = gson.toJsonTree(id).getAsJsonObject(); - final JsonElement expected = gson.fromJson("{\"name\":\"id\", \"configuration\":{}}", JsonElement.class); + final JsonElement expected = gson.fromJson("{\"name\":\"id\"}", JsonElement.class); assertEquals("identity", expected, jsonId.getAsJsonObject()); } @@ -54,7 +49,7 @@ public void testSerializeAsType() { final AsTypeCodec asTypeCodec = new AsTypeCodec(DataType.FLOAT64, DataType.INT16); final JsonObject jsonAsType = gson.toJsonTree(asTypeCodec).getAsJsonObject(); final JsonElement expected = gson.fromJson( - "{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}}", + "{\"name\":\"astype\",\"configuration\":{\"dataType\":\"float64\",\"encodedType\":\"int16\"}}", JsonElement.class); assertEquals("asType", expected, jsonAsType.getAsJsonObject()); } @@ -68,7 +63,7 @@ public void testSerializeCodecArray() { }; JsonArray jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); JsonElement expected = gson.fromJson( - 
"[{\"name\":\"id\",\"configuration\":{}},{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}}]", + "[{\"name\":\"id\"},{\"name\":\"astype\",\"configuration\":{\"dataType\":\"float64\",\"encodedType\":\"int16\"}}]", JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); @@ -83,7 +78,7 @@ public void testSerializeCodecArray() { }; jsonCodecArray = gson.toJsonTree(codecs).getAsJsonArray(); expected = gson.fromJson( - "[{\"name\":\"astype\",\"configuration\":{\"dataType\":\"FLOAT64\",\"encodedType\":\"INT16\"}},{\"name\":\"gzip\",\"configuration\":{\"level\":-1,\"use_z_lib\":false}}]", + "[{\"name\":\"astype\",\"configuration\":{\"dataType\":\"float64\",\"encodedType\":\"int16\"}},{\"name\":\"gzip\",\"configuration\":{\"level\":-1,\"useZlib\":false}}]", JsonElement.class); assertEquals("codec array", expected, jsonCodecArray.getAsJsonArray()); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 50981dc6..4bab3d45 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -6,17 +6,19 @@ import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.N5Writer; -import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.N5BytesCodec; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.IdentityCodec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; 
import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.junit.Assert; import org.junit.Test; import java.net.MalformedURLException; +import java.nio.ByteOrder; import java.nio.file.FileSystems; import java.nio.file.Path; import java.nio.file.Paths; @@ -30,15 +32,22 @@ public static void main(String[] args) throws MalformedURLException { System.out.println(p); final String key = p.toString(); - final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes(new long[]{6, 4}, new int[]{6, 4}, - new int[]{3, 2}, IndexLocation.END, DataType.UINT8, new RawCompression(), null); + final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( + new long[]{6, 4}, + new int[]{6, 4}, + new int[]{3, 2}, + DataType.UINT8, + new Codec[]{new N5BlockCodec()}, + new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, + IndexLocation.END + ); final FileSystemKeyValueAccess kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); final VirtualShard shard = new VirtualShard<>(dsetAttrs, new long[]{0, 0}, kva, key); final DataBlock blk = shard.getBlock(0, 0); - final byte[] data = (byte[])blk.getData(); + final byte[] data = blk.getData(); System.out.println(Arrays.toString(data)); // fill the block with a weird value @@ -49,7 +58,7 @@ public static void main(String[] args) throws MalformedURLException { // re-read the block and check the data it contains final DataBlock blkReread = shard.getBlock(0, 0); - final byte[] dataReRead = (byte[])blkReread.getData(); + final byte[] dataReRead = blkReread.getData(); System.out.println(Arrays.toString(dataReRead)); } @@ -68,30 +77,34 @@ public void writeReadBlockTest() { new long[]{8, 8}, new int[]{4, 4}, new int[]{2, 2}, - IndexLocation.END, DataType.UINT8, - new RawCompression(), - new Codec[]{ - new N5BytesCodec(), - new ShardingCodec( - new int[]{2, 2}, - new Codec[]{new N5BytesCodec(), new GzipCompression(4)}, - new Codec[]{new Crc32cChecksumCodec()}, - IndexLocation.END - ) - } + new 
Codec[]{new N5BlockCodec(), new GzipCompression(4)}, + new DeterministicSizeCodec[]{new BytesCodec(ByteOrder.BIG_ENDIAN), new Crc32cChecksumCodec()}, + IndexLocation.END ); writer.createDataset("shard", datasetAttributes); + writer.deleteBlock("shard", 0, 0); - final DataBlock dataBlock = datasetAttributes.getDataType().createDataBlock(datasetAttributes.getBlockSize(), new long[]{0, 0}, 2 * 2); - byte[] data = (byte[])dataBlock.getData(); - for (int i = 0; i < data.length; i++) { - data[i] = (byte)i; - } + final int[] blockSize = datasetAttributes.getBlockSize(); + final DataType dataType = datasetAttributes.getDataType(); + final int numElements = 2 * 2; + + for (int idx1 = 1; idx1 >= 0; idx1--) { + for (int idx2 = 1; idx2 >= 0; idx2--) { - writer.deleteBlock("shard", 0,0 ); - writer.writeBlock("shard", datasetAttributes, dataBlock); - writer.readBlock("shard", datasetAttributes, 0, 0); + final long[] gridPosition = {idx1, idx2}; + final DataBlock dataBlock = (DataBlock)dataType.createDataBlock(blockSize, gridPosition, numElements); + byte[] data = dataBlock.getData(); + for (int i = 0; i < data.length; i++) { + data[i] = (byte)((idx1 * 100) + (idx2 * 10) + i); + } + writer.writeBlock("shard", datasetAttributes, dataBlock); + + final DataBlock block = (DataBlock)writer.readBlock("shard", datasetAttributes, gridPosition); + + Assert.assertArrayEquals("Read from shard doesn't match", data, block.getData()); + } + } } } From 7a1bbcb33c358a1c78f52f0867b92b2fe9202380 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 17 Sep 2024 14:39:58 -0400 Subject: [PATCH 055/124] feat: rethrow NoSuchFile as NoSuchKey --- .../janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index ffed4c1e..317c2c5d 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -266,8 +266,11 @@ public boolean exists(final String normalPath) { @Override public long size(final String normalPath) throws IOException { - final Path path = fileSystem.getPath(normalPath); - return Files.size(path); + try { + return Files.size(fileSystem.getPath(normalPath)); + } catch (NoSuchFileException e) { + throw new N5Exception.N5NoSuchKeyException("No such file", e); + } } @Override From 9fac328eb1f7fa88f5c3eccdf49ec2cc58680528 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 17 Sep 2024 14:41:50 -0400 Subject: [PATCH 056/124] feat: wip support for ShardIndex location and bytesorder This is very preliminary. It works in a way, but is not as performant or reasonable as it likely should be. Consider this an initial implementation proof of concept for implementing the rest of sharding, especially working towards multiple read/write block aggregations --- .../n5/ShardedDatasetAttributes.java | 18 ++- .../saalfeldlab/n5/codec/N5BlockCodec.java | 4 +- .../janelia/saalfeldlab/n5/shard/Shard.java | 2 +- .../saalfeldlab/n5/shard/ShardIndex.java | 131 +++++++++--------- .../saalfeldlab/n5/shard/VirtualShard.java | 38 ++--- .../saalfeldlab/n5/shard/ShardDemos.java | 2 +- 6 files changed, 98 insertions(+), 97 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index c72bad29..794b8861 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -132,7 +132,10 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b final int[] shardSize = getShardSize(); final int[] blkSize = getBlockSize(); - final int[] blkGridSize = getShardBlockGridSize(); + final int[] 
blkGridSize = getBlocksPerShard(); +// final int[] shardSize = getSize(); +// final int[] blkSize = getBlockSize(); +// final int[] blkGridSize = getBlockGridSize(); final long[] blockShardPos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { @@ -142,6 +145,9 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b } return blockShardPos; + + + } /** @@ -167,14 +173,6 @@ public IndexLocation getIndexLocation() { } public ShardIndex createIndex() { - return new ShardIndex(getBlocksPerShard(), getShardingCodec().getIndexCodecs()); - } - - public DatasetAttributes getIndexAttributes() { - return createShardIndexAttributes(getShardingCodec().getIndexCodecs()); - } - - private static DatasetAttributes createShardIndexAttributes(Codec[] indexCodecs) { - return new DatasetAttributes(null, null, null, null, indexCodecs); + return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java index c4866fa1..82f118bb 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java @@ -70,7 +70,7 @@ public DataBlock allocateDataBlock() throws IOException { } private void readHeader() throws IOException { - final DataInputStream dis = new DataInputStream(in); + final DataInput dis = getDataInput(in); mode = dis.readShort(); if (mode != 2) { final int nDim = dis.readShort(); @@ -119,7 +119,7 @@ protected void beforeWrite(int n) throws IOException { } private void writeHeader() throws IOException { - final DataOutputStream dos = new DataOutputStream(out); + final DataOutput dos = getDataOutput(out); final int mode; if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index d6d009dd..31134370 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -97,7 +97,7 @@ public static Shard createEmpty(final ShardedDatasetAttributes attributes final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); - final ShardIndex shardIndex = new ShardIndex(attributes.getBlocksPerShard(), emptyIndex); + final ShardIndex shardIndex = new ShardIndex(attributes.getBlocksPerShard(), emptyIndex, ShardingCodec.IndexLocation.END); return new InMemoryShard(attributes, shardPosition, shardIndex); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 36ad5cd7..1c840b1e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -1,29 +1,29 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; -import java.util.Arrays; - +import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.DefaultBlockReader; +import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.LongArrayDataBlock; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -import org.janelia.saalfeldlab.n5.RawCompression; import 
org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.util.Arrays; + public class ShardIndex extends LongArrayDataBlock { private static final int BYTES_PER_LONG = 8; @@ -31,21 +31,25 @@ public class ShardIndex extends LongArrayDataBlock { private static final int LONGS_PER_BLOCK = 2; private static final long[] DUMMY_GRID_POSITION = null; - - private long byteOffset = -1; + private final IndexLocation location; private final DeterministicSizeCodec[] codecs; - public ShardIndex(int[] shardBlockGridSize, long[] data, final DeterministicSizeCodec... codecs) { + public ShardIndex(int[] shardBlockGridSize, long[] data, IndexLocation location, final DeterministicSizeCodec... codecs) { super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, data); this.codecs = codecs; + this.location = location; } - public ShardIndex(int[] shardBlockGridSize, final DeterministicSizeCodec... codecs) { + public ShardIndex(int[] shardBlockGridSize, IndexLocation location, DeterministicSizeCodec... codecs) { - super(prepend(LONGS_PER_BLOCK, shardBlockGridSize), DUMMY_GRID_POSITION, emptyIndexData(shardBlockGridSize)); - this.codecs = codecs; + this(shardBlockGridSize, emptyIndexData(shardBlockGridSize), location, codecs); + } + + public ShardIndex(int[] shardBlockGridSize, DeterministicSizeCodec... codecs) { + + this(shardBlockGridSize, emptyIndexData(shardBlockGridSize), IndexLocation.END, codecs); } public boolean exists(long... gridPosition) { @@ -54,6 +58,11 @@ public boolean exists(long... 
gridPosition) { getNumBytes(gridPosition) != Shard.EMPTY_INDEX_NBYTES; } + public IndexLocation getLocation() { + + return location; + } + public long getOffset(long... gridPosition) { return data[getOffsetIndex(gridPosition)]; @@ -88,16 +97,6 @@ private int getNumBytesIndex(long... gridPosition) { return getOffsetIndex(gridPosition) + 1; } - public static ShardIndex read( - final KeyValueAccess keyValueAccess, - final String key, - final ShardedDatasetAttributes datasetAttributes - ) throws IOException { - - final IndexLocation indexLocation = datasetAttributes.getIndexLocation(); - return read(keyValueAccess, key, datasetAttributes.createIndex(), indexLocation); - } - public long numBytes() { final int numEntries = Arrays.stream(getSize()).reduce(1, (x, y) -> x * y); @@ -114,53 +113,55 @@ public long numBytes() { public static ShardIndex read( final KeyValueAccess keyValueAccess, final String key, - final ShardIndex idx, - final IndexLocation indexLocation - ) throws IOException { + final ShardIndex index + ) throws IOException { - final IndexByteBounds byteBounds = byteBounds(idx.numBytes(), indexLocation, keyValueAccess.size(key)); - idx.byteOffset = byteBounds.start; + final IndexByteBounds byteBounds = byteBounds(index, keyValueAccess.size(key)); try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { - - final byte[] bytes = new byte[idx.getNumElements() * ShardIndex.BYTES_PER_LONG]; - lockedChannel.newInputStream().read(bytes); - idx.readData(ByteBuffer.wrap(bytes).order(ByteOrder.BIG_ENDIAN)); // TODO generalize byte order - return idx; - + final long[] indexData; + try (final InputStream in = lockedChannel.newInputStream()) { + final DataBlock indexBlock = (DataBlock)DefaultBlockReader.readBlock( + in, + index.getIndexAttributes(), + index.gridPosition); + indexData = indexBlock.getData(); + } + System.arraycopy(indexData, 0, index.data, 0, index.data.length); + return index; } catch (final 
N5Exception.N5NoSuchKeyException e) { return null; } catch (final IOException | UncheckedIOException e) { - throw new N5IOException("Failed to read from " + key, e); + throw new N5IOException("Failed to read shard index from " + key, e); } } - public static void write(ShardIndex index, + public static void write( + final ShardIndex index, final KeyValueAccess keyValueAccess, - final String key, - final int[] shardBlockGridSize, - final IndexLocation indexLocation) throws IOException { - - final IndexByteBounds byteBounds = byteBounds(index.numBytes(), indexLocation, keyValueAccess.size(key)); - try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, byteBounds.start, byteBounds.end)) { - - final OutputStream os = lockedChannel.newOutputStream(); - os.write(index.toByteBuffer().array()); + final String key + ) throws IOException { + final long start = index.location == IndexLocation.START ? 0 : keyValueAccess.size(key); + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, start, index.numBytes())) { + try (final OutputStream os = lockedChannel.newOutputStream()) { + DefaultBlockWriter.writeBlock(os, index.getIndexAttributes(), index); + } } catch (final IOException | UncheckedIOException e) { - throw new N5IOException("Failed to read from " + key, e); + throw new N5IOException("Failed to write shard index to " + key, e); } } - public static DatasetAttributes indexDatasetAttributes(final int[] indexBlockSize) { + private DatasetAttributes getIndexAttributes() { - final int[] blkSize = new int[indexBlockSize.length]; - final long[] size = new long[indexBlockSize.length]; - for (int i = 0; i < blkSize.length; i++) { - blkSize[i] = (int)indexBlockSize[i]; - } - - // TODO codecs - return new DatasetAttributes(size, blkSize, DataType.UINT64, new RawCompression(), null); + final DatasetAttributes indexAttributes = + new DatasetAttributes( + Arrays.stream(getSize()).mapToLong(it -> it).toArray(), + getSize(), + DataType.UINT64, + 
null, + codecs + ); + return indexAttributes; } public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttributes, final long objectSize) { @@ -169,6 +170,10 @@ public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttribu return byteBounds(indexSize, datasetAttributes.getIndexLocation(), objectSize); } + public static IndexByteBounds byteBounds(final ShardIndex index, long objectSize) { + return byteBounds(index.numBytes(), index.location, objectSize); + } + public static IndexByteBounds byteBounds(final long indexSize, final IndexLocation indexLocation, final long objectSize) { if (indexLocation == IndexLocation.START) { @@ -178,10 +183,6 @@ public static IndexByteBounds byteBounds(final long indexSize, final IndexLocati } } - public long getByteOffset() { - return byteOffset; - } - private static class IndexByteBounds { private final long start; @@ -214,7 +215,7 @@ public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes data indexes[i] = dis.readLong(); } - return new ShardIndex(indexShape, indexes); + return new ShardIndex(indexShape, indexes, IndexLocation.END); } private static long[] emptyIndexData(final int[] size) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 332f77b6..1dd63f34 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.shard; +import java.io.DataOutput; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -39,6 +40,8 @@ public DataBlock getBlock(long... 
blockGridPosition) { throw new N5IOException("Attempted to read a block from the wrong shard."); final ShardIndex idx = getIndex(); + + final long startByte = idx.getOffset(relativePosition); if (startByte == Shard.EMPTY_INDEX_NBYTES ) @@ -63,19 +66,14 @@ public void writeBlock(final DataBlock block) { if (relativePosition == null) throw new N5IOException("Attempted to write block in the wrong shard."); - final ShardIndex idx = getIndex(); - - - //TODO Caleb: reusing the offset of a prior block write is only safe when writing the same amount, or less, data. - // This is not generally guaranteed, since we compress the data. - // Either need to known the compressed size before writing, append only, or only overwrite when not compressing - final long getBlockOffset = idx.getOffset(relativePosition); - final long startByte; - if (getBlockOffset == Shard.EMPTY_INDEX_NBYTES) { - final long indexByteOffset = idx.getByteOffset(); - startByte = indexByteOffset == -1 ? 0 : idx.getByteOffset(); - } else { - startByte = getBlockOffset; + final ShardIndex index = getIndex(); + long startByte = 0; + try { + startByte = keyValueAccess.size(path); + } catch (N5Exception.N5NoSuchKeyException e) { + startByte = index.getLocation() == ShardingCodec.IndexLocation.START ? 
index.numBytes() : 0; + } catch (IOException e) { + throw new N5IOException(e); } final long size = Long.MAX_VALUE - startByte; @@ -85,14 +83,18 @@ public void writeBlock(final DataBlock block) { DefaultBlockWriter.writeBlock(out, datasetAttributes, block); /* Update and write the index to the shard*/ - idx.set(startByte, out.getNumBytes(), relativePosition); - DefaultBlockWriter.writeBlock(out, datasetAttributes.getIndexAttributes(), idx); + index.set(startByte, out.getNumBytes(), relativePosition); } } } catch (final IOException | UncheckedIOException e) { - throw new N5IOException("Failed to read block from " + path, e); + throw new N5IOException("Failed to write block to shard " + path, e); } + try { + ShardIndex.write(index, keyValueAccess, path); + } catch (IOException e) { + throw new N5IOException("Failed to write index to shard " + path, e); + } } @Override @@ -116,9 +118,9 @@ public ShardIndex createIndex() { public ShardIndex getIndex() { try { - final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, datasetAttributes); + final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, datasetAttributes.createIndex()); index = readIndex == null ? 
createIndex() : readIndex; - } catch (final NoSuchFileException e) { + } catch (final N5Exception.N5NoSuchKeyException e) { index = createIndex(); } catch (IOException e) { throw new N5IOException("Failed to read index at " + path, e); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 4bab3d45..42cc1474 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -78,7 +78,7 @@ public void writeReadBlockTest() { new int[]{4, 4}, new int[]{2, 2}, DataType.UINT8, - new Codec[]{new N5BlockCodec(), new GzipCompression(4)}, + new Codec[]{new N5BlockCodec(ByteOrder.LITTLE_ENDIAN), new GzipCompression(4)}, new DeterministicSizeCodec[]{new BytesCodec(ByteOrder.BIG_ENDIAN), new Crc32cChecksumCodec()}, IndexLocation.END ); From f667bb53e91038953a62863e1caec453abedd4e8 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 19 Sep 2024 15:39:06 -0400 Subject: [PATCH 057/124] fix: ShardingCodec indexLocation should default to END --- .../java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index af06ec0e..ebea15e6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -43,7 +43,7 @@ public enum IndexLocation { @NameConfig.Parameter(INDEX_CODECS_KEY) private final DeterministicSizeCodec[] indexCodecs; - @NameConfig.Parameter(INDEX_LOCATION_KEY) + @NameConfig.Parameter(value = INDEX_LOCATION_KEY, optional = true) private final IndexLocation indexLocation; private ShardingCodec() { @@ -51,7 +51,7 @@ private ShardingCodec() { blockSize = null; codecs = null; indexCodecs = null; - indexLocation = 
null; + indexLocation = IndexLocation.END; } public ShardingCodec( From 66351079e151cb42c44c1610663d0d70a2db195f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 19 Sep 2024 15:44:10 -0400 Subject: [PATCH 058/124] style: ShardingCodec --- .../java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index ebea15e6..af1f23e0 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -25,7 +25,7 @@ public class ShardingCodec implements Codec.ArrayCodec { public static final String TYPE = "sharding_indexed"; - public final static String CHUNK_SHAPE_KEY = "chunk_shape"; + public static final String CHUNK_SHAPE_KEY = "chunk_shape"; public static final String INDEX_LOCATION_KEY = "index_location"; public static final String CODECS_KEY = "codecs"; public static final String INDEX_CODECS_KEY = "index_codecs"; @@ -46,6 +46,7 @@ public enum IndexLocation { @NameConfig.Parameter(value = INDEX_LOCATION_KEY, optional = true) private final IndexLocation indexLocation; + @SuppressWarnings("unused") private ShardingCodec() { blockSize = null; From 4489116ce5e5a003d2108b37436df565b4a4bb54 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 20 Sep 2024 11:34:50 -0400 Subject: [PATCH 059/124] fix: getBlockPositionInShard --- .../saalfeldlab/n5/ShardedDatasetAttributes.java | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 794b8861..ff69e4dc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ 
-18,7 +18,6 @@ public class ShardedDatasetAttributes extends DatasetAttributes { private final ShardingCodec shardingCodec; - public ShardedDatasetAttributes ( final long[] dimensions, final int[] shardSize, //in pixels @@ -44,7 +43,7 @@ public ShardedDatasetAttributes( final int[] blockSize, //in pixels final DataType dataType, final ShardingCodec codec) { - super(dimensions, blockSize, dataType, null, codec.getCodecs()); + super(dimensions, blockSize, dataType, null, null); this.shardSize = shardSize; this.shardingCodec = codec; } @@ -131,23 +130,12 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b return null; final int[] shardSize = getShardSize(); - final int[] blkSize = getBlockSize(); - final int[] blkGridSize = getBlocksPerShard(); -// final int[] shardSize = getSize(); -// final int[] blkSize = getBlockSize(); -// final int[] blkGridSize = getBlockGridSize(); - final long[] blockShardPos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { - final long shardP = shardPos[i] * shardSize[i]; - final long blockP = blockPosition[i] * blkSize[i]; - blockShardPos[i] = (int)((blockP - shardP) / blkGridSize[i]); + blockShardPos[i] = blockPosition[i] % shardSize[i]; } return blockShardPos; - - - } /** From eb6de85b51c70b96f02c35218de8d3795262c8ed Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 20 Sep 2024 11:35:55 -0400 Subject: [PATCH 060/124] fix/wip: sharding codec block sizes needs reversing in zarr --- .../java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index af1f23e0..0847f51d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -11,6 +11,7 @@ import org.janelia.saalfeldlab.n5.DatasetAttributes; import 
org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.serialization.N5Annotations; import org.janelia.saalfeldlab.n5.serialization.NameConfig; import java.io.IOException; @@ -34,6 +35,7 @@ public enum IndexLocation { START, END; } + @N5Annotations.ReverseArray // TODO need to reverse for zarr, not for n5 @NameConfig.Parameter(CHUNK_SHAPE_KEY) private final int[] blockSize; From a8df678ff3e3e048bbc1f378ee1eff655b2a38ec Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 20 Sep 2024 11:36:55 -0400 Subject: [PATCH 061/124] feat: add getShardAttributes method to DatasetAttributes * and minor clean up --- .../saalfeldlab/n5/DatasetAttributes.java | 16 ++++++++++++++++ .../saalfeldlab/n5/shard/ShardingCodec.java | 3 +++ .../saalfeldlab/n5/shard/VirtualShard.java | 2 -- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 1ada1708..1cabbc3a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -148,6 +148,22 @@ public BytesCodec[] getCodecs() { return byteCodecs; } + public ShardedDatasetAttributes getShardAttributes() { + + if (getArrayCodec() instanceof ShardingCodec) { + + final ShardingCodec shardingCodec = (ShardingCodec)getArrayCodec(); + return new ShardedDatasetAttributes( + dimensions, + blockSize, + shardingCodec.getBlockSize(), + getDataType(), + shardingCodec); + + } else + return null; + } + public HashMap asMap() { final HashMap map = new HashMap<>(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index 0847f51d..cb65f1a4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -86,6 +86,9 @@ public ArrayCodec getArrayCodec() { public BytesCodec[] getCodecs() { + if (codecs.length == 1) + return new BytesCodec[]{}; + final BytesCodec[] bytesCodecs = new BytesCodec[codecs.length - 1]; System.arraycopy(codecs, 1, bytesCodecs, 0, bytesCodecs.length); return bytesCodecs; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 1dd63f34..27099432 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -1,11 +1,9 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.DataOutput; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.UncheckedIOException; -import java.nio.file.NoSuchFileException; import java.util.Arrays; import org.janelia.saalfeldlab.n5.DataBlock; From 865c861a6ff268e143d46a5f61d8d0213b311f31 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 19 Nov 2024 11:17:21 -0500 Subject: [PATCH 062/124] wip: n5 exception and InMemoryShard --- .../janelia/saalfeldlab/n5/N5Exception.java | 27 +++++ .../org/janelia/saalfeldlab/n5/N5Writer.java | 4 +- .../n5/ShardedDatasetAttributes.java | 2 +- .../saalfeldlab/n5/shard/InMemoryShard.java | 105 +++++++++++++++++- .../janelia/saalfeldlab/n5/shard/Shard.java | 16 +++ .../n5/shard/ShardIndexBuilder.java | 67 +++++++++++ 6 files changed, 216 insertions(+), 5 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Exception.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Exception.java index 345a7cd0..7fbe0135 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Exception.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Exception.java @@ -121,4 +121,31 @@ protected 
N5NoSuchKeyException( super(message, cause, enableSuppression, writableStackTrace); } } + + public static class N5ShardException extends N5IOException { + + public N5ShardException(final String message) { + + super(message); + } + + public N5ShardException(final String message, final Throwable cause) { + + super(message, cause); + } + + public N5ShardException(final Throwable cause) { + + super(cause); + } + + protected N5ShardException( + final String message, + final Throwable cause, + final boolean enableSuppression, + final boolean writableStackTrace) { + + super(message, cause, enableSuppression, writableStackTrace); + } + } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 0c734e16..93c86bcf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -304,7 +304,7 @@ default void writeBlocks( * @param datasetPath * dataset path * @param datasetAttributes - * the dataset attributes + * the sharded dataset attributes * @param dataBlock * the data block * @param @@ -314,7 +314,7 @@ default void writeBlocks( */ void writeShard( final String datasetPath, - final DatasetAttributes datasetAttributes, + final ShardedDatasetAttributes datasetAttributes, final Shard shard) throws N5Exception; /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index ff69e4dc..ab6e0f9b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -119,7 +119,7 @@ public long[] getShardPositionForBlock(final long... blockGridPosition) { } /** - * Returns of the block at the given position relative to this shard, or null if this shard does not contain the given block. 
+ * Returns the block at the given position relative to this shard, or null if this shard does not contain the given block. * * @return the shard position */ diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index baacc8c5..e79bf5ac 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -1,14 +1,28 @@ package org.janelia.saalfeldlab.n5.shard; +import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.List; +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.io.output.CountingOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; public class InMemoryShard extends AbstractShard { private List> blocks; + + private ShardIndexBuilder indexBuilder; + + /* + * TODO: + * Use morton- or c-ording instead of writing blocks out in the order they're added? 
+ * (later) + */ public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, ShardIndex index) { @@ -19,16 +33,103 @@ public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final lon @Override public void writeBlock(DataBlock block) { + + addBlock(block); + } + + public void addBlock(DataBlock block) { + + blocks.add(block); + } - // TODO Auto-generated method stub + public int numBlocks() { + return blocks.size(); + } + + public DataBlock getBlock(int i) { + + return blocks.get(i); } @Override public void writeShard() { - // TODO Auto-generated method stub + } + + public static void writeShard( + final OutputStream out, + InMemoryShard shard ) throws IOException { + + final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + + if( shard.getIndex().getLocation() == IndexLocation.END) + writeShardEnd( out, shard); + else + writeShardStart( out, shard); + + } + + protected static void writeShardEnd( + final OutputStream out, + InMemoryShard shard ) throws IOException { + + final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + + final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); + indexBuilder.indexLocation(IndexLocation.END); + + final CountingOutputStream cout = new CountingOutputStream(out); + + long offset = 0; + for (int i = 0; i < shard.numBlocks(); i++) { + + final DataBlock block = shard.getBlock(i); + DefaultBlockWriter.writeBlock(cout, datasetAttributes, block); + + indexBuilder.addBLock( block.getGridPosition(), offset); + offset = cout.getByteCount(); + } + + final ShardIndex index = indexBuilder.build(); + DefaultBlockWriter.writeBlock(out, datasetAttributes, index); + } + + protected static void writeShardStart( + final OutputStream out, + InMemoryShard shard ) throws IOException { + + final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final ShardIndexBuilder indexBuilder = new 
ShardIndexBuilder(shard); + indexBuilder.indexLocation(IndexLocation.START); + + long offset = 0; + final List blockData = new ArrayList<>(shard.numBlocks()); + for (int i = 0; i < shard.numBlocks(); i++) { + + final DataBlock block = shard.getBlock(i); + + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + DefaultBlockWriter.writeBlock(os, datasetAttributes, block); + final byte[] data = os.toByteArray(); + + blockData.add(data); + indexBuilder.addBLock( block.getGridPosition(), offset); + offset += data.length; + } + + final ShardIndex index = indexBuilder.build(); + try { + DefaultBlockWriter.writeBlock(out, datasetAttributes, index); + + for( byte[] data : blockData ) + out.write(data); + + } catch (Exception e) { + e.printStackTrace(); + } } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 31134370..704e6c98 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -111,6 +111,22 @@ public static long flatIndex(long[] gridPosition, int[] gridSize) { } return index; } + + /** + * + * @param + * the type + * @param dataBlocks + * an array + * @return a shard containing the given blocks + */ + public static Shard fromDataBlocks( + final ShardedDatasetAttributes attributes, + final DataBlock[] dataBlocks) { + + // TODO implement me + return null; + } /** * Say we want async datablock access diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java new file mode 100644 index 00000000..35073c66 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -0,0 +1,67 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.util.Arrays; + +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import 
org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; + +public class ShardIndexBuilder { + + private final Shard shard; + + private final ShardIndex temporaryIndex; + + private IndexLocation location = IndexLocation.END; + + private DeterministicSizeCodec[] codecs; + + private long currentOffset = 0; + + public ShardIndexBuilder(Shard shard) { + + this.shard = shard; + this.temporaryIndex = new ShardIndex(shard.getBlockGridSize(), location); + } + + public ShardIndex build() { + + return new ShardIndex( + shard.getBlockGridSize(), + temporaryIndex.getData(), + location, + codecs); + } + + public ShardIndexBuilder indexLocation(IndexLocation location) { + + this.location = location; + return this; + } + + public ShardIndexBuilder setCodecs(DeterministicSizeCodec... codecs) { + + this.codecs = codecs; + return this; + } + + public ShardIndexBuilder addBLock(long[] blockPosition, long numBytes) { + + final long[] blockPositionInShard = shard.getDatasetAttributes().getBlockPositionInShard( + shard.getGridPosition(), + blockPosition); + + if (blockPositionInShard == null) { + throw new IllegalArgumentException(String.format( + "The block at position %s is not contained in the shard at position : %s and size : %s )", + Arrays.toString(blockPosition), + Arrays.toString(shard.getGridPosition()), + Arrays.toString(shard.getSize()))); + } + + temporaryIndex.set(currentOffset, numBytes, blockPositionInShard); + currentOffset += numBytes; + + return this; + } + +} From fccdb9abe4149f1a5175305a006e90e9c1aa7609 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 19 Nov 2024 11:39:59 -0500 Subject: [PATCH 063/124] wip: dummy impl of writeShard --- .../org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 60ce9299..7cf3518b 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -244,13 +244,10 @@ default void writeBlock( @Override default void writeShard( final String path, - final DatasetAttributes datasetAttributes, + final ShardedDatasetAttributes datasetAttributes, final Shard shard) throws N5Exception { - if (!(datasetAttributes instanceof ShardedDatasetAttributes)) - throw new N5IOException("Can not write shard into non-sharded dataset " + path); - - // TODO implement me + throw new N5Exception("not implemented"); } @Override From 19b8618251284654752286f68221c7ec88105ba5 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 16 Dec 2024 16:07:25 -0500 Subject: [PATCH 064/124] doc: getShardSize --- .../org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index ab6e0f9b..dfb01e6f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -62,6 +62,11 @@ public ShardingCodec getShardingCodec() { return shardingCodec.getCodecs(); } + /** + * The size of the blocks in pixel units. + * + * @return the number of pixels per dimension for this shard. 
+ */ public int[] getShardSize() { return shardSize; From 3229fdd6c1bcae0984b6f702b5145c5115dc5ad8 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 16 Dec 2024 16:07:51 -0500 Subject: [PATCH 065/124] fix: ShardedDatasetAttributes.getBlockPosition --- .../org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index dfb01e6f..0c04c372 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -134,7 +134,7 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b if (!Arrays.equals(shardPosition, shardPos)) return null; - final int[] shardSize = getShardSize(); + final int[] shardSize = getBlocksPerShard(); final long[] blockShardPos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { blockShardPos[i] = blockPosition[i] % shardSize[i]; From b270ece22224cbe326f0ea1a7f0140a05b4b4d9f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 18 Dec 2024 13:17:36 -0500 Subject: [PATCH 066/124] perf: override writeData * (Short/Float/Double)DataBlock --- .../org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java | 8 ++++++++ .../org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java | 8 ++++++++ .../org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java index 8cbb1511..0240e6fa 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DoubleArrayDataBlock.java @@ -26,6 +26,7 @@ package org.janelia.saalfeldlab.n5; import java.io.DataInput; +import 
java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; @@ -57,6 +58,13 @@ public void readData(final DataInput inputStream) throws IOException { data[i] = inputStream.readDouble(); } + @Override + public void writeData(final DataOutput output) throws IOException { + + for (int i = 0; i < data.length; i++) + output.writeDouble(data[i]); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java index aa97ce3f..a2bc2c69 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FloatArrayDataBlock.java @@ -26,6 +26,7 @@ package org.janelia.saalfeldlab.n5; import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; @@ -57,6 +58,13 @@ public void readData(final DataInput inputStream) throws IOException { data[i] = inputStream.readFloat(); } + @Override + public void writeData(final DataOutput output) throws IOException { + + for (int i = 0; i < data.length; i++) + output.writeFloat(data[i]); + } + @Override public int getNumElements() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java index 34c5a883..c7d141f3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShortArrayDataBlock.java @@ -26,6 +26,7 @@ package org.janelia.saalfeldlab.n5; import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; @@ -57,6 +58,13 @@ public void readData(final DataInput dataInput) throws IOException { data[i] = dataInput.readShort(); } + @Override + public void writeData(final DataOutput output) throws IOException { + + for (int i = 0; i < data.length; i++) + output.writeShort(data[i]); + } + @Override 
public int getNumElements() { From f46aa524c230863194c3423235c31a4cc90f5f3b Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 18 Dec 2024 13:20:21 -0500 Subject: [PATCH 067/124] chore: bump pom-scijava to 40.0.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9ea763f0..cab565a7 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ org.scijava pom-scijava - 38.0.1 + 40.0.0 From 362c74d21e944f26eff70418c9b710a43b4888f7 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 20 Dec 2024 10:27:51 -0500 Subject: [PATCH 068/124] perf: initialize cache only if using it --- .../java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java b/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java index edbc6947..dd4b9cc2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5KeyValueReader.java @@ -123,7 +123,11 @@ protected N5KeyValueReader( this.keyValueAccess = keyValueAccess; this.gson = GsonUtils.registerGson(gsonBuilder); this.cacheMeta = cacheMeta; - this.cache = newCache(); + + if (this.cacheMeta) + this.cache = newCache(); + else + this.cache = null; try { uri = keyValueAccess.uri(basePath); From edbdef6cda0a781e601d975ce72442b3583426f8 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 23 Dec 2024 08:34:56 -0500 Subject: [PATCH 069/124] fix: make removeAttribute methods' behavior more consistent * this change allows re-use of this method by zarr v3 --- .../java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 7cf3518b..3039f79a 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -196,7 +196,7 @@ default T removeAttribute(final String pathName, final String key, final Cla throw new N5Exception.N5ClassCastException(e); } if (obj != null) { - writeAttributes(normalPath, attributes); + setAttributes(normalPath, attributes); } return obj; } From 1b672ded40616286244bd772ae6d49a19cc9bc9b Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 23 Dec 2024 09:04:27 -0500 Subject: [PATCH 070/124] fix: gzip make useZlib parameter optional * e.g. not specified in zarrs written by tensorstore --- src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java index 3091ad28..b03a4d93 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GzipCompression.java @@ -54,7 +54,7 @@ public class GzipCompression implements DefaultBlockReader, DefaultBlockWriter, private final int level; @CompressionParameter - @NameConfig.Parameter + @NameConfig.Parameter(optional = true) private final boolean useZlib; private final transient GzipParameters parameters = new GzipParameters(); From bec29965b77a6981c42848c8eb578ada7883f904 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 23 Dec 2024 11:44:26 -0500 Subject: [PATCH 071/124] fix: BlockWriter should not close stream * rather, should be closed where it is opened * this change enables the stream to be re-used by multiple block writers, e.g. 
during sharding --- src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index fd7450ef..badf9f24 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -82,7 +82,6 @@ public static void writeBlock( stream = codec.encode(stream); dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); - stream.close(); } public static void writeFromStream(final DataBlock dataBlock, final OutputStream out) throws IOException { From 2c753a1a0b191fc1221693055eb7d65b130a0589 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 2 Jan 2025 15:01:10 -0500 Subject: [PATCH 072/124] Revert "fix: BlockWriter should not close stream" This reverts commit bec29965b77a6981c42848c8eb578ada7883f904. --- src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java index badf9f24..fd7450ef 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockWriter.java @@ -82,6 +82,7 @@ public static void writeBlock( stream = codec.encode(stream); dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); + stream.close(); } public static void writeFromStream(final DataBlock dataBlock, final OutputStream out) throws IOException { From 275aaa61d4a45f1c1d7978de645ad9a369f4b14a Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 2 Jan 2025 15:34:05 -0500 Subject: [PATCH 073/124] feat(test): parameterize ShardDemo read/write test; add new test --- .../saalfeldlab/n5/shard/ShardDemos.java | 53 +++++++++++++++++-- 1 file changed, 48 insertions(+), 5 
deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index 42cc1474..f06e9dae 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,12 +1,15 @@ package org.janelia.saalfeldlab.n5.shard; import com.google.gson.GsonBuilder; +import org.janelia.saalfeldlab.n5.Bzip2Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.Lz4Compression; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.XzCompression; import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; @@ -16,14 +19,21 @@ import org.janelia.saalfeldlab.n5.universe.N5Factory; import org.junit.Assert; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.net.MalformedURLException; import java.nio.ByteOrder; import java.nio.file.FileSystems; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +@RunWith(Parameterized.class) public class ShardDemos { public static void main(String[] args) throws MalformedURLException { @@ -62,6 +72,30 @@ public static void main(String[] args) throws MalformedURLException { System.out.println(Arrays.toString(dataReRead)); } + @Parameterized.Parameters(name = "IndexLocation({0}), Block ByteOrder({1}), Index ByteOrder({2})") + public static Collection data() { + final ArrayList params = new ArrayList<>(); + for (IndexLocation 
indexLoc : IndexLocation.values()) { + for (ByteOrder blockByteOrder : new ByteOrder[]{ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { + for (ByteOrder indexByteOrder : new ByteOrder[]{ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { + params.add(new Object[]{indexLoc, blockByteOrder, indexByteOrder}); + } + } + } + final Object[][] paramArray = new Object[params.size()][]; + Arrays.setAll(paramArray, params::get); + return Arrays.asList(paramArray); + } + + @Parameterized.Parameter() + public IndexLocation indexLocation; + + @Parameterized.Parameter(1) + public ByteOrder dataByteOrder; + + @Parameterized.Parameter(2) + public ByteOrder indexByteOrder; + @Test public void writeReadBlockTest() { @@ -78,9 +112,9 @@ public void writeReadBlockTest() { new int[]{4, 4}, new int[]{2, 2}, DataType.UINT8, - new Codec[]{new N5BlockCodec(ByteOrder.LITTLE_ENDIAN), new GzipCompression(4)}, - new DeterministicSizeCodec[]{new BytesCodec(ByteOrder.BIG_ENDIAN), new Crc32cChecksumCodec()}, - IndexLocation.END + new Codec[]{new N5BlockCodec(dataByteOrder), new GzipCompression(4)}, + new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, + indexLocation ); writer.createDataset("shard", datasetAttributes); writer.deleteBlock("shard", 0, 0); @@ -89,9 +123,10 @@ public void writeReadBlockTest() { final DataType dataType = datasetAttributes.getDataType(); final int numElements = 2 * 2; + final HashMap writtenBlocks = new HashMap<>(); + for (int idx1 = 1; idx1 >= 0; idx1--) { for (int idx2 = 1; idx2 >= 0; idx2--) { - final long[] gridPosition = {idx1, idx2}; final DataBlock dataBlock = (DataBlock)dataType.createDataBlock(blockSize, gridPosition, numElements); byte[] data = dataBlock.getData(); @@ -101,8 +136,16 @@ public void writeReadBlockTest() { writer.writeBlock("shard", datasetAttributes, dataBlock); final DataBlock block = (DataBlock)writer.readBlock("shard", datasetAttributes, gridPosition); - Assert.assertArrayEquals("Read from shard doesn't 
match", data, block.getData()); + + for (Map.Entry entry : writtenBlocks.entrySet()) { + final long[] otherGridPosition = entry.getKey(); + final byte[] otherData = entry.getValue(); + final DataBlock otherBlock = (DataBlock)writer.readBlock("shard", datasetAttributes, otherGridPosition); + Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, otherBlock.getData()); + } + + writtenBlocks.put(gridPosition, data); } } } From 9183d11f72971130cf526d9b61c03b40393f6bfb Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 2 Jan 2025 15:42:50 -0500 Subject: [PATCH 074/124] feat(wip): toward an implementation of writeShard --- .../saalfeldlab/n5/DatasetAttributes.java | 2 +- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 4 +- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 19 +++- .../org/janelia/saalfeldlab/n5/N5Writer.java | 26 ++--- .../n5/ShardedDatasetAttributes.java | 50 ++++++++- .../saalfeldlab/n5/shard/InMemoryShard.java | 100 +++++++++++++----- .../janelia/saalfeldlab/n5/shard/Shard.java | 58 +++++++--- .../saalfeldlab/n5/shard/ShardIndex.java | 7 +- .../n5/shard/ShardIndexBuilder.java | 18 +++- .../saalfeldlab/n5/shard/ShardWriter.java | 35 ++---- .../saalfeldlab/n5/shard/VirtualShard.java | 15 +-- .../saalfeldlab/n5/util/GridIterator.java | 98 +++++++++++++++++ 12 files changed, 330 insertions(+), 102 deletions(-) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 1cabbc3a..04ec1b35 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -221,7 +221,7 @@ private static Compression getCompressionVersion0(final String compressionVersio return null; } - private Codec[] concatenateCodecs() { + protected Codec[] concatenateCodecs() { final Codec[] allCodecs = new 
Codec[byteCodecs.length + 1]; allCodecs[0] = arrayCodec; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index e6a3429f..288136ba 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -103,8 +103,8 @@ default DataBlock readBlock( final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception { - if (datasetAttributes instanceof ShardedDatasetAttributes) { - final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes)datasetAttributes; + final ShardedDatasetAttributes shardedAttrs = datasetAttributes.getShardAttributes(); + if (shardedAttrs != null) { final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); final Shard shard = getShard(pathName, shardedAttrs, shardPosition); return shard.getBlock(gridPosition); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 3039f79a..ee5ab203 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -33,6 +33,7 @@ import java.util.Map; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.shard.InMemoryShard; import org.janelia.saalfeldlab.n5.shard.Shard; import com.google.gson.Gson; @@ -231,7 +232,7 @@ default void writeBlock( final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { - try ( final OutputStream out = lock.newOutputStream()) { + try (final OutputStream out = lock.newOutputStream()) { DefaultBlockWriter.writeBlock(out, datasetAttributes, dataBlock); } } catch (final IOException | 
UncheckedIOException e) { @@ -244,10 +245,22 @@ default void writeBlock( @Override default void writeShard( final String path, - final ShardedDatasetAttributes datasetAttributes, + final DatasetAttributes datasetAttributes, final Shard shard) throws N5Exception { - throw new N5Exception("not implemented"); + if( datasetAttributes.getShardAttributes() == null ) + throw new N5IOException("Tried to write shard into a not-sharded dataset: " + path); + + final String shardPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), shard.getGridPosition()); + try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { + try (final OutputStream out = lock.newOutputStream()) { + InMemoryShard.fromShard(shard).write(out); + out.close(); + } + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException( + "Failed to write shard " + Arrays.toString(shard.getGridPosition()) + " into dataset " + path, e); + } } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 93c86bcf..b0ed462f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -295,26 +295,26 @@ default void writeBlocks( final String datasetPath, final DatasetAttributes datasetAttributes, final DataBlock... dataBlocks) throws N5Exception { - //TODO Caleb: write this + + // TODO Caleb: write this + + // default method is naive + for (DataBlock block : dataBlocks) + writeBlock(datasetPath, datasetAttributes, block); } /** - * Writes a complete {@link Shard} to a dataset. + * Writes a {@link Shard}. 
* - * @param datasetPath - * dataset path - * @param datasetAttributes - * the sharded dataset attributes - * @param dataBlock - * the data block - * @param - * the data block data type - * @throws N5Exception - * if the requested dataset is not sharded + * @param datasetPath dataset path + * @param datasetAttributes the dataset attributes + * @param shard the shard + * @param the data block data type + * @throws N5Exception the exception */ void writeShard( final String datasetPath, - final ShardedDatasetAttributes datasetAttributes, + final DatasetAttributes datasetAttributes, final Shard shard) throws N5Exception; /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 0c04c372..c8d3ee6a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -48,6 +48,10 @@ public ShardedDatasetAttributes( this.shardingCodec = codec; } + public ShardedDatasetAttributes getShardAttributes() { + return this; + } + public ShardingCodec getShardingCodec() { return shardingCodec; } @@ -62,6 +66,12 @@ public ShardingCodec getShardingCodec() { return shardingCodec.getCodecs(); } + @Override + protected Codec[] concatenateCodecs() { + + return new Codec[] { shardingCodec }; + } + /** * The size of the blocks in pixel units. * @@ -126,10 +136,11 @@ public long[] getShardPositionForBlock(final long... blockGridPosition) { /** * Returns the block at the given position relative to this shard, or null if this shard does not contain the given block. 
* - * @return the shard position + * @return the block position */ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + // TODO check correctness final long[] shardPos = getShardPositionForBlock(blockPosition); if (!Arrays.equals(shardPosition, shardPos)) return null; @@ -143,6 +154,43 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b return blockShardPos; } + /** + * Given a block's position relative to a shard, returns its position in pixels + * relative to the image. + * + * @return the block position + */ + public long[] getBlockMinFromShardPosition(final long[] shardPosition, final long[] blockPosition) { + + // is this useful? + final int[] blockSize = getBlockSize(); + final int[] shardSize = getShardSize(); + final long[] blockImagePos = new long[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + blockImagePos[i] = (shardPosition[i] * shardSize[i]) + (blockPosition[i] * blockSize[i]); + } + + return blockImagePos; + } + + /** + * Given a block's position relative to a shard, returns its position relative + * to the image. + * + * @return the block position + */ + public long[] getBlockPositionFromShardPosition(final long[] shardPosition, final long[] blockPosition) { + + // is this useful? 
+ final int[] shardBlockSize = getBlocksPerShard(); + final long[] blockImagePos = new long[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + blockImagePos[i] = (shardPosition[i] * shardBlockSize[i]) + (blockPosition[i]); + } + + return blockImagePos; + } + /** * @return the number of blocks per shard */ diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index e79bf5ac..a3d9d168 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.commons.io.output.ByteArrayOutputStream; @@ -24,6 +25,13 @@ public class InMemoryShard extends AbstractShard { * (later) */ + public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition) { + + this( datasetAttributes, gridPosition, null); + indexBuilder = new ShardIndexBuilder(this); + indexBuilder.indexLocation(datasetAttributes.getIndexLocation()); + } + public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, ShardIndex index) { @@ -38,7 +46,7 @@ public void writeBlock(DataBlock block) { } public void addBlock(DataBlock block) { - + blocks.add(block); } @@ -52,25 +60,49 @@ public DataBlock getBlock(int i) { return blocks.get(i); } + protected IndexLocation indexLocation() { + + if (index != null) + return index.getLocation(); + else + return indexBuilder.getLocation(); + } + @Override - public void writeShard() { + public ShardIndex getIndex() { + if( index != null ) + return index; + else + return indexBuilder.build(); } - - public static void writeShard( - final OutputStream out, - InMemoryShard shard ) throws IOException { - final ShardedDatasetAttributes datasetAttributes = 
shard.getDatasetAttributes(); - - if( shard.getIndex().getLocation() == IndexLocation.END) - writeShardEnd( out, shard); + public void write(final OutputStream out) throws IOException { + + if (indexLocation() == IndexLocation.END) + writeShardEnd(out, this); else - writeShardStart( out, shard); + writeShardStart(out, this); + } + public static void writeShard(final OutputStream out, final Shard shard) throws IOException { + + fromShard(shard).write(out); } - protected static void writeShardEnd( + public static InMemoryShard fromShard(Shard shard) { + + if (shard instanceof InMemoryShard) + return (InMemoryShard) shard; + + final InMemoryShard inMemoryShard = new InMemoryShard(shard.getDatasetAttributes(), + shard.getGridPosition()); + + shard.forEach(blk -> inMemoryShard.addBlock(blk)); + return inMemoryShard; + } + + protected static void writeShardEndStream( final OutputStream out, InMemoryShard shard ) throws IOException { @@ -87,7 +119,7 @@ protected static void writeShardEnd( final DataBlock block = shard.getBlock(i); DefaultBlockWriter.writeBlock(cout, datasetAttributes, block); - indexBuilder.addBLock( block.getGridPosition(), offset); + indexBuilder.addBlock( block.getGridPosition(), offset); offset = cout.getByteCount(); } @@ -95,6 +127,29 @@ protected static void writeShardEnd( DefaultBlockWriter.writeBlock(out, datasetAttributes, index); } + protected static void writeShardEnd( + final OutputStream out, + InMemoryShard shard ) throws IOException { + + final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + + final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); + indexBuilder.indexLocation(IndexLocation.END); + indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); + + for (int i = 0; i < shard.numBlocks(); i++) { + + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + final DataBlock block = shard.getBlock(i); + DefaultBlockWriter.writeBlock(os, datasetAttributes, block); + + 
indexBuilder.addBlock(block.getGridPosition(), os.size()); + out.write(os.toByteArray()); + } + + ShardIndex.write(indexBuilder.build(), out); + } + protected static void writeShardStart( final OutputStream out, InMemoryShard shard ) throws IOException { @@ -102,25 +157,23 @@ protected static void writeShardStart( final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.START); + indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); - long offset = 0; final List blockData = new ArrayList<>(shard.numBlocks()); for (int i = 0; i < shard.numBlocks(); i++) { - final DataBlock block = shard.getBlock(i); - final ByteArrayOutputStream os = new ByteArrayOutputStream(); + final DataBlock block = shard.getBlock(i); DefaultBlockWriter.writeBlock(os, datasetAttributes, block); - final byte[] data = os.toByteArray(); - blockData.add(data); - indexBuilder.addBLock( block.getGridPosition(), offset); - offset += data.length; + blockData.add(os.toByteArray()); + indexBuilder.addBlock(block.getGridPosition(), os.size()); } - - final ShardIndex index = indexBuilder.build(); + try { - DefaultBlockWriter.writeBlock(out, datasetAttributes, index); + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + ShardIndex.write(indexBuilder.build(), os); + out.write(os.toByteArray()); for( byte[] data : blockData ) out.write(data); @@ -131,5 +184,4 @@ protected static void writeShardStart( } - } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 704e6c98..abaf7e16 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -1,11 +1,13 @@ package org.janelia.saalfeldlab.n5.shard; import java.util.Arrays; +import java.util.Iterator; import 
org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.util.GridIterator; -public interface Shard { +public interface Shard extends Iterable> { long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; @@ -63,6 +65,21 @@ default long[] getBlockPosition(long... blockPosition) { final long[] shardPos = getDatasetAttributes().getShardPositionForBlock(blockPosition); return getDatasetAttributes().getBlockPositionInShard(shardPos, blockPosition); } + + /** + * Returns the position in pixels of the + * + * @return the min + */ + default long[] getShardMinPosition(long... shardPosition) { + + final int[] shardSize = getSize(); + final long[] shardMin = new long[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + shardMin[i] = shardPosition[i] * shardSize[i]; + } + return shardMin; + } /** * Returns the position of the shard containing the block with the given block position. @@ -84,7 +101,10 @@ default long[] getShard(long... blockPosition) { public void writeBlock(DataBlock block); - public void writeShard(); + default Iterator> iterator() { + + return new DataBlockIterator(this); + } default DataBlock[] getAllBlocks(long... position) { //TODO Caleb: Do we want this? 
@@ -111,21 +131,27 @@ public static long flatIndex(long[] gridPosition, int[] gridSize) { } return index; } - - /** - * - * @param - * the type - * @param dataBlocks - * an array - * @return a shard containing the given blocks - */ - public static Shard fromDataBlocks( - final ShardedDatasetAttributes attributes, - final DataBlock[] dataBlocks) { - // TODO implement me - return null; + public static class DataBlockIterator implements Iterator> { + + private final GridIterator it; + private final Shard shard; + + public DataBlockIterator(final Shard shard) { + + this.shard = shard; + it = new GridIterator(shard.getBlockGridSize()); + } + + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public DataBlock next() { + return shard.getBlock(it.next()); + } } /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 1c840b1e..b400b863 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -144,13 +144,18 @@ public static void write( final long start = index.location == IndexLocation.START ? 
0 : keyValueAccess.size(key); try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, start, index.numBytes())) { try (final OutputStream os = lockedChannel.newOutputStream()) { - DefaultBlockWriter.writeBlock(os, index.getIndexAttributes(), index); + write(index, os); } } catch (final IOException | UncheckedIOException e) { throw new N5IOException("Failed to write shard index to " + key, e); } } + public static void write(final ShardIndex index, OutputStream out) throws IOException { + + DefaultBlockWriter.writeBlock(out, index.getIndexAttributes(), index); + } + private DatasetAttributes getIndexAttributes() { final DatasetAttributes indexAttributes = diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index 35073c66..7a7480f8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -9,7 +9,7 @@ public class ShardIndexBuilder { private final Shard shard; - private final ShardIndex temporaryIndex; + private ShardIndex temporaryIndex; private IndexLocation location = IndexLocation.END; @@ -35,16 +35,30 @@ public ShardIndex build() { public ShardIndexBuilder indexLocation(IndexLocation location) { this.location = location; + this.temporaryIndex = new ShardIndex(shard.getBlockGridSize(), location); + + if (location == IndexLocation.END) + currentOffset = 0; + else + currentOffset = temporaryIndex.numBytes(); + return this; } + public IndexLocation getLocation() { + + return this.location; + } + public ShardIndexBuilder setCodecs(DeterministicSizeCodec... 
codecs) { this.codecs = codecs; + final ShardIndex newIndex = new ShardIndex(temporaryIndex.getSize(), temporaryIndex.getLocation(), codecs); + this.temporaryIndex = newIndex; return this; } - public ShardIndexBuilder addBLock(long[] blockPosition, long numBytes) { + public ShardIndexBuilder addBlock(long[] blockPosition, long numBytes) { final long[] blockPositionInShard = shard.getDatasetAttributes().getBlockPositionInShard( shard.getGridPosition(), diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 4a288878..792019fd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -19,7 +19,7 @@ public class ShardWriter { private final List> blocks; - private final ShardedDatasetAttributes datasetAttributes; + private ShardedDatasetAttributes attributes; private ByteBuffer blockSizes; @@ -32,15 +32,14 @@ public class ShardWriter { public ShardWriter(final ShardedDatasetAttributes datasetAttributes) { blocks = new ArrayList<>(); - this.datasetAttributes = datasetAttributes; + attributes = datasetAttributes; } public void reset() { blocks.clear(); - blockSizes = null; blockBytes.clear(); - + blockSizes = null; indexData = null; } @@ -49,21 +48,12 @@ public void addBlock(final DataBlock block) { blocks.add(block); } - public void write(final OutputStream out) throws IOException { - - // TODO need codecs - - // prepareForWriting(); - // if (datasetAttributes.getShardingConfiguration().getIndexLocation()) { - // writeIndexes(out); - // writeBlocks(out); - // } else { - // writeBlocks(out); - // writeIndexes(out); - // } + public void write(final Shard shard, final OutputStream out) throws IOException { + + attributes = shard.getDatasetAttributes(); prepareForWritingDataBlock(); - if (datasetAttributes.getIndexLocation() == ShardingCodec.IndexLocation.START) { + if 
(attributes.getIndexLocation() == ShardingCodec.IndexLocation.START) { writeIndexBlock(out); writeBlocks(out); } else { @@ -77,14 +67,14 @@ private void prepareForWritingDataBlock() throws IOException { // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); - indexData = datasetAttributes.createIndex(); + indexData = attributes.createIndex(); blockBytes = new ArrayList<>(); long cumulativeBytes = 0; final long[] shardPosition = new long[1]; for (int i = 0; i < blocks.size(); i++) { try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { - DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + DefaultBlockWriter.writeBlock(blockOut, attributes, blocks.get(i)); System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); shardPosition[0] = i; @@ -108,7 +98,7 @@ private void prepareForWriting() throws IOException { try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { - DefaultBlockWriter.writeBlock(blockOut, datasetAttributes, blocks.get(i)); + DefaultBlockWriter.writeBlock(blockOut, attributes, blocks.get(i)); System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); blockIndexes.putLong(cumulativeBytes); @@ -126,11 +116,6 @@ private void writeBlocks(final OutputStream out) throws IOException { out.write(bytes); } - private void writeIndexes(final OutputStream out) throws IOException { - - out.write(blockSizes.array()); - } - private void writeIndexBlock(final OutputStream out) throws IOException { final DataOutputStream dos = new DataOutputStream(out); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 27099432..1a769887 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java 
@@ -4,10 +4,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UncheckedIOException; -import java.util.Arrays; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.KeyValueAccess; @@ -39,12 +37,12 @@ public DataBlock getBlock(long... blockGridPosition) { final ShardIndex idx = getIndex(); - final long startByte = idx.getOffset(relativePosition); if (startByte == Shard.EMPTY_INDEX_NBYTES ) return null; + System.out.println("read from start: " + startByte); final long size = idx.getNumBytes(relativePosition); try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, size)) { try ( final InputStream channelIn = lockedChannel.newInputStream()) { @@ -95,17 +93,6 @@ public void writeBlock(final DataBlock block) { } } - @Override - public void writeShard() { - - // TODO - } - - private static int numBlockElements(DatasetAttributes datasetAttributes) { - - return Arrays.stream(datasetAttributes.getBlockSize()).reduce(1, (x, y) -> x * y); - } - public ShardIndex createIndex() { // Empty index of the correct size diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java new file mode 100644 index 00000000..1fcb118c --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -0,0 +1,98 @@ +package org.janelia.saalfeldlab.n5.util; + +import java.util.Iterator; + +/** + * Essentially imglib2's IntervalIterator, but N5 does not depend on imglib2. 
+ */ +public class GridIterator implements Iterator { + + final protected long[] dimensions; + + final protected long[] steps; + + final protected long[] position; + + final protected int lastIndex; + + protected int index = -1; + + public GridIterator(final long[] dimensions) { + + final int n = dimensions.length; + this.dimensions = new long[n]; + this.position = new long[n]; + steps = new long[n]; + + final int m = n - 1; + long k = steps[0] = 1; + for (int d = 0; d < m;) { + final long dimd = dimensions[d]; + this.dimensions[d] = dimd; + k *= dimd; + steps[++d] = k; + } + final long dimm = dimensions[m]; + this.dimensions[m] = dimm; + lastIndex = (int)(k * dimm - 1); + } + + public GridIterator(final int[] dimensions) { + + this(int2long(dimensions)); + } + + public void fwd() { + ++index; + } + + public void reset() { + index = -1; + } + + @Override + public boolean hasNext() { + return index < lastIndex; + } + + @Override + public long[] next() { + fwd(); + indexToPosition(index, dimensions, position); + return position; + } + + public int getIndex() { + return index; + } + + final static public void indexToPosition(long index, final long[] dimensions, final long[] position) { + final int maxDim = dimensions.length - 1; + for (int d = 0; d < maxDim; ++d) { + final long j = index / dimensions[d]; + position[d] = index - j * dimensions[d]; + index = j; + } + position[maxDim] = index; + + } + + final static public int[] long2int(final long[] a) { + final int[] i = new int[a.length]; + + for (int d = 0; d < a.length; ++d) + i[d] = (int) a[d]; + + return i; + } + + final static public long[] int2long(final int[] i) { + final long[] l = new long[i.length]; + + for (int d = 0; d < l.length; ++d) + l[d] = i[d]; + + return l; + } + +} From d4142c5fe1df9091342d8e5e14a32fb9c69669ae Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 2 Jan 2025 15:43:09 -0500 Subject: [PATCH 075/124] chore: stop using deprecated BoundedInputStream constructor --- 
.../org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java index 317c2c5d..05afb2d1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccess.java @@ -163,7 +163,7 @@ public Writer newWriter() throws IOException { @Override public InputStream newInputStream() throws IOException { - return new BoundedInputStream(Channels.newInputStream(channel), len); + return BoundedInputStream.builder().setInputStream(Channels.newInputStream(channel)).setMaxCount(len).get(); } @Override From bc5d103662c0d993e78bb589db34ae94c866d400 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 2 Jan 2025 15:51:06 -0500 Subject: [PATCH 076/124] fix: be quiet --- src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 1a769887..7765d42b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -42,7 +42,6 @@ public DataBlock getBlock(long... 
blockGridPosition) { if (startByte == Shard.EMPTY_INDEX_NBYTES ) return null; - System.out.println("read from start: " + startByte); final long size = idx.getNumBytes(relativePosition); try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, size)) { try ( final InputStream channelIn = lockedChannel.newInputStream()) { From 1c9e6018ae9de65e40fed328d03519f75802e087 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 2 Jan 2025 15:58:57 -0500 Subject: [PATCH 077/124] fix(test): failing on github actions --- src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index f911ec6d..5d58657d 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -43,6 +43,7 @@ public void testSerialization() { new IdentityCodec() } ); + writer.createGroup("shard"); //Should already exist, but this will ensure. 
writer.setAttribute("shard", "/", datasetAttributes); final DatasetAttributes deserialized = writer.getAttribute("shard", "/", DatasetAttributes.class); From b62dc00db0805ad4be3297eac94eb1fbbb7ef3ad Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 2 Jan 2025 16:31:59 -0500 Subject: [PATCH 078/124] fix/test: add writeShardTest * fix ShardIndexBuilder --- .../n5/shard/ShardIndexBuilder.java | 19 +++--- .../saalfeldlab/n5/shard/ShardDemos.java | 58 ++++++++++++++++++- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index 7a7480f8..c8511dc8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -36,12 +36,7 @@ public ShardIndexBuilder indexLocation(IndexLocation location) { this.location = location; this.temporaryIndex = new ShardIndex(shard.getBlockGridSize(), location); - - if (location == IndexLocation.END) - currentOffset = 0; - else - currentOffset = temporaryIndex.numBytes(); - + updateInitialOffset(); return this; } @@ -53,8 +48,9 @@ public IndexLocation getLocation() { public ShardIndexBuilder setCodecs(DeterministicSizeCodec... 
codecs) { this.codecs = codecs; - final ShardIndex newIndex = new ShardIndex(temporaryIndex.getSize(), temporaryIndex.getLocation(), codecs); + final ShardIndex newIndex = new ShardIndex(shard.getBlockGridSize(), temporaryIndex.getLocation(), codecs); this.temporaryIndex = newIndex; + updateInitialOffset(); return this; } @@ -78,4 +74,13 @@ public ShardIndexBuilder addBlock(long[] blockPosition, long numBytes) { return this; } + private void updateInitialOffset() { + + if (location == IndexLocation.END) + currentOffset = 0; + else + currentOffset = temporaryIndex.numBytes(); + + } + } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index f06e9dae..ee4a8411 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -1,15 +1,12 @@ package org.janelia.saalfeldlab.n5.shard; import com.google.gson.GsonBuilder; -import org.janelia.saalfeldlab.n5.Bzip2Compression; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; import org.janelia.saalfeldlab.n5.GzipCompression; -import org.janelia.saalfeldlab.n5.Lz4Compression; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.XzCompression; import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; @@ -150,4 +147,59 @@ public void writeReadBlockTest() { } } + @Test + public void writeReadShardTest() { + + final N5Factory factory = new N5Factory(); + final GsonBuilder gsonBuilder = new GsonBuilder(); + gsonBuilder.setPrettyPrinting(); + factory.gsonBuilder(gsonBuilder); + factory.cacheAttributes(false); + + final N5Writer writer = 
factory.openWriter("src/test/resources/shardExamples/test.n5"); + + final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( + new long[]{4, 4}, + new int[]{4, 4}, + new int[]{2, 2}, + DataType.UINT8, + new Codec[]{new N5BlockCodec(dataByteOrder)}, + new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, + indexLocation + ); + writer.createDataset("wholeShard", datasetAttributes); + writer.deleteBlock("wholeShard", 0, 0); + + final int[] blockSize = datasetAttributes.getBlockSize(); + final DataType dataType = datasetAttributes.getDataType(); + final int numElements = 2 * 2; + + final HashMap writtenBlocks = new HashMap<>(); + + final InMemoryShard shard = new InMemoryShard(datasetAttributes, new long[]{0, 0}); + + for (int idx1 = 1; idx1 >= 0; idx1--) { + for (int idx2 = 1; idx2 >= 0; idx2--) { + final long[] gridPosition = {idx1, idx2}; + final DataBlock dataBlock = (DataBlock)dataType.createDataBlock(blockSize, gridPosition, numElements); + byte[] data = dataBlock.getData(); + for (int i = 0; i < data.length; i++) { + data[i] = (byte)((idx1 * 100) + (idx2 * 10) + i); + } + + shard.addBlock(dataBlock); + writtenBlocks.put(gridPosition, data); + } + } + + writer.writeShard("wholeShard", datasetAttributes, shard); + + for (Map.Entry entry : writtenBlocks.entrySet()) { + final long[] otherGridPosition = entry.getKey(); + final byte[] otherData = entry.getValue(); + final DataBlock otherBlock = (DataBlock)writer.readBlock("wholeShard", datasetAttributes, otherGridPosition); + Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, otherBlock.getData()); + } + } + } From 1ced5708260110a7699031772c09dcce8e26e556 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 2 Jan 2025 17:03:53 -0500 Subject: [PATCH 079/124] feat: writeShardEndStream --- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 1 - .../saalfeldlab/n5/shard/InMemoryShard.java | 29 +++++++++++++------ 
.../n5/shard/ShardIndexBuilder.java | 2 +- .../saalfeldlab/n5/shard/ShardDemos.java | 1 + 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index ee5ab203..53f95c5e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -255,7 +255,6 @@ default void writeShard( try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { try (final OutputStream out = lock.newOutputStream()) { InMemoryShard.fromShard(shard).write(out); - out.close(); } } catch (final IOException | UncheckedIOException e) { throw new N5IOException( diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index a3d9d168..61194500 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -8,6 +8,7 @@ import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.CountingOutputStream; +import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; @@ -80,7 +81,7 @@ public ShardIndex getIndex() { public void write(final OutputStream out) throws IOException { if (indexLocation() == IndexLocation.END) - writeShardEnd(out, this); + writeShardEndStream(out, this); else writeShardStart(out, this); } @@ -110,21 +111,31 @@ protected static void writeShardEndStream( final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.END); + indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); - final 
CountingOutputStream cout = new CountingOutputStream(out); - - long offset = 0; + final ProxyOutputStream nop = new ProxyOutputStream(out) { + + @Override public void close() { + //nop + } + }; + + final CountingOutputStream cout = new CountingOutputStream(nop); + + long bytesWritten = 0; for (int i = 0; i < shard.numBlocks(); i++) { final DataBlock block = shard.getBlock(i); DefaultBlockWriter.writeBlock(cout, datasetAttributes, block); - - indexBuilder.addBlock( block.getGridPosition(), offset); - offset = cout.getByteCount(); + + + final long size = cout.getByteCount() - bytesWritten; + bytesWritten = cout.getByteCount(); + + indexBuilder.addBlock( block.getGridPosition(), size); } - final ShardIndex index = indexBuilder.build(); - DefaultBlockWriter.writeBlock(out, datasetAttributes, index); + ShardIndex.write(indexBuilder.build(), out); } protected static void writeShardEnd( diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index c8511dc8..cf16f719 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -55,7 +55,7 @@ public ShardIndexBuilder setCodecs(DeterministicSizeCodec... codecs) { } public ShardIndexBuilder addBlock(long[] blockPosition, long numBytes) { - + //TODO Caleb: Maybe move to ShardIndex? 
final long[] blockPositionInShard = shard.getDatasetAttributes().getBlockPositionInShard( shard.getGridPosition(), blockPosition); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java index ee4a8411..0ab1c466 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java @@ -71,6 +71,7 @@ public static void main(String[] args) throws MalformedURLException { @Parameterized.Parameters(name = "IndexLocation({0}), Block ByteOrder({1}), Index ByteOrder({2})") public static Collection data() { + final ArrayList params = new ArrayList<>(); for (IndexLocation indexLoc : IndexLocation.values()) { for (ByteOrder blockByteOrder : new ByteOrder[]{ByteOrder.BIG_ENDIAN, ByteOrder.LITTLE_ENDIAN}) { From 0e045cd05deac296b331e22144da044031e21e6c Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 3 Jan 2025 10:55:38 -0500 Subject: [PATCH 080/124] feat: serialize shardSize in DatasetAttributes for n5 --- .../saalfeldlab/n5/DatasetAttributes.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 04ec1b35..dc2b984c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -43,6 +43,7 @@ public class DatasetAttributes implements Serializable { public static final String DIMENSIONS_KEY = "dimensions"; public static final String BLOCK_SIZE_KEY = "blockSize"; + public static final String SHARD_SIZE_KEY = "shardSize"; public static final String DATA_TYPE_KEY = "dataType"; public static final String COMPRESSION_KEY = "compression"; public static final String CODEC_KEY = "codecs"; @@ -250,6 +251,12 @@ public static class DatasetAttributesAdapter implements 
JsonSerializer blocksPerShard[i] * blockSize[i]); return new ShardedDatasetAttributes( dimensions, shardSize, @@ -287,6 +291,12 @@ public static class DatasetAttributesAdapter implements JsonSerializer Date: Fri, 3 Jan 2025 11:39:04 -0500 Subject: [PATCH 081/124] test: BytesTest operates on n5 container --- .../saalfeldlab/n5/codec/BytesTests.java | 4 +-- .../shardExamples/test.n5/attributes.json | 3 ++ .../test.n5/mid_sharded/attributes.json | 30 +++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/shardExamples/test.n5/attributes.json create mode 100644 src/test/resources/shardExamples/test.n5/mid_sharded/attributes.json diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index 5d58657d..66ef8632 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -27,9 +27,9 @@ public void testSerialization() { gsonBuilder.registerTypeAdapter(ByteOrder.class, BytesCodec.byteOrderAdapter); factory.gsonBuilder(gsonBuilder); - final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.zarr"); + final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); final Codec bytes = reader.getAttribute("mid_sharded", "codecs[0]/configuration/codecs[0]", Codec.class); - assertTrue("as BytesCodec", bytes instanceof N5BlockCodec); + assertTrue("as BytesCodec", bytes instanceof BytesCodec); final N5Writer writer = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); diff --git a/src/test/resources/shardExamples/test.n5/attributes.json b/src/test/resources/shardExamples/test.n5/attributes.json new file mode 100644 index 00000000..573b0188 --- /dev/null +++ b/src/test/resources/shardExamples/test.n5/attributes.json @@ -0,0 +1,3 @@ +{ + "n5": "4.0.0" +} \ No newline at end of file diff --git 
a/src/test/resources/shardExamples/test.n5/mid_sharded/attributes.json b/src/test/resources/shardExamples/test.n5/mid_sharded/attributes.json new file mode 100644 index 00000000..b9e575b2 --- /dev/null +++ b/src/test/resources/shardExamples/test.n5/mid_sharded/attributes.json @@ -0,0 +1,30 @@ +{ + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 2, + 3 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + } + ], + "index_location": "end" + } + } + ] +} From cc5b8c531c2ff1225ea657c005abbaf18569eea2 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 3 Jan 2025 16:33:39 -0500 Subject: [PATCH 082/124] feat: ShardedDatasetAttributes validate shard/block size on construction --- .../n5/ShardedDatasetAttributes.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index c8d3ee6a..835a3a8b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -28,6 +28,13 @@ public ShardedDatasetAttributes ( final IndexLocation indexLocation ) { super(dimensions, blockSize, dataType, null, blocksCodecs); + + if (!validateShardBlockSize(shardSize, blockSize)) { + throw new N5Exception(String.format("Invalid shard %s / block size %s", + Arrays.toString(shardSize), + Arrays.toString(blockSize))); + } + this.shardSize = shardSize; this.shardingCodec = new ShardingCodec( blockSize, @@ -48,6 +55,26 @@ public ShardedDatasetAttributes( this.shardingCodec = codec; } + /** + * Returns whether the given shard and block sizes are valid. Specifically, is + * the shard size a multiple of the block size in every dimension. 
+ * + * @param shardSize size of the shard in pixels + * @param blockSize size of a block in pixels + * @return + */ + public static boolean validateShardBlockSize(final int[] shardSize, final int[] blockSize) { + + if (shardSize.length != blockSize.length) + return false; + + for (int i = 0; i < shardSize.length; i++) { + if (shardSize[i] % blockSize[i] != 0) + return false; + } + return true; + } + public ShardedDatasetAttributes getShardAttributes() { return this; } From 367b987c0406da4af4d27cea1633a3f6b26062fe Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 3 Jan 2025 13:53:11 -0500 Subject: [PATCH 083/124] refactor: remove `DatasetAttributes#getShardedAttributes()` unnecessary as you can do the same with an instance check --- .../saalfeldlab/n5/DatasetAttributes.java | 21 ++----------------- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 4 ++-- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 5 +---- .../org/janelia/saalfeldlab/n5/N5Writer.java | 2 +- 4 files changed, 6 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index dc2b984c..e67761c8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -149,22 +149,6 @@ public BytesCodec[] getCodecs() { return byteCodecs; } - public ShardedDatasetAttributes getShardAttributes() { - - if (getArrayCodec() instanceof ShardingCodec) { - - final ShardingCodec shardingCodec = (ShardingCodec)getArrayCodec(); - return new ShardedDatasetAttributes( - dimensions, - blockSize, - shardingCodec.getBlockSize(), - getDataType(), - shardingCodec); - - } else - return null; - } - public HashMap asMap() { final HashMap map = new HashMap<>(); @@ -292,9 +276,8 @@ public static class DatasetAttributesAdapter implements JsonSerializer readBlock( final DatasetAttributes datasetAttributes, final long... 
gridPosition) throws N5Exception { - final ShardedDatasetAttributes shardedAttrs = datasetAttributes.getShardAttributes(); - if (shardedAttrs != null) { + if (datasetAttributes instanceof ShardedDatasetAttributes) { + final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes) datasetAttributes; final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); final Shard shard = getShard(pathName, shardedAttrs, shardPosition); return shard.getBlock(gridPosition); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 53f95c5e..28e93806 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -245,12 +245,9 @@ default void writeBlock( @Override default void writeShard( final String path, - final DatasetAttributes datasetAttributes, + final ShardedDatasetAttributes datasetAttributes, final Shard shard) throws N5Exception { - if( datasetAttributes.getShardAttributes() == null ) - throw new N5IOException("Tried to write shard into a not-sharded dataset: " + path); - final String shardPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), shard.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { try (final OutputStream out = lock.newOutputStream()) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index b0ed462f..2927a108 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -314,7 +314,7 @@ default void writeBlocks( */ void writeShard( final String datasetPath, - final DatasetAttributes datasetAttributes, + final ShardedDatasetAttributes datasetAttributes, final Shard shard) throws N5Exception; /** From 
20a9677dbfd0f7d5f5bcce13aedc9c5fbf56a1c3 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 3 Jan 2025 16:25:50 -0500 Subject: [PATCH 084/124] feat: writeBlocks aggregate shard --- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 29 +++++++++++++ .../org/janelia/saalfeldlab/n5/N5Writer.java | 2 - .../n5/ShardedDatasetAttributes.java | 1 - .../saalfeldlab/n5/shard/AbstractShard.java | 6 --- .../saalfeldlab/n5/shard/InMemoryShard.java | 43 ++++++++++--------- .../janelia/saalfeldlab/n5/shard/Shard.java | 2 +- 6 files changed, 53 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 28e93806..e3ceb759 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -29,6 +29,7 @@ import java.io.OutputStream; import java.io.UncheckedIOException; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -214,6 +215,34 @@ default boolean removeAttributes(final String pathName, final List attri return removed; } + @Override default void writeBlocks(final String datasetPath, final DatasetAttributes datasetAttributes, final DataBlock... 
dataBlocks) throws N5Exception { + + if (datasetAttributes instanceof ShardedDatasetAttributes) { + final ShardedDatasetAttributes shardAttributes = (ShardedDatasetAttributes)datasetAttributes; + /* Group by shard index */ + final HashMap> shardBlockMap = new HashMap<>(); + + for (DataBlock dataBlock : dataBlocks) { + final long[] shardPosition = shardAttributes.getShardPositionForBlock(dataBlock.getGridPosition()); + final int shardHash = Arrays.hashCode(shardPosition); + if (!shardBlockMap.containsKey(shardHash)) + shardBlockMap.put(shardHash, new InMemoryShard<>(shardAttributes, shardPosition)); + final InMemoryShard shard = shardBlockMap.get(shardHash); + shard.addBlock(dataBlock); + } + + for (InMemoryShard shard : shardBlockMap.values()) { + writeShard(datasetPath, shardAttributes, shard); + } + } else { + /* Just write each block */ + for (DataBlock dataBlock : dataBlocks) { + writeBlock(datasetPath, datasetAttributes, dataBlock); + } + } + + } + @Override default void writeBlock( final String path, diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 2927a108..4883bd44 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -296,8 +296,6 @@ default void writeBlocks( final DatasetAttributes datasetAttributes, final DataBlock... 
dataBlocks) throws N5Exception { - // TODO Caleb: write this - // default method is naive for (DataBlock block : dataBlocks) writeBlock(datasetPath, datasetAttributes, block); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 835a3a8b..8ee40baf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -150,7 +150,6 @@ public int[] getBlocksPerShard() { */ public long[] getShardPositionForBlock(final long... blockGridPosition) { - // TODO have this return a shard final int[] blocksPerShard = getBlocksPerShard(); final long[] shardGridPosition = new long[blockGridPosition.length]; for (int i = 0; i < shardGridPosition.length; i++) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index fc30eaa6..6aecd1a4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -43,12 +43,6 @@ public long[] getGridPosition() { return gridPosition; } - @Override - public DataBlock getBlock(long... 
position) { - - return null; - } - @Override public ShardIndex getIndex() { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 61194500..7e699a1b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -4,6 +4,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import org.apache.commons.io.output.ByteArrayOutputStream; @@ -16,8 +17,8 @@ public class InMemoryShard extends AbstractShard { - private List> blocks; - + /* Map of a hash of the DataBlocks `gridPosition` to the block */ + private final HashMap> blocks; private ShardIndexBuilder indexBuilder; /* @@ -26,9 +27,9 @@ public class InMemoryShard extends AbstractShard { * (later) */ - public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition) { + public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] shardPosition) { - this( datasetAttributes, gridPosition, null); + this( datasetAttributes, shardPosition, null); indexBuilder = new ShardIndexBuilder(this); indexBuilder.indexLocation(datasetAttributes.getIndexLocation()); } @@ -37,7 +38,17 @@ public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final lon ShardIndex index) { super(datasetAttributes, gridPosition, index); - blocks = new ArrayList<>(); + blocks = new HashMap<>(); + } + + private void storeBlock(DataBlock block) { + + blocks.put(Arrays.hashCode(block.getGridPosition()), block); + } + + @Override public DataBlock getBlock(long... 
blockGridPosition) { + + return blocks.get(Arrays.hashCode(blockGridPosition)); } @Override @@ -48,7 +59,7 @@ public void writeBlock(DataBlock block) { public void addBlock(DataBlock block) { - blocks.add(block); + storeBlock(block); } public int numBlocks() { @@ -56,9 +67,9 @@ public int numBlocks() { return blocks.size(); } - public DataBlock getBlock(int i) { + public List> getBlocks() { - return blocks.get(i); + return new ArrayList<>(blocks.values()); } protected IndexLocation indexLocation() { @@ -113,8 +124,8 @@ protected static void writeShardEndStream( indexBuilder.indexLocation(IndexLocation.END); indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); + // Neccesary to stop `close()` when writing blocks from closing out base OutputStream final ProxyOutputStream nop = new ProxyOutputStream(out) { - @Override public void close() { //nop } @@ -123,12 +134,8 @@ protected static void writeShardEndStream( final CountingOutputStream cout = new CountingOutputStream(nop); long bytesWritten = 0; - for (int i = 0; i < shard.numBlocks(); i++) { - - final DataBlock block = shard.getBlock(i); + for (DataBlock block : shard.getBlocks()) { DefaultBlockWriter.writeBlock(cout, datasetAttributes, block); - - final long size = cout.getByteCount() - bytesWritten; bytesWritten = cout.getByteCount(); @@ -148,10 +155,8 @@ protected static void writeShardEnd( indexBuilder.indexLocation(IndexLocation.END); indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); - for (int i = 0; i < shard.numBlocks(); i++) { - + for (DataBlock block : shard.getBlocks()) { final ByteArrayOutputStream os = new ByteArrayOutputStream(); - final DataBlock block = shard.getBlock(i); DefaultBlockWriter.writeBlock(os, datasetAttributes, block); indexBuilder.addBlock(block.getGridPosition(), os.size()); @@ -171,10 +176,8 @@ protected static void writeShardStart( indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); final List blockData 
= new ArrayList<>(shard.numBlocks()); - for (int i = 0; i < shard.numBlocks(); i++) { - + for (DataBlock block : shard.getBlocks()) { final ByteArrayOutputStream os = new ByteArrayOutputStream(); - final DataBlock block = shard.getBlock(i); DefaultBlockWriter.writeBlock(os, datasetAttributes, block); blockData.add(os.toByteArray()); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index abaf7e16..eef1c960 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -97,7 +97,7 @@ default long[] getShard(long... blockPosition) { return shardGridPosition; } - public DataBlock getBlock(long... position); + public DataBlock getBlock(long... blockGridPosition); public void writeBlock(DataBlock block); From c3c3ceb20dfa409ff6aae340d321c787caa52571 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 3 Jan 2025 16:27:37 -0500 Subject: [PATCH 085/124] fix: index offset calculation --- .../org/janelia/saalfeldlab/n5/shard/ShardIndex.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index b400b863..561e9ea7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -82,14 +82,11 @@ public void set(long offset, long nbytes, long[] gridPosition) { private int getOffsetIndex(long... gridPosition) { - int idx = 0; - long stride = 2; - for (int i = 0; i < gridPosition.length; i++) { - idx += gridPosition[i] * stride; - stride *= size[i]; + int idx = (int) gridPosition[0]; + for (int i = 1; i < gridPosition.length; i++) { + idx += gridPosition[i] * size[i]; } - - return idx; + return idx * 2; } private int getNumBytesIndex(long... 
gridPosition) { From 5badbb7110e14ffcff999d577e1f62058f5dbfa3 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 3 Jan 2025 16:28:15 -0500 Subject: [PATCH 086/124] feat(test): wip shard writeBlocks refactor(test): use temp writer via N5FSTest --- .../saalfeldlab/n5/AbstractN5Test.java | 2 +- .../shard/{ShardDemos.java => ShardTest.java} | 167 ++++++++++-------- 2 files changed, 91 insertions(+), 78 deletions(-) rename src/test/java/org/janelia/saalfeldlab/n5/shard/{ShardDemos.java => ShardTest.java} (57%) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index d4d3591b..a1916327 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -92,7 +92,7 @@ public abstract class AbstractN5Test { protected final HashSet tempWriters = new HashSet<>(); - protected final N5Writer createTempN5Writer() { + public final N5Writer createTempN5Writer() { try { return createTempN5Writer(tempN5Location()); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java similarity index 57% rename from src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java rename to src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 0ab1c466..2a39e161 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardDemos.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -1,10 +1,12 @@ package org.janelia.saalfeldlab.n5.shard; -import com.google.gson.GsonBuilder; +import org.janelia.saalfeldlab.n5.ByteArrayDataBlock; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.FileSystemKeyValueAccess; import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import 
org.janelia.saalfeldlab.n5.N5FSTest; +import org.janelia.saalfeldlab.n5.N5KeyValueWriter; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.BytesCodec; @@ -13,17 +15,13 @@ import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; -import org.janelia.saalfeldlab.n5.universe.N5Factory; +import org.junit.After; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import java.net.MalformedURLException; import java.nio.ByteOrder; -import java.nio.file.FileSystems; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -31,43 +29,9 @@ import java.util.Map; @RunWith(Parameterized.class) -public class ShardDemos { +public class ShardTest { - public static void main(String[] args) throws MalformedURLException { - - final Path p = Paths.get("src/test/resources/shardExamples/test.zarr/mid_sharded/c/0/0"); - System.out.println(p); - - final String key = p.toString(); - final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( - new long[]{6, 4}, - new int[]{6, 4}, - new int[]{3, 2}, - DataType.UINT8, - new Codec[]{new N5BlockCodec()}, - new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, - IndexLocation.END - ); - - final FileSystemKeyValueAccess kva = new FileSystemKeyValueAccess(FileSystems.getDefault()); - final VirtualShard shard = new VirtualShard<>(dsetAttrs, new long[]{0, 0}, kva, key); - - final DataBlock blk = shard.getBlock(0, 0); - - final byte[] data = blk.getData(); - System.out.println(Arrays.toString(data)); - - // fill the block with a weird value - Arrays.fill(data, (byte)123); - - // write the block - shard.writeBlock(blk); - - // re-read the 
block and check the data it contains - final DataBlock blkReread = shard.getBlock(0, 0); - final byte[] dataReRead = blkReread.getData(); - System.out.println(Arrays.toString(dataReRead)); - } + private static final N5FSTest tempN5Factory = new N5FSTest(); @Parameterized.Parameters(name = "IndexLocation({0}), Block ByteOrder({1}), Index ByteOrder({2})") public static Collection data() { @@ -94,26 +58,82 @@ public static Collection data() { @Parameterized.Parameter(2) public ByteOrder indexByteOrder; - @Test - public void writeReadBlockTest() { - - final N5Factory factory = new N5Factory(); - final GsonBuilder gsonBuilder = new GsonBuilder(); - gsonBuilder.setPrettyPrinting(); - factory.gsonBuilder(gsonBuilder); - factory.cacheAttributes(false); - - final N5Writer writer = factory.openWriter("src/test/resources/shardExamples/test.n5"); + @After + public void removeTempWriters() { + tempN5Factory.removeTempWriters(); + } - final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( - new long[]{8, 8}, - new int[]{4, 4}, - new int[]{2, 2}, + private ShardedDatasetAttributes getTestAttributes(long[] dimensions, int[] shardSize, int[] blockSize) { + return new ShardedDatasetAttributes( + dimensions, + shardSize, + blockSize, DataType.UINT8, - new Codec[]{new N5BlockCodec(dataByteOrder), new GzipCompression(4)}, + new Codec[]{new N5BlockCodec(dataByteOrder) , new GzipCompression(4)}, new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, indexLocation ); + } + + private ShardedDatasetAttributes getTestAttributes() { + return getTestAttributes(new long[]{8, 8}, new int[]{4, 4}, new int[]{2, 2}); + } + + @Test + public void writeReadBlocksTest() { + + final N5Writer writer = tempN5Factory.createTempN5Writer(); + final ShardedDatasetAttributes datasetAttributes = getTestAttributes( + new long[]{24,24}, + new int[]{8,8}, + new int[]{2,2} + ); + + writer.createDataset("shard", datasetAttributes); + + final int[] blockSize 
= datasetAttributes.getBlockSize(); + final int numElements = blockSize[0] * blockSize[1]; + + final byte[] data = new byte[numElements]; + for (int i = 0; i < data.length; i++) { + data[i] = (byte)((100) + (10) + i); + } + + writer.writeBlocks( + "shard", + datasetAttributes, + /* shard (0, 0) */ + new ByteArrayDataBlock(blockSize, new long[]{0,0}, data), + new ByteArrayDataBlock(blockSize, new long[]{0,1}, data), + new ByteArrayDataBlock(blockSize, new long[]{1,0}, data), + new ByteArrayDataBlock(blockSize, new long[]{1,1}, data), + + /* shard (1, 0) */ + new ByteArrayDataBlock(blockSize, new long[]{4,0}, data), + new ByteArrayDataBlock(blockSize, new long[]{5,0}, data), + + /* shard (2, 2) */ + new ByteArrayDataBlock(blockSize, new long[]{11,11}, data) + ); + + final KeyValueAccess kva = ((N5KeyValueWriter)writer).getKeyValueAccess(); + String p = writer.getURI().getPath(); + final String shard00 = kva.compose(writer.getURI(), "shard", "0", "0"); + kva.exists(shard00); + + final String shard10 = kva.compose(writer.getURI(), "shard", "0", "0"); + kva.exists(shard10); + + final String shard33 = kva.compose(writer.getURI(), "shard", "0", "0"); + kva.exists(shard33); + } + + @Test + public void writeReadBlockTest() { + + final N5Writer writer = tempN5Factory.createTempN5Writer(); + final ShardedDatasetAttributes datasetAttributes = getTestAttributes(); + writer.createDataset("shard", datasetAttributes); writer.deleteBlock("shard", 0, 0); @@ -126,21 +146,21 @@ public void writeReadBlockTest() { for (int idx1 = 1; idx1 >= 0; idx1--) { for (int idx2 = 1; idx2 >= 0; idx2--) { final long[] gridPosition = {idx1, idx2}; - final DataBlock dataBlock = (DataBlock)dataType.createDataBlock(blockSize, gridPosition, numElements); - byte[] data = dataBlock.getData(); + final DataBlock dataBlock = dataType.createDataBlock(blockSize, gridPosition, numElements); + byte[] data = (byte[])dataBlock.getData(); for (int i = 0; i < data.length; i++) { data[i] = (byte)((idx1 * 100) + (idx2 
* 10) + i); } writer.writeBlock("shard", datasetAttributes, dataBlock); - final DataBlock block = (DataBlock)writer.readBlock("shard", datasetAttributes, gridPosition); - Assert.assertArrayEquals("Read from shard doesn't match", data, block.getData()); + final DataBlock block = writer.readBlock("shard", datasetAttributes, gridPosition); + Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); for (Map.Entry entry : writtenBlocks.entrySet()) { final long[] otherGridPosition = entry.getKey(); final byte[] otherData = entry.getValue(); - final DataBlock otherBlock = (DataBlock)writer.readBlock("shard", datasetAttributes, otherGridPosition); - Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, otherBlock.getData()); + final DataBlock otherBlock = writer.readBlock("shard", datasetAttributes, otherGridPosition); + Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, (byte[])otherBlock.getData()); } writtenBlocks.put(gridPosition, data); @@ -151,13 +171,7 @@ public void writeReadBlockTest() { @Test public void writeReadShardTest() { - final N5Factory factory = new N5Factory(); - final GsonBuilder gsonBuilder = new GsonBuilder(); - gsonBuilder.setPrettyPrinting(); - factory.gsonBuilder(gsonBuilder); - factory.cacheAttributes(false); - - final N5Writer writer = factory.openWriter("src/test/resources/shardExamples/test.n5"); + final N5Writer writer = tempN5Factory.createTempN5Writer(); final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( new long[]{4, 4}, @@ -182,13 +196,12 @@ public void writeReadShardTest() { for (int idx1 = 1; idx1 >= 0; idx1--) { for (int idx2 = 1; idx2 >= 0; idx2--) { final long[] gridPosition = {idx1, idx2}; - final DataBlock dataBlock = (DataBlock)dataType.createDataBlock(blockSize, gridPosition, numElements); - byte[] data = dataBlock.getData(); + final DataBlock dataBlock = dataType.createDataBlock(blockSize, gridPosition, 
numElements); + byte[] data = (byte[])dataBlock.getData(); for (int i = 0; i < data.length; i++) { data[i] = (byte)((idx1 * 100) + (idx2 * 10) + i); } - - shard.addBlock(dataBlock); + shard.addBlock((DataBlock)dataBlock); writtenBlocks.put(gridPosition, data); } } @@ -198,8 +211,8 @@ public void writeReadShardTest() { for (Map.Entry entry : writtenBlocks.entrySet()) { final long[] otherGridPosition = entry.getKey(); final byte[] otherData = entry.getValue(); - final DataBlock otherBlock = (DataBlock)writer.readBlock("wholeShard", datasetAttributes, otherGridPosition); - Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, otherBlock.getData()); + final DataBlock otherBlock = writer.readBlock("wholeShard", datasetAttributes, otherGridPosition); + Assert.assertArrayEquals("Read prior write from shard no loner matches", otherData, (byte[])otherBlock.getData()); } } From eb9fbc1b4203245599e8e532843d0b5c432960be Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 6 Jan 2025 16:29:41 -0500 Subject: [PATCH 087/124] feat/refactor: add BlockParameters and ShardParameters interfaces * will make zarr implementation less repetative --- .../saalfeldlab/n5/BlockParameters.java | 11 ++ .../saalfeldlab/n5/DatasetAttributes.java | 5 +- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 7 +- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 31 ++-- .../org/janelia/saalfeldlab/n5/N5Writer.java | 8 +- .../n5/ShardedDatasetAttributes.java | 142 ++---------------- .../saalfeldlab/n5/shard/AbstractShard.java | 12 +- .../saalfeldlab/n5/shard/InMemoryShard.java | 35 ++--- .../janelia/saalfeldlab/n5/shard/Shard.java | 14 +- .../n5/shard/ShardIndexBuilder.java | 4 +- .../saalfeldlab/n5/shard/ShardParameters.java | 140 +++++++++++++++++ .../saalfeldlab/n5/shard/ShardWriter.java | 10 +- .../saalfeldlab/n5/shard/VirtualShard.java | 6 +- .../saalfeldlab/n5/shard/ShardTest.java | 2 +- 14 files changed, 235 insertions(+), 192 deletions(-) create mode 100644 
src/main/java/org/janelia/saalfeldlab/n5/BlockParameters.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/BlockParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/BlockParameters.java new file mode 100644 index 00000000..65a21497 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/BlockParameters.java @@ -0,0 +1,11 @@ +package org.janelia.saalfeldlab.n5; + +public interface BlockParameters { + + public long[] getDimensions(); + + public int getNumDimensions(); + + public int[] getBlockSize(); + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index e67761c8..684339a2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -37,7 +37,7 @@ * @author Stephan Saalfeld * */ -public class DatasetAttributes implements Serializable { +public class DatasetAttributes implements BlockParameters, Serializable { private static final long serialVersionUID = -4521467080388947553L; @@ -114,16 +114,19 @@ public DatasetAttributes( this(dimensions, blockSize, dataType, compression, null); } + @Override public long[] getDimensions() { return dimensions; } + @Override public int getNumDimensions() { return dimensions.length; } + @Override public int[] getBlockSize() { return blockSize; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 244bc1ca..8b0c49cb 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -31,6 +31,7 @@ import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; 
import org.janelia.saalfeldlab.n5.shard.VirtualShard; import com.google.gson.Gson; @@ -89,8 +90,8 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } @SuppressWarnings("rawtypes") - default Shard getShard(final String pathName, - final ShardedDatasetAttributes datasetAttributes, + default Shard getShard(final String pathName, + final A datasetAttributes, long... shardGridPosition) { final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), shardGridPosition); @@ -106,7 +107,7 @@ default DataBlock readBlock( if (datasetAttributes instanceof ShardedDatasetAttributes) { final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes) datasetAttributes; final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); - final Shard shard = getShard(pathName, shardedAttrs, shardPosition); + final Shard shard = getShard(pathName, shardedAttrs, shardPosition); return shard.getBlock(gridPosition); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index e3ceb759..e044c6bc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -36,6 +36,7 @@ import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.shard.InMemoryShard; import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; import com.google.gson.Gson; import com.google.gson.JsonElement; @@ -215,32 +216,34 @@ default boolean removeAttributes(final String pathName, final List attri return removed; } + @SuppressWarnings({ "rawtypes", "unchecked" }) @Override default void writeBlocks(final String datasetPath, final DatasetAttributes datasetAttributes, final DataBlock... 
dataBlocks) throws N5Exception { - if (datasetAttributes instanceof ShardedDatasetAttributes) { - final ShardedDatasetAttributes shardAttributes = (ShardedDatasetAttributes)datasetAttributes; + if (datasetAttributes instanceof ShardParameters) { /* Group by shard index */ - final HashMap> shardBlockMap = new HashMap<>(); + final HashMap> shardBlockMap = new HashMap<>(); + final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; for (DataBlock dataBlock : dataBlocks) { final long[] shardPosition = shardAttributes.getShardPositionForBlock(dataBlock.getGridPosition()); final int shardHash = Arrays.hashCode(shardPosition); if (!shardBlockMap.containsKey(shardHash)) - shardBlockMap.put(shardHash, new InMemoryShard<>(shardAttributes, shardPosition)); - final InMemoryShard shard = shardBlockMap.get(shardHash); + shardBlockMap.put(shardHash, new InMemoryShard<>((DatasetAttributes & ShardParameters)shardAttributes, shardPosition)); + + final InMemoryShard shard = shardBlockMap.get(shardHash); shard.addBlock(dataBlock); } - for (InMemoryShard shard : shardBlockMap.values()) { - writeShard(datasetPath, shardAttributes, shard); + for (InMemoryShard shard : shardBlockMap.values()) { + writeShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, (Shard)shard); } + } else { /* Just write each block */ for (DataBlock dataBlock : dataBlocks) { writeBlock(datasetPath, datasetAttributes, dataBlock); } } - } @Override @@ -250,11 +253,11 @@ default void writeBlock( final DataBlock dataBlock) throws N5Exception { /* Delegate to shard for writing block? How to know what type of shard? 
*/ - if (datasetAttributes instanceof ShardedDatasetAttributes) { - ShardedDatasetAttributes shardDatasetAttrs = (ShardedDatasetAttributes)datasetAttributes; + if (datasetAttributes instanceof ShardParameters) { + ShardParameters shardDatasetAttrs = (ShardParameters)datasetAttributes; final long[] shardPos = shardDatasetAttrs.getShardPositionForBlock(dataBlock.getGridPosition()); final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), shardPos); - final VirtualShard shard = new VirtualShard<>(shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); + final VirtualShard shard = new VirtualShard<>((DatasetAttributes & ShardParameters)shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); shard.writeBlock(dataBlock); return; } @@ -272,10 +275,10 @@ default void writeBlock( } @Override - default void writeShard( + default void writeShard( final String path, - final ShardedDatasetAttributes datasetAttributes, - final Shard shard) throws N5Exception { + final A datasetAttributes, + final Shard shard) throws N5Exception { final String shardPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), shard.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 4883bd44..463d232a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -36,6 +36,7 @@ import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; @@ -308,12 +309,13 @@ default void writeBlocks( * @param datasetAttributes the dataset attributes * @param shard the shard * @param the data block data type + * @param the 
attribute type * @throws N5Exception the exception */ - void writeShard( + void writeShard( final String datasetPath, - final ShardedDatasetAttributes datasetAttributes, - final Shard shard) throws N5Exception; + final A datasetAttributes, + final Shard shard) throws N5Exception; /** * Deletes the block at {@code gridPosition} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 8ee40baf..a4202529 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -7,10 +7,11 @@ import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardIndex; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; -public class ShardedDatasetAttributes extends DatasetAttributes { +public class ShardedDatasetAttributes extends DatasetAttributes implements ShardParameters { private static final long serialVersionUID = -4559068841006651814L; @@ -75,10 +76,6 @@ public static boolean validateShardBlockSize(final int[] shardSize, final int[] return true; } - public ShardedDatasetAttributes getShardAttributes() { - return this; - } - public ShardingCodec getShardingCodec() { return shardingCodec; } @@ -99,130 +96,26 @@ protected Codec[] concatenateCodecs() { return new Codec[] { shardingCodec }; } - /** - * The size of the blocks in pixel units. - * - * @return the number of pixels per dimension for this shard. - */ - public int[] getShardSize() { - - return shardSize; - } - - /** - * Returns the number of shards per dimension for the dataset. 
- * - * @return the size of the shard grid of a dataset - */ - public int[] getShardBlockGridSize() { - - final int nd = getNumDimensions(); - final int[] shardBlockGridSize = new int[nd]; - final int[] blockSize = getBlockSize(); - for (int i = 0; i < nd; i++) - shardBlockGridSize[i] = (int)(Math.ceil((double)getDimensions()[i] / blockSize[i])); - - return shardBlockGridSize; - } - - /** - * Returns the number of blocks per dimension for a shard. - * - * @return the size of the block grid of a shard - */ - public int[] getBlocksPerShard() { - - final int nd = getNumDimensions(); - final int[] blocksPerShard = new int[nd]; - final int[] blockSize = getBlockSize(); - for (int i = 0; i < nd; i++) - blocksPerShard[i] = getShardSize()[i] / blockSize[i]; - - return blocksPerShard; - } - - /** - * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. - * - * @param blockGridPosition - * position of a block relative to the array - * @return the position of the containing shard in the shard grid - */ - public long[] getShardPositionForBlock(final long... blockGridPosition) { - - final int[] blocksPerShard = getBlocksPerShard(); - final long[] shardGridPosition = new long[blockGridPosition.length]; - for (int i = 0; i < shardGridPosition.length; i++) { - shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / blocksPerShard[i]); - } - - return shardGridPosition; - } - - /** - * Returns the block at the given position relative to this shard, or null if this shard does not contain the given block. 
- * - * @return the block position - */ - public long[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { - - // TODO check correctness - final long[] shardPos = getShardPositionForBlock(blockPosition); - if (!Arrays.equals(shardPosition, shardPos)) - return null; - - final int[] shardSize = getBlocksPerShard(); - final long[] blockShardPos = new long[shardSize.length]; - for (int i = 0; i < shardSize.length; i++) { - blockShardPos[i] = blockPosition[i] % shardSize[i]; - } + @Override + public IndexLocation getIndexLocation() { - return blockShardPos; + return getShardingCodec().getIndexLocation(); } - /** - * Given a block's position relative to a shard, returns its position in pixels - * relative to the image. - * - * @return the block position - */ - public long[] getBlockMinFromShardPosition(final long[] shardPosition, final long[] blockPosition) { - - // is this useful? - final int[] blockSize = getBlockSize(); - final int[] shardSize = getShardSize(); - final long[] blockImagePos = new long[shardSize.length]; - for (int i = 0; i < shardSize.length; i++) { - blockImagePos[i] = (shardPosition[i] * shardSize[i]) + (blockPosition[i] * blockSize[i]); - } - - return blockImagePos; + @Override + public ShardIndex createIndex() { + return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); } /** - * Given a block's position relative to a shard, returns its position relative - * to the image. + * The size of the blocks in pixel units. * - * @return the block position - */ - public long[] getBlockPositionFromShardPosition(final long[] shardPosition, final long[] blockPosition) { - - // is this useful? 
- final int[] shardBlockSize = getBlocksPerShard(); - final long[] blockImagePos = new long[shardSize.length]; - for (int i = 0; i < shardSize.length; i++) { - blockImagePos[i] = (shardPosition[i] * shardBlockSize[i]) + (blockPosition[i]); - } - - return blockImagePos; - } - - /** - * @return the number of blocks per shard + * @return the number of pixels per dimension for this shard. */ - public long getNumBlocks() { + @Override + public int[] getShardSize() { - return Arrays.stream(getBlocksPerShard()).reduce(1, (x, y) -> x * y); + return shardSize; } public static int[] getBlockSize(Codec[] codecs) { @@ -233,13 +126,4 @@ public static int[] getBlockSize(Codec[] codecs) { return null; } - - public IndexLocation getIndexLocation() { - - return getShardingCodec().getIndexLocation(); - } - - public ShardIndex createIndex() { - return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); - } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 6aecd1a4..5ecc67c7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -1,17 +1,16 @@ package org.janelia.saalfeldlab.n5.shard; -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.DatasetAttributes; -public abstract class AbstractShard implements Shard { +public abstract class AbstractShard implements Shard { - protected final ShardedDatasetAttributes datasetAttributes; + protected final A datasetAttributes; protected ShardIndex index; private final long[] gridPosition; - public AbstractShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, + public AbstractShard(final A datasetAttributes, final long[] gridPosition, final ShardIndex index) { this.datasetAttributes = 
datasetAttributes; @@ -20,7 +19,7 @@ public AbstractShard(final ShardedDatasetAttributes datasetAttributes, final lon } @Override - public ShardedDatasetAttributes getDatasetAttributes() { + public A getDatasetAttributes() { return datasetAttributes; } @@ -49,5 +48,4 @@ public ShardIndex getIndex() { return index; } - } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 7e699a1b..36a112e2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -11,11 +11,11 @@ import org.apache.commons.io.output.CountingOutputStream; import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; -public class InMemoryShard extends AbstractShard { +public class InMemoryShard extends AbstractShard { /* Map of a hash of the DataBlocks `gridPosition` to the block */ private final HashMap> blocks; @@ -27,14 +27,14 @@ public class InMemoryShard extends AbstractShard { * (later) */ - public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] shardPosition) { + public InMemoryShard(final A datasetAttributes, final long[] shardPosition) { this( datasetAttributes, shardPosition, null); indexBuilder = new ShardIndexBuilder(this); indexBuilder.indexLocation(datasetAttributes.getIndexLocation()); } - public InMemoryShard(final ShardedDatasetAttributes datasetAttributes, final long[] gridPosition, + public InMemoryShard(final A datasetAttributes, final long[] gridPosition, ShardIndex index) { super(datasetAttributes, gridPosition, index); @@ -97,28 +97,29 @@ public void write(final OutputStream out) 
throws IOException { writeShardStart(out, this); } - public static void writeShard(final OutputStream out, final Shard shard) throws IOException { + public static void writeShard(final OutputStream out, final Shard shard) throws IOException { fromShard(shard).write(out); } - public static InMemoryShard fromShard(Shard shard) { + public static InMemoryShard fromShard(Shard shard) { if (shard instanceof InMemoryShard) - return (InMemoryShard) shard; + return (InMemoryShard) shard; - final InMemoryShard inMemoryShard = new InMemoryShard(shard.getDatasetAttributes(), + final InMemoryShard inMemoryShard = new InMemoryShard( + shard.getDatasetAttributes(), shard.getGridPosition()); shard.forEach(blk -> inMemoryShard.addBlock(blk)); return inMemoryShard; } - protected static void writeShardEndStream( + protected static void writeShardEndStream( final OutputStream out, - InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { - final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final A datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.END); @@ -145,11 +146,11 @@ protected static void writeShardEndStream( ShardIndex.write(indexBuilder.build(), out); } - protected static void writeShardEnd( + protected static void writeShardEnd( final OutputStream out, - InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { - final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final A datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.END); @@ -166,11 +167,11 @@ protected static void writeShardEnd( ShardIndex.write(indexBuilder.build(), out); } - protected static void writeShardStart( + protected static void writeShardStart( final OutputStream out, - 
InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { - final ShardedDatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final A datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.START); indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index eef1c960..3e672ba5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -4,10 +4,10 @@ import java.util.Iterator; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.util.GridIterator; -public interface Shard extends Iterable> { +public interface Shard extends Iterable> { long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; @@ -30,7 +30,7 @@ default int[] getBlockGridSize() { return blockGridSize; } - public ShardedDatasetAttributes getDatasetAttributes(); + public A getDatasetAttributes(); /** * Returns the size of shards in pixel units. @@ -113,12 +113,12 @@ default DataBlock[] getAllBlocks(long... position) { public ShardIndex getIndex(); - public static Shard createEmpty(final ShardedDatasetAttributes attributes, long... shardPosition) { + public static Shard createEmpty(final A attributes, long... 
shardPosition) { final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); final ShardIndex shardIndex = new ShardIndex(attributes.getBlocksPerShard(), emptyIndex, ShardingCodec.IndexLocation.END); - return new InMemoryShard(attributes, shardPosition, shardIndex); + return new InMemoryShard(attributes, shardPosition, shardIndex); } public static long flatIndex(long[] gridPosition, int[] gridSize) { @@ -135,9 +135,9 @@ public static long flatIndex(long[] gridPosition, int[] gridSize) { public static class DataBlockIterator implements Iterator> { private final GridIterator it; - private final Shard shard; + private final Shard shard; - public DataBlockIterator(final Shard shard) { + public DataBlockIterator(final Shard shard) { this.shard = shard; it = new GridIterator(shard.getBlockGridSize()); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index cf16f719..f6f81dab 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -7,7 +7,7 @@ public class ShardIndexBuilder { - private final Shard shard; + private final Shard shard; private ShardIndex temporaryIndex; @@ -17,7 +17,7 @@ public class ShardIndexBuilder { private long currentOffset = 0; - public ShardIndexBuilder(Shard shard) { + public ShardIndexBuilder(Shard shard) { this.shard = shard; this.temporaryIndex = new ShardIndex(shard.getBlockGridSize(), location); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java new file mode 100644 index 00000000..055666b4 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -0,0 +1,140 @@ +package org.janelia.saalfeldlab.n5.shard; + +import java.util.Arrays; + +import 
org.janelia.saalfeldlab.n5.BlockParameters; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; + +public interface ShardParameters extends BlockParameters { + + public ShardingCodec getShardingCodec(); + + /** + * The size of the blocks in pixel units. + * + * @return the number of pixels per dimension for this shard. + */ + public int[] getShardSize(); + + public IndexLocation getIndexLocation(); + + public ShardIndex createIndex(); + + /** + * Returns the number of blocks per dimension for a shard. + * + * @return the size of the block grid of a shard + */ + default int[] getBlocksPerShard() { + + final int nd = getNumDimensions(); + final int[] blocksPerShard = new int[nd]; + final int[] blockSize = getBlockSize(); + for (int i = 0; i < nd; i++) + blocksPerShard[i] = getShardSize()[i] / blockSize[i]; + + return blocksPerShard; + } + + /** + * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. + * + * @param blockGridPosition + * position of a block relative to the array + * @return the position of the containing shard in the shard grid + */ + default long[] getShardPositionForBlock(final long... blockGridPosition) { + + final int[] blocksPerShard = getBlocksPerShard(); + final long[] shardGridPosition = new long[blockGridPosition.length]; + for (int i = 0; i < shardGridPosition.length; i++) { + shardGridPosition[i] = (int)Math.floor((double)blockGridPosition[i] / blocksPerShard[i]); + } + + return shardGridPosition; + } + + /** + * Returns the number of shards per dimension for the dataset. 
+ * + * @return the size of the shard grid of a dataset + */ + default int[] getShardBlockGridSize() { + + final int nd = getNumDimensions(); + final int[] shardBlockGridSize = new int[nd]; + final int[] blockSize = getBlockSize(); + for (int i = 0; i < nd; i++) + shardBlockGridSize[i] = (int)(Math.ceil((double)getDimensions()[i] / blockSize[i])); + + return shardBlockGridSize; + } + + /** + * Returns the block at the given position relative to this shard, or null if this shard does not contain the given block. + * + * @return the block position + */ + default long[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + + // TODO check correctness + final long[] shardPos = getShardPositionForBlock(blockPosition); + if (!Arrays.equals(shardPosition, shardPos)) + return null; + + final int[] shardSize = getBlocksPerShard(); + final long[] blockShardPos = new long[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + blockShardPos[i] = blockPosition[i] % shardSize[i]; + } + + return blockShardPos; + } + + + /** + * Given a block's position relative to a shard, returns its position in pixels + * relative to the image. + * + * @return the block position + */ + default long[] getBlockMinFromShardPosition(final long[] shardPosition, final long[] blockPosition) { + + // is this useful? + final int[] blockSize = getBlockSize(); + final int[] shardSize = getShardSize(); + final long[] blockImagePos = new long[shardSize.length]; + for (int i = 0; i < shardSize.length; i++) { + blockImagePos[i] = (shardPosition[i] * shardSize[i]) + (blockPosition[i] * blockSize[i]); + } + + return blockImagePos; + } + + /** + * Given a block's position relative to a shard, returns its position relative + * to the image. + * + * @return the block position + */ + default long[] getBlockPositionFromShardPosition(final long[] shardPosition, final long[] blockPosition) { + + // is this useful? 
+ final int[] shardBlockSize = getBlocksPerShard(); + final long[] blockImagePos = new long[getNumDimensions()]; + for (int i = 0; i < getNumDimensions(); i++) { + blockImagePos[i] = (shardPosition[i] * shardBlockSize[i]) + (blockPosition[i]); + } + + return blockImagePos; + } + + /** + * @return the number of blocks per shard + */ + default long getNumBlocks() { + + return Arrays.stream(getBlocksPerShard()).reduce(1, (x, y) -> x * y); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 792019fd..b8596c8d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -1,8 +1,8 @@ package org.janelia.saalfeldlab.n5.shard; import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; @@ -13,13 +13,13 @@ import java.util.Arrays; import java.util.List; -public class ShardWriter { +public class ShardWriter { private static final int BYTES_PER_LONG = 8; private final List> blocks; - private ShardedDatasetAttributes attributes; + private A attributes; private ByteBuffer blockSizes; @@ -29,7 +29,7 @@ public class ShardWriter { private List blockBytes; - public ShardWriter(final ShardedDatasetAttributes datasetAttributes) { + public ShardWriter(final A datasetAttributes) { blocks = new ArrayList<>(); attributes = datasetAttributes; @@ -48,7 +48,7 @@ public void addBlock(final DataBlock block) { blocks.add(block); } - public void write(final Shard shard, final OutputStream out) throws IOException { + public void write(final Shard shard, final OutputStream out) throws IOException { attributes = shard.getDatasetAttributes(); diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 7765d42b..56c92871 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -6,20 +6,20 @@ import java.io.UncheckedIOException; import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -public class VirtualShard extends AbstractShard { +public class VirtualShard extends AbstractShard { final private KeyValueAccess keyValueAccess; final private String path; - public VirtualShard(final ShardedDatasetAttributes datasetAttributes, long[] gridPosition, + public VirtualShard(final A datasetAttributes, long[] gridPosition, final KeyValueAccess keyValueAccess, final String path) { super(datasetAttributes, gridPosition, null); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 2a39e161..8e12ac07 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -191,7 +191,7 @@ public void writeReadShardTest() { final HashMap writtenBlocks = new HashMap<>(); - final InMemoryShard shard = new InMemoryShard(datasetAttributes, new long[]{0, 0}); + final InMemoryShard shard = new InMemoryShard<>(datasetAttributes, new long[]{0, 0}); for (int idx1 = 1; idx1 >= 0; idx1--) { for (int idx2 = 1; idx2 >= 0; idx2--) { From b0092d315567f9e477203376f599e04a4b3e2dcc Mon Sep 17 
00:00:00 2001 From: John Bogovic Date: Tue, 7 Jan 2025 09:53:31 -0500 Subject: [PATCH 088/124] fix: null compression should result in empty byteCodecs list --- src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index 684339a2..eca190d9 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -73,7 +73,7 @@ public DatasetAttributes( this.blockSize = blockSize; this.dataType = dataType; if (codecs == null && !(compression instanceof RawCompression)) { - byteCodecs = new BytesCodec[]{compression}; + byteCodecs = new BytesCodec[]{}; arrayCodec = new N5BlockCodec(); } else if (codecs == null || codecs.length == 0) { byteCodecs = new BytesCodec[]{}; From 458295cd7e1bc7b783d3d91bf057e8aca3f6a9b7 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 7 Jan 2025 09:54:08 -0500 Subject: [PATCH 089/124] refactor: createIndex now a default method in ShardParameters --- .../org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java | 5 ----- .../org/janelia/saalfeldlab/n5/shard/ShardParameters.java | 4 +++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index a4202529..04a8a037 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -102,11 +102,6 @@ public IndexLocation getIndexLocation() { return getShardingCodec().getIndexLocation(); } - @Override - public ShardIndex createIndex() { - return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); - } - /** * The size of the blocks 
in pixel units. * diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java index 055666b4..fad79130 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -18,7 +18,9 @@ public interface ShardParameters extends BlockParameters { public IndexLocation getIndexLocation(); - public ShardIndex createIndex(); + default ShardIndex createIndex() { + return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); + } /** * Returns the number of blocks per dimension for a shard. From da9b9eddb19549b45881736e1c5af1f530350a93 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 7 Jan 2025 09:54:27 -0500 Subject: [PATCH 090/124] feat: add getByteOrder method for ArrayCodecs --- .../java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java | 4 ++++ .../java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java index 76b015cf..66e1a8b2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java @@ -44,6 +44,10 @@ public BytesCodec(final ByteOrder byteOrder) { this.byteOrder = byteOrder; } + public ByteOrder getByteOrder() { + return byteOrder; + } + @Override public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, InputStream in) throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java index 82f118bb..7232d9ac 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java @@ -37,6 +37,9 @@ public N5BlockCodec(final ByteOrder byteOrder) { this.byteOrder = byteOrder; } + public ByteOrder getByteOrder() { + return byteOrder; + } @Override public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, InputStream in) throws IOException { From 52f762eeb9bbef3f57f95f1545c363cd22ca0d90 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 8 Jan 2025 10:05:48 -0500 Subject: [PATCH 091/124] feat: writeBlocks respects existing blocks in a given shard if not overwriting those blocks explicitly --- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 9 ++- .../saalfeldlab/n5/shard/InMemoryShard.java | 1 + .../janelia/saalfeldlab/n5/shard/Shard.java | 22 ++++++- .../saalfeldlab/n5/shard/ShardIndex.java | 33 +++++++--- .../saalfeldlab/n5/shard/ShardTest.java | 66 ++++++++++++++++--- 5 files changed, 109 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index e3ceb759..faa7cf53 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -232,6 +232,14 @@ default boolean removeAttributes(final String pathName, final List attri } for (InMemoryShard shard : shardBlockMap.values()) { + + /* Add existing blocks before overwriting shard */ + final Shard currentShard = (Shard)getShard(datasetPath, shardAttributes, shard.getGridPosition()); + for (DataBlock currentBlock : currentShard.getBlocks()) { + if (shard.getBlock(currentBlock.getGridPosition()) == null) + shard.addBlock(currentBlock); + } + writeShard(datasetPath, shardAttributes, shard); } } else { @@ -240,7 +248,6 @@ default boolean removeAttributes(final String pathName, final List attri writeBlock(datasetPath, datasetAttributes, dataBlock); } } - } @Override diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 7e699a1b..4ac202bf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -67,6 +67,7 @@ public int numBlocks() { return blocks.size(); } + @Override public List> getBlocks() { return new ArrayList<>(blocks.values()); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index eef1c960..5a88f370 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -1,7 +1,9 @@ package org.janelia.saalfeldlab.n5.shard; +import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.List; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; @@ -106,9 +108,23 @@ default Iterator> iterator() { return new DataBlockIterator(this); } - default DataBlock[] getAllBlocks(long... position) { - //TODO Caleb: Do we want this? 
- return null; + default List> getBlocks() { + + final ShardIndex shardIndex = getIndex(); + final ShardedDatasetAttributes attrs = getDatasetAttributes(); + final List> blocks = new ArrayList<>(); + for (long blockIdx = 0; blockIdx < attrs.getNumBlocks(); blockIdx++) { + int shardOffset = (int)blockIdx * 2; + final long[] index = shardIndex.getData(); + if (index[shardOffset] == Shard.EMPTY_INDEX_NBYTES || index[shardOffset+1] == EMPTY_INDEX_NBYTES) + continue; + + final long[] blockPosInShard = ShardIndex.shardPositionFromIndexOffset(shardOffset, attrs.getBlocksPerShard()); + final long[] blockPosInImg = attrs.getBlockPositionFromShardPosition(getGridPosition(), blockPosInShard); + blocks.add(getBlock(blockPosInImg)); + } + + return blocks; } public ShardIndex getIndex(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 561e9ea7..44bca3c5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -236,18 +236,31 @@ private static int[] prepend(final int value, final int[] array) { return indexBlockSize; } - public static void main(String[] args) { - - final ShardIndex ib = new ShardIndex(new int[]{2, 2}); + /** + * Calculate the block position in the shard grid for a given index offset. 
+ * + * @param offset the offset into the index + * @param blocksPerShard the dimensions of the shard in blocks + * @return the relative position in the shard grid + */ + public static long[] shardPositionFromIndexOffset(int offset, int[] blocksPerShard) { + + int maxOffset = 1; + for (int i = 0; i < blocksPerShard.length; i++) { + maxOffset *= blocksPerShard[i]; + } + if (offset >= maxOffset*2) { + throw new IllegalArgumentException("Shard Index Offset " + offset + " is out of bounds for shard dimensions " + Arrays.toString(blocksPerShard)); + } - ib.set(8, 9, new long[]{1, 1}); + final long[] position = new long[blocksPerShard.length]; + int remainder = offset / 2; - // System.out.println(ib.getIndex(0, 0)); - // System.out.println(ib.getIndex(1, 0)); - // System.out.println(ib.getIndex(0, 1)); - // System.out.println(ib.getIndex(1, 1)); + for (int dim = blocksPerShard.length - 1; dim >= 0; dim--) { // Iterate backwards + position[dim] = remainder % blocksPerShard[dim]; // Calculate position for this dimension + remainder /= blocksPerShard[dim]; // Update the remainder + } - System.out.println("done"); + return position; } - } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 2a39e161..c5ab89cb 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -69,7 +69,7 @@ private ShardedDatasetAttributes getTestAttributes(long[] dimensions, int[] shar shardSize, blockSize, DataType.UINT8, - new Codec[]{new N5BlockCodec(dataByteOrder) , new GzipCompression(4)}, + new Codec[]{new N5BlockCodec(dataByteOrder)}, // , new GzipCompression(4)}, new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, indexLocation ); @@ -99,6 +99,7 @@ public void writeReadBlocksTest() { data[i] = (byte)((100) + (10) + i); } + writer.writeBlocks( "shard", datasetAttributes, @@ 
-117,15 +118,64 @@ public void writeReadBlocksTest() { ); final KeyValueAccess kva = ((N5KeyValueWriter)writer).getKeyValueAccess(); - String p = writer.getURI().getPath(); - final String shard00 = kva.compose(writer.getURI(), "shard", "0", "0"); - kva.exists(shard00); - final String shard10 = kva.compose(writer.getURI(), "shard", "0", "0"); - kva.exists(shard10); + final String[][] keys = new String[][]{ + {"shard", "0", "0"}, + {"shard", "1", "0"}, + {"shard", "2", "2"} + }; + for (String[] key : keys) { + final String shard = kva.compose(writer.getURI(), key); + Assert.assertTrue("Shard at" + Arrays.toString(key) + "Does not exist", kva.exists(shard)); + } + + final long[][] blockIndices = new long[][]{ {0,0}, {0,1}, {1,0}, {1,1}, {4,0}, {5,0}, {11,11}}; + for (long[] blockIndex : blockIndices) { + final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); + Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); + } + + final byte[] data2 = new byte[numElements]; + for (int i = 0; i < data2.length; i++) { + data2[i] = (byte)(10 + i); + } + writer.writeBlocks( + "shard", + datasetAttributes, + /* shard (0, 0) */ + new ByteArrayDataBlock(blockSize, new long[]{0,0}, data2), + new ByteArrayDataBlock(blockSize, new long[]{1,1}, data2), - final String shard33 = kva.compose(writer.getURI(), "shard", "0", "0"); - kva.exists(shard33); + /* shard (0, 1) */ + new ByteArrayDataBlock(blockSize, new long[]{0,4}, data2), + new ByteArrayDataBlock(blockSize, new long[]{0,5}, data2), + + /* shard (2, 2) */ + new ByteArrayDataBlock(blockSize, new long[]{10,10}, data2) + ); + + final String[][] keys2 = new String[][]{ + {"shard", "0", "0"}, + {"shard", "1", "0"}, + {"shard", "0", "1"}, + {"shard", "2", "2"} + }; + for (String[] key : keys2) { + final String shard = kva.compose(writer.getURI(), key); + Assert.assertTrue("Shard at" + Arrays.toString(key) + "Does not exist", kva.exists(shard)); + } + + final long[][] 
oldBlockIndices = new long[][]{{0,1}, {1,0}, {4,0}, {5,0}, {11,11}}; + for (long[] blockIndex : oldBlockIndices) { + final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); + Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); + } + + final long[][] newBlockIndices = new long[][]{{0,0}, {1,1}, {0,4}, {0,5}, {10,10}}; + for (long[] blockIndex : newBlockIndices) { + final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); + Assert.assertArrayEquals("Read from shard doesn't match", data2, (byte[])block.getData()); + } } @Test From d4dcbe8ae72f23374a9b79de848a82b3ab01226f Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 8 Jan 2025 11:57:25 -0500 Subject: [PATCH 092/124] refactor: remove generic from ShardParameter --- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 4 ++-- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 10 ++++----- .../org/janelia/saalfeldlab/n5/N5Writer.java | 2 +- .../saalfeldlab/n5/shard/AbstractShard.java | 13 ++++++------ .../saalfeldlab/n5/shard/InMemoryShard.java | 21 ++++++++++--------- .../janelia/saalfeldlab/n5/shard/Shard.java | 13 ++++++------ .../n5/shard/ShardIndexBuilder.java | 4 ++-- .../saalfeldlab/n5/shard/ShardWriter.java | 19 ++++++++++------- .../saalfeldlab/n5/shard/VirtualShard.java | 9 ++++---- .../saalfeldlab/n5/shard/ShardTest.java | 2 +- 10 files changed, 52 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 8b0c49cb..d8165146 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -90,7 +90,7 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } @SuppressWarnings("rawtypes") - default Shard getShard(final String pathName, + default Shard getShard(final String pathName, 
final A datasetAttributes, long... shardGridPosition) { @@ -107,7 +107,7 @@ default DataBlock readBlock( if (datasetAttributes instanceof ShardedDatasetAttributes) { final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes) datasetAttributes; final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); - final Shard shard = getShard(pathName, shardedAttrs, shardPosition); + final Shard shard = getShard(pathName, shardedAttrs, shardPosition); return shard.getBlock(gridPosition); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index e044c6bc..2ece8d05 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -221,7 +221,7 @@ default boolean removeAttributes(final String pathName, final List attri if (datasetAttributes instanceof ShardParameters) { /* Group by shard index */ - final HashMap> shardBlockMap = new HashMap<>(); + final HashMap> shardBlockMap = new HashMap<>(); final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; for (DataBlock dataBlock : dataBlocks) { @@ -230,11 +230,11 @@ default boolean removeAttributes(final String pathName, final List attri if (!shardBlockMap.containsKey(shardHash)) shardBlockMap.put(shardHash, new InMemoryShard<>((DatasetAttributes & ShardParameters)shardAttributes, shardPosition)); - final InMemoryShard shard = shardBlockMap.get(shardHash); + final InMemoryShard shard = shardBlockMap.get(shardHash); shard.addBlock(dataBlock); } - for (InMemoryShard shard : shardBlockMap.values()) { + for (InMemoryShard shard : shardBlockMap.values()) { writeShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, (Shard)shard); } @@ -257,7 +257,7 @@ default void writeBlock( ShardParameters shardDatasetAttrs = (ShardParameters)datasetAttributes; final long[] shardPos = 
shardDatasetAttrs.getShardPositionForBlock(dataBlock.getGridPosition()); final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), shardPos); - final VirtualShard shard = new VirtualShard<>((DatasetAttributes & ShardParameters)shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); + final VirtualShard shard = new VirtualShard<>((DatasetAttributes & ShardParameters)shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); shard.writeBlock(dataBlock); return; } @@ -278,7 +278,7 @@ default void writeBlock( default void writeShard( final String path, final A datasetAttributes, - final Shard shard) throws N5Exception { + final Shard shard) throws N5Exception { final String shardPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), shard.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 463d232a..74df1da3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -315,7 +315,7 @@ default void writeBlocks( void writeShard( final String datasetPath, final A datasetAttributes, - final Shard shard) throws N5Exception; + final Shard shard) throws N5Exception; /** * Deletes the block at {@code gridPosition} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 5ecc67c7..2cdb392f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -2,15 +2,16 @@ import org.janelia.saalfeldlab.n5.DatasetAttributes; -public abstract class AbstractShard implements Shard { - protected final A datasetAttributes; +public abstract class AbstractShard implements Shard { + + protected final DatasetAttributes 
datasetAttributes; protected ShardIndex index; private final long[] gridPosition; - public AbstractShard(final A datasetAttributes, final long[] gridPosition, + public AbstractShard(final A datasetAttributes, final long[] gridPosition, final ShardIndex index) { this.datasetAttributes = datasetAttributes; @@ -19,15 +20,15 @@ public AbstractShard(final A datasetAttributes, final long[] gridPosition, } @Override - public A getDatasetAttributes() { + public A getDatasetAttributes() { - return datasetAttributes; + return (A)datasetAttributes; } @Override public int[] getSize() { - return datasetAttributes.getShardSize(); + return getDatasetAttributes().getShardSize(); } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 36a112e2..164f5116 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -10,12 +10,13 @@ import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.CountingOutputStream; import org.apache.commons.io.output.ProxyOutputStream; +import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; -public class InMemoryShard extends AbstractShard { +public class InMemoryShard extends AbstractShard { /* Map of a hash of the DataBlocks `gridPosition` to the block */ private final HashMap> blocks; @@ -27,14 +28,14 @@ public class InMemoryShard exte * (later) */ - public InMemoryShard(final A datasetAttributes, final long[] shardPosition) { + public InMemoryShard(final A datasetAttributes, final long[] shardPosition) { this( datasetAttributes, shardPosition, null); indexBuilder = new ShardIndexBuilder(this); 
indexBuilder.indexLocation(datasetAttributes.getIndexLocation()); } - public InMemoryShard(final A datasetAttributes, final long[] gridPosition, + public InMemoryShard(final A datasetAttributes, final long[] gridPosition, ShardIndex index) { super(datasetAttributes, gridPosition, index); @@ -97,17 +98,17 @@ public void write(final OutputStream out) throws IOException { writeShardStart(out, this); } - public static void writeShard(final OutputStream out, final Shard shard) throws IOException { + public static void writeShard(final OutputStream out, final Shard shard) throws IOException { fromShard(shard).write(out); } - public static InMemoryShard fromShard(Shard shard) { + public static InMemoryShard fromShard(Shard shard) { if (shard instanceof InMemoryShard) - return (InMemoryShard) shard; + return (InMemoryShard) shard; - final InMemoryShard inMemoryShard = new InMemoryShard( + final InMemoryShard inMemoryShard = new InMemoryShard( shard.getDatasetAttributes(), shard.getGridPosition()); @@ -117,7 +118,7 @@ public static InMemoryShard void writeShardEndStream( final OutputStream out, - InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { final A datasetAttributes = shard.getDatasetAttributes(); @@ -148,7 +149,7 @@ protected static void writeSh protected static void writeShardEnd( final OutputStream out, - InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { final A datasetAttributes = shard.getDatasetAttributes(); @@ -169,7 +170,7 @@ protected static void writeSh protected static void writeShardStart( final OutputStream out, - InMemoryShard shard ) throws IOException { + InMemoryShard shard ) throws IOException { final A datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 3e672ba5..5407154a 
100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -3,11 +3,12 @@ import java.util.Arrays; import java.util.Iterator; +import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.util.GridIterator; -public interface Shard extends Iterable> { +public interface Shard extends Iterable> { long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; @@ -30,7 +31,7 @@ default int[] getBlockGridSize() { return blockGridSize; } - public A getDatasetAttributes(); + public A getDatasetAttributes(); /** * Returns the size of shards in pixel units. @@ -113,12 +114,12 @@ default DataBlock[] getAllBlocks(long... position) { public ShardIndex getIndex(); - public static Shard createEmpty(final A attributes, long... shardPosition) { + public static Shard createEmpty(final A attributes, long... shardPosition) { final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); final ShardIndex shardIndex = new ShardIndex(attributes.getBlocksPerShard(), emptyIndex, ShardingCodec.IndexLocation.END); - return new InMemoryShard(attributes, shardPosition, shardIndex); + return new InMemoryShard(attributes, shardPosition, shardIndex); } public static long flatIndex(long[] gridPosition, int[] gridSize) { @@ -135,9 +136,9 @@ public static long flatIndex(long[] gridPosition, int[] gridSize) { public static class DataBlockIterator implements Iterator> { private final GridIterator it; - private final Shard shard; + private final Shard shard; - public DataBlockIterator(final Shard shard) { + public DataBlockIterator(final Shard shard) { this.shard = shard; it = new GridIterator(shard.getBlockGridSize()); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index f6f81dab..cf16f719 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -7,7 +7,7 @@ public class ShardIndexBuilder { - private final Shard shard; + private final Shard shard; private ShardIndex temporaryIndex; @@ -17,7 +17,7 @@ public class ShardIndexBuilder { private long currentOffset = 0; - public ShardIndexBuilder(Shard shard) { + public ShardIndexBuilder(Shard shard) { this.shard = shard; this.temporaryIndex = new ShardIndex(shard.getBlockGridSize(), location); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index b8596c8d..bff0f976 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.shard; +import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; @@ -13,13 +14,13 @@ import java.util.Arrays; import java.util.List; -public class ShardWriter { +public class ShardWriter { private static final int BYTES_PER_LONG = 8; private final List> blocks; - private A attributes; + private DatasetAttributes attributes; private ByteBuffer blockSizes; @@ -29,12 +30,17 @@ public class ShardWriter { private List blockBytes; - public ShardWriter(final A datasetAttributes) { + public ShardWriter(final A datasetAttributes) { blocks = new ArrayList<>(); attributes = datasetAttributes; } + public A getAttributes() { + + return (A)attributes; + } + public void reset() { blocks.clear(); @@ -48,12 +54,10 @@ public void addBlock(final DataBlock block) { blocks.add(block); } - public void write(final Shard shard, final 
OutputStream out) throws IOException { - - attributes = shard.getDatasetAttributes(); + public void write(final Shard shard, final OutputStream out) throws IOException { prepareForWritingDataBlock(); - if (attributes.getIndexLocation() == ShardingCodec.IndexLocation.START) { + if (shard.getDatasetAttributes().getIndexLocation() == ShardingCodec.IndexLocation.START) { writeIndexBlock(out); writeBlocks(out); } else { @@ -67,7 +71,6 @@ private void prepareForWritingDataBlock() throws IOException { // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); - indexData = attributes.createIndex(); blockBytes = new ArrayList<>(); long cumulativeBytes = 0; final long[] shardPosition = new long[1]; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 56c92871..86b87947 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -5,6 +5,7 @@ import java.io.OutputStream; import java.io.UncheckedIOException; +import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; @@ -14,12 +15,12 @@ import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -public class VirtualShard extends AbstractShard { +public class VirtualShard extends AbstractShard { final private KeyValueAccess keyValueAccess; final private String path; - public VirtualShard(final A datasetAttributes, long[] gridPosition, + public VirtualShard(final A datasetAttributes, long[] gridPosition, final KeyValueAccess keyValueAccess, final String path) { super(datasetAttributes, gridPosition, null); @@ -95,14 +96,14 @@ public void writeBlock(final 
DataBlock block) { public ShardIndex createIndex() { // Empty index of the correct size - return datasetAttributes.createIndex(); + return getDatasetAttributes().createIndex(); } @Override public ShardIndex getIndex() { try { - final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, datasetAttributes.createIndex()); + final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, getDatasetAttributes().createIndex()); index = readIndex == null ? createIndex() : readIndex; } catch (final N5Exception.N5NoSuchKeyException e) { index = createIndex(); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 8e12ac07..31f14498 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -191,7 +191,7 @@ public void writeReadShardTest() { final HashMap writtenBlocks = new HashMap<>(); - final InMemoryShard shard = new InMemoryShard<>(datasetAttributes, new long[]{0, 0}); + final InMemoryShard shard = new InMemoryShard<>(datasetAttributes, new long[]{0, 0}); for (int idx1 = 1; idx1 >= 0; idx1--) { for (int idx2 = 1; idx2 >= 0; idx2--) { From b2b3d2ba2e5dff263556b857fe0ef5514bb58fa0 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 8 Jan 2025 11:59:36 -0500 Subject: [PATCH 093/124] refactor: some signatures --- .../n5/ShardedDatasetAttributes.java | 10 ++-- .../janelia/saalfeldlab/n5/shard/Shard.java | 10 ++-- .../saalfeldlab/n5/shard/ShardIndex.java | 51 ++++++++++--------- .../n5/shard/ShardIndexBuilder.java | 2 +- .../saalfeldlab/n5/shard/ShardReader.java | 11 +--- .../saalfeldlab/n5/shard/ShardWriter.java | 5 +- .../saalfeldlab/n5/shard/VirtualShard.java | 4 +- 7 files changed, 43 insertions(+), 50 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 
8ee40baf..089e332d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -164,7 +164,7 @@ public long[] getShardPositionForBlock(final long... blockGridPosition) { * * @return the block position */ - public long[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { + public int[] getBlockPositionInShard(final long[] shardPosition, final long[] blockPosition) { // TODO check correctness final long[] shardPos = getShardPositionForBlock(blockPosition); @@ -172,9 +172,9 @@ public long[] getBlockPositionInShard(final long[] shardPosition, final long[] b return null; final int[] shardSize = getBlocksPerShard(); - final long[] blockShardPos = new long[shardSize.length]; + final int[] blockShardPos = new int[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { - blockShardPos[i] = blockPosition[i] % shardSize[i]; + blockShardPos[i] = (int)(blockPosition[i] % shardSize[i]); } return blockShardPos; @@ -205,7 +205,7 @@ public long[] getBlockMinFromShardPosition(final long[] shardPosition, final lon * * @return the block position */ - public long[] getBlockPositionFromShardPosition(final long[] shardPosition, final long[] blockPosition) { + public long[] getBlockPositionFromShardPosition(final long[] shardPosition, final int[] blockPosition) { // is this useful? 
final int[] shardBlockSize = getBlocksPerShard(); @@ -220,7 +220,7 @@ public long[] getBlockPositionFromShardPosition(final long[] shardPosition, fina /** * @return the number of blocks per shard */ - public long getNumBlocks() { + public int getNumBlocks() { return Arrays.stream(getBlocksPerShard()).reduce(1, (x, y) -> x * y); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 5a88f370..211ad69e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -62,7 +62,7 @@ default int[] getBlockGridSize() { * * @return the shard position */ - default long[] getBlockPosition(long... blockPosition) { + default int[] getBlockPosition(long... blockPosition) { final long[] shardPos = getDatasetAttributes().getShardPositionForBlock(blockPosition); return getDatasetAttributes().getBlockPositionInShard(shardPos, blockPosition); @@ -113,13 +113,11 @@ default List> getBlocks() { final ShardIndex shardIndex = getIndex(); final ShardedDatasetAttributes attrs = getDatasetAttributes(); final List> blocks = new ArrayList<>(); - for (long blockIdx = 0; blockIdx < attrs.getNumBlocks(); blockIdx++) { - int shardOffset = (int)blockIdx * 2; - final long[] index = shardIndex.getData(); - if (index[shardOffset] == Shard.EMPTY_INDEX_NBYTES || index[shardOffset+1] == EMPTY_INDEX_NBYTES) + for (int blockIdx = 0; blockIdx < attrs.getNumBlocks(); blockIdx++) { + if (!shardIndex.exists(blockIdx)) continue; - final long[] blockPosInShard = ShardIndex.shardPositionFromIndexOffset(shardOffset, attrs.getBlocksPerShard()); + final int[] blockPosInShard = ShardIndex.blockPosition(blockIdx, attrs.getBlocksPerShard()); final long[] blockPosInImg = attrs.getBlockPositionFromShardPosition(getGridPosition(), blockPosInShard); blocks.add(getBlock(blockPosInImg)); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 44bca3c5..a91c470c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -52,35 +52,41 @@ public ShardIndex(int[] shardBlockGridSize, DeterministicSizeCodec... codecs) { this(shardBlockGridSize, emptyIndexData(shardBlockGridSize), IndexLocation.END, codecs); } - public boolean exists(long... gridPosition) { + public boolean exists(int[] gridPosition) { - return getOffset(gridPosition) != Shard.EMPTY_INDEX_NBYTES && + return getOffset(gridPosition) != Shard.EMPTY_INDEX_NBYTES || getNumBytes(gridPosition) != Shard.EMPTY_INDEX_NBYTES; } + public boolean exists(int blockNum) { + + return data[blockNum * 2] != Shard.EMPTY_INDEX_NBYTES || + data[blockNum * 2 + 1] != Shard.EMPTY_INDEX_NBYTES; + } + public IndexLocation getLocation() { return location; } - public long getOffset(long... gridPosition) { + public long getOffset(int... gridPosition) { return data[getOffsetIndex(gridPosition)]; } - public long getNumBytes(long... gridPosition) { + public long getNumBytes(int... gridPosition) { return data[getNumBytesIndex(gridPosition)]; } - public void set(long offset, long nbytes, long[] gridPosition) { + public void set(long offset, long nbytes, int[] gridPosition) { final int i = getOffsetIndex(gridPosition); data[i] = offset; data[i + 1] = nbytes; } - private int getOffsetIndex(long... gridPosition) { + private int getOffsetIndex(int... gridPosition) { int idx = (int) gridPosition[0]; for (int i = 1; i < gridPosition.length; i++) { @@ -89,7 +95,7 @@ private int getOffsetIndex(long... gridPosition) { return idx * 2; } - private int getNumBytesIndex(long... gridPosition) { + private int getNumBytesIndex(int... 
gridPosition) { return getOffsetIndex(gridPosition) + 1; } @@ -237,28 +243,25 @@ private static int[] prepend(final int value, final int[] array) { } /** - * Calculate the block position in the shard grid for a given index offset. + * Calculate the relative block position in the shard for a given block index. * - * @param offset the offset into the index - * @param blocksPerShard the dimensions of the shard in blocks - * @return the relative position in the shard grid + * @param blockIdx the block index in the shard + * @param blocksPerShard the dimensions of the shard in blocks + * @return the relative position in the shard */ - public static long[] shardPositionFromIndexOffset(int offset, int[] blocksPerShard) { + public static int[] blockPosition(int blockIdx, int[] blocksPerShard) { + + int numBlocks = Arrays.stream(blocksPerShard).reduce(1, (x, y) -> x * y); + if (blockIdx >= numBlocks) + throw new IllegalArgumentException("Shard Index Offset " + blockIdx + " is out of bounds for shard dimensions " + Arrays.toString(blocksPerShard)); - int maxOffset = 1; - for (int i = 0; i < blocksPerShard.length; i++) { - maxOffset *= blocksPerShard[i]; - } - if (offset >= maxOffset*2) { - throw new IllegalArgumentException("Shard Index Offset " + offset + " is out of bounds for shard dimensions " + Arrays.toString(blocksPerShard)); - } - final long[] position = new long[blocksPerShard.length]; - int remainder = offset / 2; + final int[] position = new int[blocksPerShard.length]; + int remainder = blockIdx ; - for (int dim = blocksPerShard.length - 1; dim >= 0; dim--) { // Iterate backwards - position[dim] = remainder % blocksPerShard[dim]; // Calculate position for this dimension - remainder /= blocksPerShard[dim]; // Update the remainder + for (int dim = blocksPerShard.length - 1; dim >= 0; dim--) { + position[dim] = remainder % blocksPerShard[dim]; + remainder /= blocksPerShard[dim]; } return position; diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java index cf16f719..41d505af 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndexBuilder.java @@ -56,7 +56,7 @@ public ShardIndexBuilder setCodecs(DeterministicSizeCodec... codecs) { public ShardIndexBuilder addBlock(long[] blockPosition, long numBytes) { //TODO Caleb: Maybe move to ShardIndex? - final long[] blockPositionInShard = shard.getDatasetAttributes().getBlockPositionInShard( + final int[] blockPositionInShard = shard.getDatasetAttributes().getBlockPositionInShard( shard.getGridPosition(), blockPosition); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java index edab4ec0..991b3acd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java @@ -37,16 +37,7 @@ public DataBlock readBlock( final FileChannel in, long... blockPosition) throws IOException { - // TODO generalize from FileChannel - // TODO this assumes the "file" holding the shard is known, - // the logic to figure that out will have to go somewhere - - final ShardIndex index = readIndexes(in); - - final long[] shardPosition = datasetAttributes.getShardPositionForBlock(blockPosition); - in.position(index.getOffset(shardPosition)); - final InputStream is = Channels.newInputStream(in); - return DefaultBlockReader.readBlock(is, datasetAttributes, indexes); + throw new IOException("Remove this!"); } private long getIndexIndex(long... 
shardPosition) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java index 792019fd..31856837 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java @@ -13,6 +13,7 @@ import java.util.Arrays; import java.util.List; +@Deprecated public class ShardWriter { private static final int BYTES_PER_LONG = 8; @@ -66,11 +67,10 @@ private void prepareForWritingDataBlock() throws IOException { // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); - indexData = attributes.createIndex(); blockBytes = new ArrayList<>(); long cumulativeBytes = 0; - final long[] shardPosition = new long[1]; + final int[] shardPosition = new int[1]; for (int i = 0; i < blocks.size(); i++) { try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { @@ -86,6 +86,7 @@ private void prepareForWritingDataBlock() throws IOException { } System.out.println(Arrays.toString(indexData.getData())); + throw new IOException("Remove this!"); } private void prepareForWriting() throws IOException { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 7765d42b..edca5085 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -31,7 +31,7 @@ public VirtualShard(final ShardedDatasetAttributes datasetAttributes, long[] gri @Override public DataBlock getBlock(long... 
blockGridPosition) { - final long[] relativePosition = getBlockPosition(blockGridPosition); + final int[] relativePosition = getBlockPosition(blockGridPosition); if (relativePosition == null) throw new N5IOException("Attempted to read a block from the wrong shard."); @@ -57,7 +57,7 @@ public DataBlock getBlock(long... blockGridPosition) { @Override public void writeBlock(final DataBlock block) { - final long[] relativePosition = getBlockPosition(block.getGridPosition()); + final int[] relativePosition = getBlockPosition(block.getGridPosition()); if (relativePosition == null) throw new N5IOException("Attempted to write block in the wrong shard."); From 52751f5a22e4ccbe6b77a114d282f67999278fcf Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 8 Jan 2025 15:39:34 -0500 Subject: [PATCH 094/124] fix: Shard as Iterator> --- .../saalfeldlab/n5/codec/N5BlockCodec.java | 4 +- .../janelia/saalfeldlab/n5/shard/Shard.java | 46 ++++++------------- .../saalfeldlab/n5/shard/ShardIndex.java | 25 ---------- .../saalfeldlab/n5/util/GridIterator.java | 17 ++++--- .../saalfeldlab/n5/shard/ShardTest.java | 3 ++ 5 files changed, 29 insertions(+), 66 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java index 82f118bb..92158041 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java @@ -63,9 +63,9 @@ public DataBlock allocateDataBlock() throws IOException { start = false; } if (mode != 2) { - return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); + return attributes.getDataType().createDataBlock(blockSize, gridPosition.clone(), numElements); } else { - return attributes.getDataType().createDataBlock(null, gridPosition, numElements); + return attributes.getDataType().createDataBlock(null, gridPosition.clone(), numElements); } } diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 211ad69e..70f8150f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -13,6 +13,8 @@ public interface Shard extends Iterable> { long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; + public ShardedDatasetAttributes getDatasetAttributes(); + /** * Returns the number of blocks this shard contains along all dimensions. * @@ -23,30 +25,26 @@ public interface Shard extends Iterable> { */ default int[] getBlockGridSize() { - final int[] sz = getSize(); - final int[] blkSz = getBlockSize(); - final int[] blockGridSize = new int[sz.length]; - for (int i = 0; i < sz.length; i++) - blockGridSize[i] = (int)(sz[i] / blkSz[i]); - - return blockGridSize; + return getDatasetAttributes().getBlocksPerShard(); } - public ShardedDatasetAttributes getDatasetAttributes(); - /** * Returns the size of shards in pixel units. * * @return shard size */ - public int[] getSize(); + default int[] getSize() { + return getDatasetAttributes().getShardSize(); + } /** * Returns the size of blocks in pixel units. * * @return block size */ - public int[] getBlockSize(); + default int[] getBlockSize() { + return getDatasetAttributes().getBlockSize(); + } /** * Returns the position of this shard on the shard grid. @@ -105,23 +103,16 @@ default long[] getShard(long... 
blockPosition) { default Iterator> iterator() { - return new DataBlockIterator(this); + return new DataBlockIterator<>(this); } default List> getBlocks() { - final ShardIndex shardIndex = getIndex(); - final ShardedDatasetAttributes attrs = getDatasetAttributes(); final List> blocks = new ArrayList<>(); - for (int blockIdx = 0; blockIdx < attrs.getNumBlocks(); blockIdx++) { - if (!shardIndex.exists(blockIdx)) - continue; - - final int[] blockPosInShard = ShardIndex.blockPosition(blockIdx, attrs.getBlocksPerShard()); - final long[] blockPosInImg = attrs.getBlockPositionFromShardPosition(getGridPosition(), blockPosInShard); - blocks.add(getBlock(blockPosInImg)); + for (DataBlock block : this) { + if (block != null) + blocks.add(block); } - return blocks; } @@ -167,15 +158,4 @@ public DataBlock next() { return shard.getBlock(it.next()); } } - - /** - * Say we want async datablock access - * - * Say we construct shard then getBlockAt - * - * (this could be how we do the aggregation) multiple getblockAt calls don't trigger reading read triggers reading of all blocks that were requested - * - * Shard doesn't hold the data directly, but is the metadata about how the blocks are stored - * - */ } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index a91c470c..9c0d266a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -241,29 +241,4 @@ private static int[] prepend(final int value, final int[] array) { System.arraycopy(array, 0, indexBlockSize, 1, array.length); return indexBlockSize; } - - /** - * Calculate the relative block position in the shard for a given block index. 
- * - * @param blockIdx the block index in the shard - * @param blocksPerShard the dimensions of the shard in blocks - * @return the relative position in the shard - */ - public static int[] blockPosition(int blockIdx, int[] blocksPerShard) { - - int numBlocks = Arrays.stream(blocksPerShard).reduce(1, (x, y) -> x * y); - if (blockIdx >= numBlocks) - throw new IllegalArgumentException("Shard Index Offset " + blockIdx + " is out of bounds for shard dimensions " + Arrays.toString(blocksPerShard)); - - - final int[] position = new int[blocksPerShard.length]; - int remainder = blockIdx ; - - for (int dim = blocksPerShard.length - 1; dim >= 0; dim--) { - position[dim] = remainder % blocksPerShard[dim]; - remainder /= blocksPerShard[dim]; - } - - return position; - } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java index 1fcb118c..8b9ab8fc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.util; +import java.util.Arrays; import java.util.Iterator; /** @@ -68,13 +69,10 @@ public int getIndex() { final static public void indexToPosition(long index, final long[] dimensions, final long[] position) { final int maxDim = dimensions.length - 1; - for (int d = 0; d < maxDim; ++d) { - final long j = index / dimensions[d]; - position[d] = index - j * dimensions[d]; - index = j; + for (int dim = maxDim; dim >= 0; dim--) { + position[dim] = index % dimensions[dim]; + index /= dimensions[dim]; } - position[maxDim] = index; - } final static public int[] long2int(final long[] a) { @@ -95,4 +93,11 @@ final static public long[] int2long(final int[] i) { return l; } + public static void main(String[] args) { + + final GridIterator it = new GridIterator(new int[]{2, 2, 2}); + while (it.hasNext()) { + System.out.println(Arrays.toString(it.next())); + } + 
} } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index c5ab89cb..f16aee3d 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -7,6 +7,7 @@ import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.N5FSTest; import org.janelia.saalfeldlab.n5.N5KeyValueWriter; +import org.janelia.saalfeldlab.n5.N5URI; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.BytesCodec; @@ -26,6 +27,8 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; +import java.util.List; import java.util.Map; @RunWith(Parameterized.class) From 6e3cbe524e9b4af81afe29850c012ba46a5011db Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 8 Jan 2025 16:54:46 -0500 Subject: [PATCH 095/124] test: ShardIndexTest --- .../saalfeldlab/n5/shard/ShardIndex.java | 74 ++++++++++++--- .../saalfeldlab/n5/shard/ShardIndexTest.java | 93 +++++++++++++++++++ 2 files changed, 154 insertions(+), 13 deletions(-) create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 9c0d266a..27b9eace 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -1,5 +1,6 @@ package org.janelia.saalfeldlab.n5.shard; +import org.apache.commons.io.input.BoundedInputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.DatasetAttributes; @@ -15,6 +16,7 @@ import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import 
org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; @@ -113,6 +115,28 @@ public long numBytes() { return totalNumBytes; } + public static ShardIndex read(byte[] data, final ShardIndex index) throws IOException { + + final IndexByteBounds byteBounds = byteBounds(index, data.length); + final ByteArrayInputStream is = new ByteArrayInputStream(data); + is.skip(byteBounds.start); + BoundedInputStream bIs = BoundedInputStream.builder() + .setInputStream(is) + .setMaxCount(byteBounds.size).get(); + + return read(bIs, index); + } + + public static ShardIndex read(InputStream in, final ShardIndex index) throws IOException { + + @SuppressWarnings("unchecked") + final DataBlock indexBlock = (DataBlock) DefaultBlockReader.readBlock(in, + index.getIndexAttributes(), index.gridPosition); + final long[] indexData = indexBlock.getData(); + System.arraycopy(indexData, 0, index.data, 0, index.data.length); + return index; + } + public static ShardIndex read( final KeyValueAccess keyValueAccess, final String key, @@ -121,16 +145,9 @@ public static ShardIndex read( final IndexByteBounds byteBounds = byteBounds(index, keyValueAccess.size(key)); try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { - final long[] indexData; try (final InputStream in = lockedChannel.newInputStream()) { - final DataBlock indexBlock = (DataBlock)DefaultBlockReader.readBlock( - in, - index.getIndexAttributes(), - index.gridPosition); - indexData = indexBlock.getData(); + return read(in,index); } - System.arraycopy(indexData, 0, index.data, 0, index.data.length); - return index; } catch (final N5Exception.N5NoSuchKeyException e) { return null; } catch (final IOException | UncheckedIOException e) { @@ -144,7 +161,7 @@ public static void write( final String key ) throws IOException { - final long start = index.location 
== IndexLocation.START ? 0 : keyValueAccess.size(key); + final long start = index.location == IndexLocation.START ? 0 : sizeOrZero( keyValueAccess, key) ; try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(key, start, index.numBytes())) { try (final OutputStream os = lockedChannel.newOutputStream()) { write(index, os); @@ -154,6 +171,14 @@ public static void write( } } + private static long sizeOrZero(final KeyValueAccess keyValueAccess, final String key) { + try { + return keyValueAccess.size(key); + } catch (Exception e) { + return 0; + } + } + public static void write(final ShardIndex index, OutputStream out) throws IOException { DefaultBlockWriter.writeBlock(out, index.getIndexAttributes(), index); @@ -191,15 +216,17 @@ public static IndexByteBounds byteBounds(final long indexSize, final IndexLocati } } - private static class IndexByteBounds { + public static class IndexByteBounds { - private final long start; - private final long end; + public final long start; + public final long end; + public final long size; - private IndexByteBounds(long start, long end) { + public IndexByteBounds(long start, long end) { this.start = start; this.end = end; + this.size = end - start + 1; } } @@ -241,4 +268,25 @@ private static int[] prepend(final int value, final int[] array) { System.arraycopy(array, 0, indexBlockSize, 1, array.length); return indexBlockSize; } + + @Override + public boolean equals(Object other) { + + if (other instanceof ShardIndex) { + + final ShardIndex index = (ShardIndex) other; + if (this.location != index.location) + return false; + + if (!Arrays.equals(this.size, index.size)) + return false; + + if (!Arrays.equals(this.data, index.data)) + return false; + + } + return true; + } + } + diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java new file mode 100644 index 00000000..39d6ab3e --- /dev/null +++ 
b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java @@ -0,0 +1,93 @@ +package org.janelia.saalfeldlab.n5.shard; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.nio.file.Paths; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.N5FSTest; +import org.janelia.saalfeldlab.n5.N5KeyValueWriter; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; +import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.junit.After; +import org.junit.Ignore; +import org.junit.Test; + +public class ShardIndexTest { + + private static final N5FSTest tempN5Factory = new N5FSTest(); + + @After + public void removeTempWriters() { + tempN5Factory.removeTempWriters(); + } + + @Test + public void testReadVirtual() throws IOException { + + final N5KeyValueWriter writer = (N5KeyValueWriter) tempN5Factory.createTempN5Writer(); + final KeyValueAccess kva = writer.getKeyValueAccess(); + + final int[] shardBlockGridSize = new int[] { 6, 5 }; + final IndexLocation indexLocation = IndexLocation.END; + final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { new BytesCodec(), + new Crc32cChecksumCodec() }; + + final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), "0").toString(); + + final ShardIndex index = new ShardIndex(shardBlockGridSize, indexLocation, indexCodecs); + index.set(0, 6, new int[] { 0, 0 }); + index.set(19, 32, new int[] { 1, 0 }); + index.set(93, 111, new int[] { 3, 0 }); + index.set(143, 1, new int[] { 1, 2 }); + 
ShardIndex.write(index, kva, path); + + final ShardIndex other = new ShardIndex(shardBlockGridSize, indexLocation, indexCodecs); + ShardIndex.read(kva, path, other); + + assertEquals(index, other); + } + + @Test + @Ignore + public void testReadInMemory() throws IOException { + + final N5KeyValueWriter writer = (N5KeyValueWriter) tempN5Factory.createTempN5Writer(); + final KeyValueAccess kva = writer.getKeyValueAccess(); + + final int[] shardBlockGridSize = new int[] { 6, 5 }; + final IndexLocation indexLocation = IndexLocation.END; + final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { new BytesCodec(), + new Crc32cChecksumCodec() }; + final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), "0").toString(); + + final ShardIndex index = new ShardIndex(shardBlockGridSize, indexLocation, indexCodecs); + index.set(0, 6, new int[] { 0, 0 }); + index.set(19, 32, new int[] { 1, 0 }); + index.set(93, 111, new int[] { 3, 0 }); + index.set(143, 1, new int[] { 1, 2 }); + ShardIndex.write(index, kva, path); + + ShardedDatasetAttributes attrs = new ShardedDatasetAttributes( + new long[]{6,5}, + shardBlockGridSize, + new int[]{1,1}, + DataType.UINT8, + new Codec[]{new N5BlockCodec(), new GzipCompression(4)}, + new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, + indexLocation + ); + + final InMemoryShard shard = InMemoryShard.readShard(kva, path, new long[] {0,0}, attrs); + + assertEquals(index, shard.index); + } +} From 06477551d7b7f49b9d394874ddd5aebcfc921fe9 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 8 Jan 2025 16:55:15 -0500 Subject: [PATCH 096/124] feat: toward direct reading of InMemoryShard --- .../saalfeldlab/n5/shard/InMemoryShard.java | 63 ++++++++++++++++++- .../saalfeldlab/n5/shard/VirtualShard.java | 15 +++++ .../saalfeldlab/n5/util/GridIterator.java | 11 ++-- 3 files changed, 81 insertions(+), 8 deletions(-) diff --git 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index c527df26..70340b8e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -1,20 +1,27 @@ package org.janelia.saalfeldlab.n5.shard; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.CountingOutputStream; import org.apache.commons.io.output.ProxyOutputStream; import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.DefaultBlockReader; import org.janelia.saalfeldlab.n5.DefaultBlockWriter; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.GridIterator; public class InMemoryShard extends AbstractShard { @@ -27,7 +34,6 @@ public class InMemoryShard extends AbstractShard { * Use morton- or c-ording instead of writing blocks out in the order they're added? 
* (later) */ - public InMemoryShard(final A datasetAttributes, final long[] shardPosition) { this( datasetAttributes, shardPosition, null); @@ -99,6 +105,61 @@ public void write(final OutputStream out) throws IOException { writeShardStart(out, this); } + public static InMemoryShard readShard( + final KeyValueAccess kva, final String key, final long[] gridPosition, final A attributes) + throws IOException { + + try (final LockedChannel lockedChannel = kva.lockForReading(key)) { + try (final InputStream is = lockedChannel.newInputStream()) { + return readShard(is, gridPosition, attributes); + } + } + } + + public static InMemoryShard readShard( + final InputStream inputStream, final long[] gridPosition, final A attributes) throws IOException { + + try (ByteArrayOutputStream result = new ByteArrayOutputStream()) { + byte[] buffer = new byte[1024]; + for (int length; (length = inputStream.read(buffer)) != -1;) { + result.write(buffer, 0, length); + } + return readShard(result.toByteArray(), gridPosition, attributes); + } + } + + public static InMemoryShard readShard(final byte[] data, + long[] shardPosition, final A attributes) throws IOException { + + final ShardIndex index = attributes.createIndex(); + ShardIndex.read(data, index); + + final InMemoryShard shard = new InMemoryShard(attributes, shardPosition, index); + final GridIterator it = new GridIterator(attributes.getBlocksPerShard()); + while (it.hasNext()) { + + final long[] p = it.next(); + final int[] pInt = GridIterator.long2int(p); + + if (index.exists(pInt)) { + + final ByteArrayInputStream is = new ByteArrayInputStream(data); + is.skip(index.getOffset(pInt)); + BoundedInputStream bIs = BoundedInputStream.builder().setInputStream(is) + .setMaxCount(index.getNumBytes(pInt)).get(); + + final long[] blockGridPosition = attributes.getBlockPositionFromShardPosition(shardPosition, p); + @SuppressWarnings("unchecked") + final DataBlock blk = (DataBlock) DefaultBlockReader.readBlock(bIs, attributes, + 
blockGridPosition); + shard.addBlock(blk); + bIs.close(); + } + } + + return shard; + } + public static void writeShard(final OutputStream out, final Shard shard) throws IOException { fromShard(shard).write(out); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 3ebe47d3..3a36ac14 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -4,6 +4,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UncheckedIOException; +import java.util.function.Supplier; import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; @@ -28,6 +29,20 @@ public VirtualShard(final A data this.path = path; } + public VirtualShard(final A datasetAttributes, long[] gridPosition) { + + this(datasetAttributes, gridPosition, null, null); + } + + @SuppressWarnings("unchecked") + public DataBlock getBlock(Supplier inputSupplier, long... blockGridPosition) throws IOException { + + // TODO this method is just a wrapper around readBlock and probably not worth keeping + try (InputStream is = inputSupplier.get()) { + return (DataBlock) DefaultBlockReader.readBlock(is, datasetAttributes, blockGridPosition); + } + } + @SuppressWarnings("unchecked") @Override public DataBlock getBlock(long... 
blockGridPosition) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java index 8b9ab8fc..1ea16efb 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -63,6 +63,10 @@ public long[] next() { return position; } + public int[] nextAsInt() { + return long2int(next()); + } + public int getIndex() { return index; } @@ -93,11 +97,4 @@ final static public long[] int2long(final int[] i) { return l; } - public static void main(String[] args) { - - final GridIterator it = new GridIterator(new int[]{2, 2, 2}); - while (it.hasNext()) { - System.out.println(Arrays.toString(it.next())); - } - } } From b17cb1fbbb9a037e5cdaac33b4bd708e010def85 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 8 Jan 2025 21:45:05 -0500 Subject: [PATCH 097/124] feat/test: add block position iterator for shard * add ShardProperties test --- .../janelia/saalfeldlab/n5/shard/Shard.java | 13 +++ .../saalfeldlab/n5/shard/ShardParameters.java | 10 ++- .../saalfeldlab/n5/util/GridIterator.java | 17 ++-- .../n5/shard/ShardPropertiesTests.java | 90 +++++++++++++++++++ 4 files changed, 122 insertions(+), 8 deletions(-) create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index a90a2b12..b0e551a1 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -116,6 +116,18 @@ default List> getBlocks() { return blocks; } + /** + * Returns an {@link Iterator} over block positions contained in this shard. 
+ * + * @return + */ + default Iterator blockPositionIterator() { + + final int nd = getSize().length; + long[] min = getDatasetAttributes().getBlockPositionFromShardPosition( getGridPosition(), new long[nd]); + return new GridIterator(GridIterator.int2long(getBlockGridSize()), min); + } + public ShardIndex getIndex(); public static Shard createEmpty(final A attributes, long... shardPosition) { @@ -158,4 +170,5 @@ public DataBlock next() { return shard.getBlock(it.next()); } } + } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java index 27e9bf5b..31d385f8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -97,8 +97,10 @@ default int[] getBlockPositionInShard(final long[] shardPosition, final long[] b /** * Given a block's position relative to a shard, returns its position in pixels * relative to the image. - * - * @return the block position + * + * @param shardPosition shard position in the shard grid + * @param blockPosition block position the + * @return the block's min pixel coordinate */ default long[] getBlockMinFromShardPosition(final long[] shardPosition, final long[] blockPosition) { @@ -117,7 +119,9 @@ default long[] getBlockMinFromShardPosition(final long[] shardPosition, final lo * Given a block's position relative to a shard, returns its position relative * to the image. 
* - * @return the block position + * @param shardPosition shard position in the shard grid + * @param blockPosition block position relative to the shard + * @return the block position in the block grid */ default long[] getBlockPositionFromShardPosition(final long[] shardPosition, final long[] blockPosition) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java index 1ea16efb..299fda86 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -1,6 +1,5 @@ package org.janelia.saalfeldlab.n5.util; -import java.util.Arrays; import java.util.Iterator; /** @@ -14,15 +13,18 @@ public class GridIterator implements Iterator { final protected long[] position; + final protected long[] min; + final protected int lastIndex; protected int index = -1; - public GridIterator(final long[] dimensions) { + public GridIterator(final long[] dimensions, final long[] min) { final int n = dimensions.length; this.dimensions = new long[n]; this.position = new long[n]; + this.min = min; steps = new long[n]; final int m = n - 1; @@ -38,6 +40,11 @@ public GridIterator(final long[] dimensions) { lastIndex = (int)(k * dimm - 1); } + public GridIterator(final long[] dimensions) { + + this(dimensions, new long[dimensions.length]); + } + public GridIterator(final int[] dimensions) { this(int2long(dimensions)); @@ -59,7 +66,7 @@ public boolean hasNext() { @Override public long[] next() { fwd(); - indexToPosition(index, dimensions, position); + indexToPosition(index, dimensions, min, position); return position; } @@ -71,10 +78,10 @@ public int getIndex() { return index; } - final static public void indexToPosition(long index, final long[] dimensions, final long[] position) { + final static public void indexToPosition(long index, final long[] dimensions, final long[] min, final long[] position) { final int maxDim = 
dimensions.length - 1; for (int dim = maxDim; dim >= 0; dim--) { - position[dim] = index % dimensions[dim]; + position[dim] = index % dimensions[dim] + min[dim]; index /= dimensions[dim]; } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java new file mode 100644 index 00000000..5f661f03 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java @@ -0,0 +1,90 @@ +package org.janelia.saalfeldlab.n5.shard; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.util.Iterator; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.junit.Test; + +public class ShardPropertiesTests { + + @Test + public void testShardProperties() throws Exception { + + final long[] arraySize = new long[]{16, 16}; + final int[] shardSize = new int[]{16, 16}; + final long[] shardPosition = new long[]{1, 1}; + final int[] blkSize = new int[]{4, 4}; + + final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( + arraySize, + shardSize, + blkSize, + DataType.UINT8, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END); + + @SuppressWarnings({"rawtypes", "unchecked"}) + final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); + + assertArrayEquals(new int[]{4, 4}, shard.getBlockGridSize()); + + assertArrayEquals(new long[]{0, 0}, shard.getShard(0, 0)); + assertArrayEquals(new long[]{1, 1}, shard.getShard(5, 5)); + assertArrayEquals(new long[]{1, 0}, shard.getShard(5, 0)); + assertArrayEquals(new long[]{0, 1}, shard.getShard(0, 5)); + +// 
assertNull(shard.getBlockPosition(0, 0)); +// assertNull(shard.getBlockPosition(3, 3)); + + assertArrayEquals(new int[]{0, 0}, shard.getBlockPosition(4, 4)); + assertArrayEquals(new int[]{1, 1}, shard.getBlockPosition(5, 5)); + assertArrayEquals(new int[]{2, 2}, shard.getBlockPosition(6, 6)); + assertArrayEquals(new int[]{3, 3}, shard.getBlockPosition(7, 7)); + } + + @Test + public void testShardBlockPositionIterator() throws Exception { + + final long[] arraySize = new long[]{16, 16}; + final int[] shardSize = new int[]{16, 16}; + final long[] shardPosition = new long[]{1, 1}; + final int[] blkSize = new int[]{4, 4}; + + final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( + arraySize, + shardSize, + blkSize, + DataType.UINT8, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END); + + @SuppressWarnings({"rawtypes", "unchecked"}) + final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); + + int i = 0; + Iterator it = shard.blockPositionIterator(); + long[] p = null; + while (it.hasNext()) { + + p = it.next(); + if( i == 0 ) + assertArrayEquals(new long[]{4,4}, p); + + i++; + } + assertEquals(16,i); + assertArrayEquals(new long[]{7,7}, p); + + } + +} From 334e46c64dc60053fddc6a420ab176b49069c329 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 9 Jan 2025 16:57:48 -0500 Subject: [PATCH 098/124] feat: ShardIndex get properties by block index --- .../org/janelia/saalfeldlab/n5/shard/ShardIndex.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 27b9eace..4b3c0b01 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -76,11 +76,20 @@ public long getOffset(int... 
gridPosition) { return data[getOffsetIndex(gridPosition)]; } + public long getOffsetByBlockIndex(int index) { + return data[index * 2]; + } + public long getNumBytes(int... gridPosition) { return data[getNumBytesIndex(gridPosition)]; } + public long getNumBytesByBlockIndex(int index) { + + return data[index * 2 + 1]; + } + public void set(long offset, long nbytes, int[] gridPosition) { final int i = getOffsetIndex(gridPosition); From 84493829977de0cd7786aef8f8b339028e7a0e81 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 9 Jan 2025 17:00:40 -0500 Subject: [PATCH 099/124] feat: add Shard.getNumBlocks --- src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index b0e551a1..ad18f87f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -106,6 +106,11 @@ default Iterator> iterator() { return new DataBlockIterator<>(this); } + default int getNumBlocks() { + + return Arrays.stream(getBlockGridSize()).reduce(1, (x, y) -> x * y); + } + default List> getBlocks() { final List> blocks = new ArrayList<>(); From b6a5b4f307b3e624f6d350b5c11f8eeceb3b7ffa Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 9 Jan 2025 17:00:53 -0500 Subject: [PATCH 100/124] perf: VirtualShard smart override of getBlocks --- .../saalfeldlab/n5/shard/VirtualShard.java | 90 ++++++++++++++++--- 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 3a36ac14..c4cab674 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -4,9 +4,14 @@ import java.io.InputStream; import java.io.OutputStream; import 
java.io.UncheckedIOException; -import java.util.function.Supplier; - -import org.checkerframework.checker.units.qual.A; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.commons.io.input.BoundedInputStream; +import org.apache.commons.io.input.ProxyInputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.DefaultBlockReader; @@ -15,6 +20,7 @@ import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.util.GridIterator; public class VirtualShard extends AbstractShard { @@ -35,15 +41,79 @@ public VirtualShard(final A data } @SuppressWarnings("unchecked") - public DataBlock getBlock(Supplier inputSupplier, long... blockGridPosition) throws IOException { + public DataBlock getBlock(InputStream inputStream, long... 
blockGridPosition) throws IOException { + + // TODO this method is just a wrapper around readBlock + // is it worth keeping/ + return (DataBlock) DefaultBlockReader.readBlock( + new ProxyInputStream( inputStream ) { + @Override + public void close( ) { + //nop + } + }, datasetAttributes, blockGridPosition); + } + + @Override + public List> getBlocks() { + + // will not contain nulls + + final ShardIndex index = getIndex(); + // TODO if the index is completely empty, can return right away + + final ArrayList> blocks = new ArrayList<>(); + + // sort index offsets + // and keep track of relevant positions + final long[] indexData = index.getData(); + List sortedOffsets = IntStream.range(0, index.getNumElements() / 2).mapToObj(i -> { + return new long[] { indexData[i * 2], i }; + }).filter(x -> { + return x[0] != Shard.EMPTY_INDEX_NBYTES; + }).collect(Collectors.toList()); + + Collections.sort(sortedOffsets, (a, b) -> Long.compare(((long[]) a)[0], ((long[]) b)[0])); + + final int nd = getDatasetAttributes().getNumDimensions(); + long[] position = new long[nd]; + + final int[] blocksPerShard = getDatasetAttributes().getBlocksPerShard(); + final long[] blockGridMin = IntStream.range(0, nd).mapToLong(i -> { + return blocksPerShard[i] * getGridPosition()[i]; + }).toArray(); + + long streamPosition = 0; + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path)) { + try (final InputStream channelIn = lockedChannel.newInputStream()) { - // TODO this method is just a wrapper around readBlock and probably not worth keeping - try (InputStream is = inputSupplier.get()) { - return (DataBlock) DefaultBlockReader.readBlock(is, datasetAttributes, blockGridPosition); + for (long[] offsetIndex : sortedOffsets) { + + final long offset = offsetIndex[0]; + if (offset < 0) + continue; + + final long idx = offsetIndex[1]; + GridIterator.indexToPosition(idx, blocksPerShard, blockGridMin, position); + + channelIn.skip(offset - streamPosition); + final long numBytes = 
index.getNumBytesByBlockIndex((int) idx); + final BoundedInputStream bIs = BoundedInputStream.builder().setInputStream(channelIn) + .setMaxCount(numBytes).get(); + + blocks.add(getBlock(bIs, position.clone())); + streamPosition = offset + numBytes; + } + } + } catch (final N5Exception.N5NoSuchKeyException e) { + return blocks; + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read block from " + path, e); } + + return blocks; } - @SuppressWarnings("unchecked") @Override public DataBlock getBlock(long... blockGridPosition) { @@ -62,7 +132,7 @@ public DataBlock getBlock(long... blockGridPosition) { try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, size)) { try ( final InputStream channelIn = lockedChannel.newInputStream()) { final long[] blockPosInImg = getDatasetAttributes().getBlockPositionFromShardPosition(getGridPosition(), blockGridPosition); - return (DataBlock)DefaultBlockReader.readBlock(channelIn, datasetAttributes, blockPosInImg); + return getBlock( channelIn, blockPosInImg ); } } catch (final N5Exception.N5NoSuchKeyException e) { return null; @@ -126,10 +196,10 @@ public ShardIndex getIndex() { } catch (IOException e) { throw new N5IOException("Failed to read index at " + path, e); } + return index; } - static class CountingOutputStream extends OutputStream { private final OutputStream out; private long numBytes; From 55682ee04d271de70f5d0803f753f6f5f1833584 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Thu, 9 Jan 2025 17:01:10 -0500 Subject: [PATCH 101/124] fix: GridIterator iteration order --- .../saalfeldlab/n5/util/GridIterator.java | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java index 299fda86..d352f2b2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -78,10 +78,32 @@ public int getIndex() { return index; } - final static public void indexToPosition(long index, final long[] dimensions, final long[] min, final long[] position) { - final int maxDim = dimensions.length - 1; - for (int dim = maxDim; dim >= 0; dim--) { - position[dim] = index % dimensions[dim] + min[dim]; + final static public void indexToPosition(long index, final long[] dimensions, final long[] offset, + final long[] position) { + for (int dim = 0; dim < dimensions.length; dim++) { + position[dim] = (index % dimensions[dim]) + offset[dim]; + index /= dimensions[dim]; + } + } + + final static public void indexToPosition(long index, final long[] dimensions, final long[] position) { + for (int dim = 0; dim < dimensions.length; dim++) { + position[dim] = index % dimensions[dim]; + index /= dimensions[dim]; + } + } + + final static public void indexToPosition(long index, final int[] dimensions, final long[] offset, + final long[] position) { + for (int dim = 0; dim < dimensions.length; dim++) { + position[dim] = (index % dimensions[dim]) + offset[dim]; + index /= dimensions[dim]; + } + } + + final static public void indexToPosition(long index, final int[] dimensions, final long[] position) { + for (int dim = 0; dim < dimensions.length; dim++) { + position[dim] = index % dimensions[dim]; index /= dimensions[dim]; } } From 707addf6e9127d91d339176fe4ce7d250b7ac9c3 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Fri, 10 Jan 2025 09:51:09 -0500 Subject: [PATCH 102/124] feat: DataBlockIterator skips missing blocks in Shard --- .../janelia/saalfeldlab/n5/shard/Shard.java | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index ad18f87f..d3b1c647 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -2,8 +2,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; @@ -115,8 +117,7 @@ default List> getBlocks() { final List> blocks = new ArrayList<>(); for (DataBlock block : this) { - if (block != null) - blocks.add(block); + blocks.add(block); } return blocks; } @@ -158,20 +159,34 @@ public static class DataBlockIterator implements Iterator> { private final GridIterator it; private final Shard shard; + private final ShardIndex index; + private final ShardParameters attributes; + private int blockIndex = 0; public DataBlockIterator(final Shard shard) { this.shard = shard; + this.index = shard.getIndex(); + this.attributes = shard.getDatasetAttributes(); + this.blockIndex = 0; it = new GridIterator(shard.getBlockGridSize()); } @Override public boolean hasNext() { - return it.hasNext(); + + for (int i = blockIndex; i < attributes.getNumBlocks(); i++) { + if (index.exists(i)) + return true; + } + return false; } @Override public DataBlock next() { + while (!index.exists(blockIndex++)) + it.fwd(); + return shard.getBlock(it.next()); } } From 9a59de2f52abd7280d452fb2859a758b6e2408b3 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 10 Jan 2025 11:18:46 -0500 Subject: [PATCH 103/124] chore: rm unused ShardReader/Writer classes --- .../saalfeldlab/n5/shard/ShardReader.java | 81 ----------- .../saalfeldlab/n5/shard/ShardWriter.java | 130 ------------------ 2 files changed, 211 deletions(-) delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java delete mode 100644 src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java 
b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java deleted file mode 100644 index 991b3acd..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardReader.java +++ /dev/null @@ -1,81 +0,0 @@ -package org.janelia.saalfeldlab.n5.shard; - -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.DefaultBlockReader; -import org.janelia.saalfeldlab.n5.N5FSReader; -import org.janelia.saalfeldlab.n5.N5Reader; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.Codec; -import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; -import org.janelia.saalfeldlab.n5.codec.IdentityCodec; -import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; -import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; - -public class ShardReader { - - private final ShardedDatasetAttributes datasetAttributes; - private long[] indexes; - - public ShardReader(final ShardedDatasetAttributes datasetAttributes) { - - this.datasetAttributes = datasetAttributes; - } - - public ShardIndex readIndexes(FileChannel channel) throws IOException { - - return ShardIndex.read(channel, datasetAttributes); - } - - public DataBlock readBlock( - final FileChannel in, - long... blockPosition) throws IOException { - - throw new IOException("Remove this!"); - } - - private long getIndexIndex(long... 
shardPosition) { - - final int[] indexDimensions = datasetAttributes.getBlocksPerShard(); - long idx = 0; - for (int i = 0; i < indexDimensions.length; i++) { - idx += shardPosition[i] * indexDimensions[i]; - } - - return idx; - } - - public static void main(String[] args) { - - final ShardReader reader = new ShardReader(buildTestAttributes()); - - System.out.println(reader.getIndexIndex(0, 0)); - System.out.println(reader.getIndexIndex(0, 1)); - System.out.println(reader.getIndexIndex(1, 0)); - System.out.println(reader.getIndexIndex(1, 1)); - - final N5Reader n5 = new N5FSReader("shard.n5"); - final ShardedDatasetAttributes datasetAttributes = buildTestAttributes(); - n5.readBlock("dataset", datasetAttributes, 0, 0, 0); - - } - - private static ShardedDatasetAttributes buildTestAttributes() { - - return new ShardedDatasetAttributes( - new long[]{4, 4}, - new int[]{2, 2}, - new int[]{2, 2}, - DataType.INT32, - new Codec[]{new N5BlockCodec(), new IdentityCodec()}, - new DeterministicSizeCodec[]{new Crc32cChecksumCodec()}, - IndexLocation.END); - } - -} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java deleted file mode 100644 index 5e3002b9..00000000 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardWriter.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.janelia.saalfeldlab.n5.shard; - -import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; -import org.janelia.saalfeldlab.n5.DefaultBlockWriter; - -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -@Deprecated -public class ShardWriter { - - private static final int BYTES_PER_LONG = 8; - - private final List> blocks; - - private DatasetAttributes attributes; - - private ByteBuffer 
blockSizes; - - private ByteBuffer blockIndexes; - - private ShardIndex indexData; - - private List blockBytes; - - public ShardWriter(final A datasetAttributes) { - - blocks = new ArrayList<>(); - attributes = datasetAttributes; - } - - public A getAttributes() { - - return (A)attributes; - } - - public void reset() { - - blocks.clear(); - blockBytes.clear(); - blockSizes = null; - indexData = null; - } - - public void addBlock(final DataBlock block) { - - blocks.add(block); - } - - public void write(final Shard shard, final OutputStream out) throws IOException { - - prepareForWritingDataBlock(); - if (shard.getDatasetAttributes().getIndexLocation() == ShardingCodec.IndexLocation.START) { - writeIndexBlock(out); - writeBlocks(out); - } else { - writeBlocks(out); - writeIndexBlock(out); - } - } - - private void prepareForWritingDataBlock() throws IOException { - - // final ShardingProperties shardProps = new ShardingProperties(datasetAttributes); - // indexData = new ShardIndexDataBlock(shardProps.getIndexDimensions()); - - blockBytes = new ArrayList<>(); - long cumulativeBytes = 0; - final int[] shardPosition = new int[1]; - for (int i = 0; i < blocks.size(); i++) { - - try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { - DefaultBlockWriter.writeBlock(blockOut, attributes, blocks.get(i)); - System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); - - shardPosition[0] = i; - indexData.set(cumulativeBytes, blockOut.size(), shardPosition); - cumulativeBytes += blockOut.size(); - - blockBytes.add(blockOut.toByteArray()); - } - } - - System.out.println(Arrays.toString(indexData.getData())); - throw new IOException("Remove this!"); - } - - private void prepareForWriting() throws IOException { - - blockSizes = ByteBuffer.allocate(BYTES_PER_LONG * blocks.size()); - blockIndexes = ByteBuffer.allocate(BYTES_PER_LONG * blocks.size()); - blockBytes = new ArrayList<>(); - long cumulativeBytes = 0; - for (int i = 0; i < 
blocks.size(); i++) { - - try (final ByteArrayOutputStream blockOut = new ByteArrayOutputStream()) { - - DefaultBlockWriter.writeBlock(blockOut, attributes, blocks.get(i)); - System.out.println(String.format("block %d is %d bytes", i, blockOut.size())); - - blockIndexes.putLong(cumulativeBytes); - blockSizes.putLong(blockOut.size()); - cumulativeBytes += blockOut.size(); - - blockBytes.add(blockOut.toByteArray()); - } - } - } - - private void writeBlocks(final OutputStream out) throws IOException { - - for (final byte[] bytes : blockBytes) - out.write(bytes); - } - - private void writeIndexBlock(final OutputStream out) throws IOException { - - final DataOutputStream dos = new DataOutputStream(out); - for (final long l : indexData.getData()) - dos.writeLong(l); - } - -} From edb61a5c5b2257a6209d1ce514b3f695e2ed9db4 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 10 Jan 2025 11:20:33 -0500 Subject: [PATCH 104/124] fix/test: clone gridPosition --- .../java/org/janelia/saalfeldlab/n5/shard/ShardTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index da77593b..4ace0546 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -7,7 +7,6 @@ import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.N5FSTest; import org.janelia.saalfeldlab.n5.N5KeyValueWriter; -import org.janelia.saalfeldlab.n5.N5URI; import org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.BytesCodec; @@ -72,7 +71,7 @@ private ShardedDatasetAttributes getTestAttributes(long[] dimensions, int[] shar shardSize, blockSize, DataType.UINT8, - new Codec[]{new N5BlockCodec(dataByteOrder)}, // , new GzipCompression(4)}, + new Codec[]{new 
N5BlockCodec(dataByteOrder), new GzipCompression(4)}, new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, indexLocation ); @@ -210,7 +209,7 @@ public void writeReadBlockTest() { } writer.writeBlock("shard", datasetAttributes, dataBlock); - final DataBlock block = writer.readBlock("shard", datasetAttributes, gridPosition); + final DataBlock block = writer.readBlock("shard", datasetAttributes, gridPosition.clone()); Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); for (Map.Entry entry : writtenBlocks.entrySet()) { From 539959f23b90bd87924055023abdd8e3d71badb6 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 10 Jan 2025 11:31:54 -0500 Subject: [PATCH 105/124] refactor: EMPTY_INDEX_NBYTES to ShardIndex --- .../org/janelia/saalfeldlab/n5/shard/Shard.java | 1 - .../janelia/saalfeldlab/n5/shard/ShardIndex.java | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index d3b1c647..d5e21ece 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -13,7 +13,6 @@ public interface Shard extends Iterable> { - long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; /** * Returns the number of blocks this shard contains along all dimensions. 
diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 4b3c0b01..c05e3ffd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -28,11 +28,11 @@ public class ShardIndex extends LongArrayDataBlock { + public static final long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; private static final int BYTES_PER_LONG = 8; - private static final int LONGS_PER_BLOCK = 2; - private static final long[] DUMMY_GRID_POSITION = null; + private final IndexLocation location; private final DeterministicSizeCodec[] codecs; @@ -56,14 +56,14 @@ public ShardIndex(int[] shardBlockGridSize, DeterministicSizeCodec... codecs) { public boolean exists(int[] gridPosition) { - return getOffset(gridPosition) != Shard.EMPTY_INDEX_NBYTES || - getNumBytes(gridPosition) != Shard.EMPTY_INDEX_NBYTES; + return getOffset(gridPosition) != EMPTY_INDEX_NBYTES || + getNumBytes(gridPosition) != EMPTY_INDEX_NBYTES; } public boolean exists(int blockNum) { - return data[blockNum * 2] != Shard.EMPTY_INDEX_NBYTES || - data[blockNum * 2 + 1] != Shard.EMPTY_INDEX_NBYTES; + return data[blockNum * 2] != EMPTY_INDEX_NBYTES || + data[blockNum * 2 + 1] != EMPTY_INDEX_NBYTES; } public IndexLocation getLocation() { @@ -266,7 +266,7 @@ private static long[] emptyIndexData(final int[] size) { final int N = 2 * Arrays.stream(size).reduce(1, (x, y) -> x * y); final long[] data = new long[N]; - Arrays.fill(data, Shard.EMPTY_INDEX_NBYTES); + Arrays.fill(data, EMPTY_INDEX_NBYTES); return data; } From 0affc94d5302be443e85cc08127684e364c96195 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 10 Jan 2025 11:31:54 -0500 Subject: [PATCH 106/124] refactor: EMPTY_INDEX_NBYTES to ShardIndex --- .../org/janelia/saalfeldlab/n5/shard/Shard.java | 3 +-- .../janelia/saalfeldlab/n5/shard/ShardIndex.java | 14 +++++++------- 2 files changed, 8 insertions(+), 9 
deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index d3b1c647..5541bfe5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -13,7 +13,6 @@ public interface Shard extends Iterable> { - long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; /** * Returns the number of blocks this shard contains along all dimensions. @@ -139,7 +138,7 @@ default Iterator blockPositionIterator() { public static Shard createEmpty(final A attributes, long... shardPosition) { final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; - Arrays.fill(emptyIndex, EMPTY_INDEX_NBYTES); + Arrays.fill(emptyIndex, ShardIndex.EMPTY_INDEX_NBYTES); final ShardIndex shardIndex = new ShardIndex(attributes.getBlocksPerShard(), emptyIndex, ShardingCodec.IndexLocation.END); return new InMemoryShard(attributes, shardPosition, shardIndex); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 4b3c0b01..c05e3ffd 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -28,11 +28,11 @@ public class ShardIndex extends LongArrayDataBlock { + public static final long EMPTY_INDEX_NBYTES = 0xFFFFFFFFFFFFFFFFL; private static final int BYTES_PER_LONG = 8; - private static final int LONGS_PER_BLOCK = 2; - private static final long[] DUMMY_GRID_POSITION = null; + private final IndexLocation location; private final DeterministicSizeCodec[] codecs; @@ -56,14 +56,14 @@ public ShardIndex(int[] shardBlockGridSize, DeterministicSizeCodec... 
codecs) { public boolean exists(int[] gridPosition) { - return getOffset(gridPosition) != Shard.EMPTY_INDEX_NBYTES || - getNumBytes(gridPosition) != Shard.EMPTY_INDEX_NBYTES; + return getOffset(gridPosition) != EMPTY_INDEX_NBYTES || + getNumBytes(gridPosition) != EMPTY_INDEX_NBYTES; } public boolean exists(int blockNum) { - return data[blockNum * 2] != Shard.EMPTY_INDEX_NBYTES || - data[blockNum * 2 + 1] != Shard.EMPTY_INDEX_NBYTES; + return data[blockNum * 2] != EMPTY_INDEX_NBYTES || + data[blockNum * 2 + 1] != EMPTY_INDEX_NBYTES; } public IndexLocation getLocation() { @@ -266,7 +266,7 @@ private static long[] emptyIndexData(final int[] size) { final int N = 2 * Arrays.stream(size).reduce(1, (x, y) -> x * y); final long[] data = new long[N]; - Arrays.fill(data, Shard.EMPTY_INDEX_NBYTES); + Arrays.fill(data, EMPTY_INDEX_NBYTES); return data; } From 7dde5bb0633d17b1c32af7ef9585742345eeec8f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 10 Jan 2025 11:36:29 -0500 Subject: [PATCH 107/124] chore: rm unused method --- .../org/janelia/saalfeldlab/n5/DefaultBlockReader.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java index f881aea9..0a49fc19 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DefaultBlockReader.java @@ -33,7 +33,6 @@ import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec.DataBlockInputStream; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec; /** * Default implementation of {@link BlockReader}. 
@@ -97,10 +96,4 @@ public static > void readFromStream(final B dataBlock, dataBlock.readData(buffer); } - public static long getShardIndex(final ShardingCodec shardingCodec, final long[] gridPosition) { - - // TODO implement - return -1; - } - } \ No newline at end of file From a791244b42d162a8ffd04ed4b8ca19286071e5f0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 13 Jan 2025 11:34:39 -0500 Subject: [PATCH 108/124] feat: Codec add composition helpers --- .../org/janelia/saalfeldlab/n5/codec/Codec.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index a78df016..209d169b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -23,6 +23,22 @@ @NameConfig.Prefix("codec") public interface Codec extends Serializable { + public static OutputStream encode(OutputStream out, Codec.BytesCodec... bytesCodecs) throws IOException { + OutputStream stream = out; + for (final BytesCodec codec : bytesCodecs) + stream = codec.encode(stream); + + return stream; + } + + public static InputStream decode(InputStream out, Codec.BytesCodec... 
bytesCodecs) throws IOException { + InputStream stream = out; + for (final BytesCodec codec : bytesCodecs) + stream = codec.decode(stream); + + return stream; + } + public interface BytesCodec extends Codec { /** From cd54053d42c4f12b0fa897182a508da1e38431ca Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 13 Jan 2025 13:09:45 -0500 Subject: [PATCH 109/124] fix: EMPTY_INDEX_NBYTES now in ShardIndex --- .../org/janelia/saalfeldlab/n5/shard/VirtualShard.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index c4cab674..1b5a59a7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -5,6 +5,7 @@ import java.io.OutputStream; import java.io.UncheckedIOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -67,10 +68,10 @@ public List> getBlocks() { // sort index offsets // and keep track of relevant positions final long[] indexData = index.getData(); - List sortedOffsets = IntStream.range(0, index.getNumElements() / 2).mapToObj(i -> { + List sortedOffsets = Arrays.stream(blockIndexes).mapToObj(i -> { return new long[] { indexData[i * 2], i }; }).filter(x -> { - return x[0] != Shard.EMPTY_INDEX_NBYTES; + return x[0] != ShardIndex.EMPTY_INDEX_NBYTES; }).collect(Collectors.toList()); Collections.sort(sortedOffsets, (a, b) -> Long.compare(((long[]) a)[0], ((long[]) b)[0])); @@ -125,7 +126,7 @@ public DataBlock getBlock(long... 
blockGridPosition) { final long startByte = idx.getOffset(relativePosition); - if (startByte == Shard.EMPTY_INDEX_NBYTES ) + if (startByte == ShardIndex.EMPTY_INDEX_NBYTES ) return null; final long size = idx.getNumBytes(relativePosition); From 1c92d14a2149e1084947c234e264d31095d86809 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 13 Jan 2025 13:14:02 -0500 Subject: [PATCH 110/124] feat: add getBlocks(int[] blockIndexes) --- .../saalfeldlab/n5/shard/InMemoryShard.java | 21 ++++++++++++++++++- .../janelia/saalfeldlab/n5/shard/Shard.java | 4 +--- .../saalfeldlab/n5/shard/VirtualShard.java | 6 +++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 70340b8e..52ef6abe 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -80,6 +80,21 @@ public List> getBlocks() { return new ArrayList<>(blocks.values()); } + public List> getBlocks( int[] blockIndexes ) { + + final ArrayList> out = new ArrayList<>(); + final int[] blocksPerShard = getDatasetAttributes().getBlocksPerShard(); + + long[] position = new long[ getSize().length ]; + for( int idx : blockIndexes ) { + GridIterator.indexToPosition(idx, blocksPerShard, position); + DataBlock blk = blocks.get(Arrays.hashCode(position)); + if( blk != null ); + out.add(blk); + } + return out; + } + protected IndexLocation indexLocation() { if (index != null) @@ -114,8 +129,11 @@ public static InMemoryShard(attributes, gridPosition, kva, key)); } + @SuppressWarnings("hiding") public static InMemoryShard readShard( final InputStream inputStream, final long[] gridPosition, final A attributes) throws IOException { @@ -128,7 +146,8 @@ public static InMemoryShard InMemoryShard readShard(final byte[] data, + public static InMemoryShard readShard( + final byte[] data, long[] 
shardPosition, final A attributes) throws IOException { final ShardIndex index = attributes.createIndex(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 5541bfe5..a8f8ce89 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -1,11 +1,10 @@ package org.janelia.saalfeldlab.n5.shard; +import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.stream.Collectors; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; @@ -13,7 +12,6 @@ public interface Shard extends Iterable> { - /** * Returns the number of blocks this shard contains along all dimensions. * diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 1b5a59a7..f0586c8c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -56,7 +56,11 @@ public void close( ) { } @Override - public List> getBlocks() { + public List> getBlocks() { + return getBlocks(IntStream.range(0, getNumBlocks()).toArray()); + } + + public List> getBlocks(final int[] blockIndexes) { // will not contain nulls From 52aaa2ab1c4fa38ba625f72da74eae542d382907 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 13 Jan 2025 15:02:22 -0500 Subject: [PATCH 111/124] feat: InMemoryShard add new write methods --- .../saalfeldlab/n5/shard/InMemoryShard.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 52ef6abe..0bd3cd47 100644 --- 
a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -112,6 +112,15 @@ public ShardIndex getIndex() { return indexBuilder.build(); } + public void write(final KeyValueAccess keyValueAccess, final String path) throws IOException { + + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path)) { + try (final OutputStream os = lockedChannel.newOutputStream()) { + write(os); + } + } + } + public void write(final OutputStream out) throws IOException { if (indexLocation() == IndexLocation.END) @@ -130,6 +139,7 @@ public static InMemoryShard(attributes, gridPosition, kva, key)); } @@ -179,6 +189,15 @@ public static InMemoryShard void writeShard(final KeyValueAccess keyValueAccess, final String path, final InMemoryShard shard) throws IOException { + + try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path)) { + try (final OutputStream os = lockedChannel.newOutputStream()) { + writeShard(os, shard); + } + } + } + public static void writeShard(final OutputStream out, final Shard shard) throws IOException { fromShard(shard).write(out); From 6fac5f236102c050222e3cecd47dd8e6c4334bd1 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Mon, 13 Jan 2025 16:12:18 -0500 Subject: [PATCH 112/124] wip: minor change to writeBlocks, implement readBlocks * where readBlocks batches when possible --- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 47 +++++++++++++++++++ .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 12 ++--- .../org/janelia/saalfeldlab/n5/N5Reader.java | 28 +++++++++++ 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index d8165146..61ab4e2b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -27,9 
+27,13 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; +import java.util.List; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.shard.InMemoryShard; import org.janelia.saalfeldlab.n5.shard.Shard; import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.shard.VirtualShard; @@ -124,6 +128,49 @@ default DataBlock readBlock( } } + @Override + default List> readBlocks( + final String pathName, + final DatasetAttributes datasetAttributes, + final List blockPositions) throws N5Exception { + + // TODO which interface should have this implementation? + if (datasetAttributes instanceof ShardParameters) { + + /* Group by shard index */ + final HashMap> shardBlockMap = new HashMap<>(); + final HashMap> shardPositionMap = new HashMap<>(); + final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; + + for ( long[] blockPosition : blockPositions ) { + final long[] shardPosition = shardAttributes.getShardPositionForBlock(blockPosition); + final int shardHash = Arrays.hashCode(shardPosition); + if (!shardBlockMap.containsKey(shardHash)) { + final Shard shard = getShard(pathName, (DatasetAttributes & ShardParameters)shardAttributes, shardPosition); + shardBlockMap.put(shardHash, shard); + + final ArrayList positionList = new ArrayList<>(); + positionList.add(blockPosition); + shardPositionMap.put(shardHash, positionList); + } + else + shardPositionMap.get(shardBlockMap.get(shardHash)).add(blockPosition); + } + + final ArrayList> blocks = new ArrayList<>(); + for (Shard shard : shardBlockMap.values()) { + /* Add existing blocks before overwriting shard */ + final int shardHash = Arrays.hashCode(shard.getGridPosition()); + for( final long[] blkPosition : shardPositionMap.get(shardHash)) { + blocks.add(shard.getBlock(blkPosition)); + } + } + return blocks; + } else + return 
GsonN5Reader.super.readBlocks(pathName, datasetAttributes, blockPositions); + + } + @Override default String[] list(final String pathName) throws N5Exception { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 5977f142..9db10575 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -216,8 +216,10 @@ default boolean removeAttributes(final String pathName, final List attri return removed; } - @SuppressWarnings({ "rawtypes", "unchecked" }) - @Override default void writeBlocks(final String datasetPath, final DatasetAttributes datasetAttributes, final DataBlock... dataBlocks) throws N5Exception { + @Override default void writeBlocks( + final String datasetPath, + final DatasetAttributes datasetAttributes, + final DataBlock... dataBlocks) throws N5Exception { if (datasetAttributes instanceof ShardParameters) { /* Group by shard index */ @@ -236,6 +238,7 @@ default boolean removeAttributes(final String pathName, final List attri for (InMemoryShard shard : shardBlockMap.values()) { /* Add existing blocks before overwriting shard */ + @SuppressWarnings("unchecked") final Shard currentShard = (Shard)getShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, shard.getGridPosition()); for (DataBlock currentBlock : currentShard.getBlocks()) { if (shard.getBlock(currentBlock.getGridPosition()) == null) @@ -246,10 +249,7 @@ default boolean removeAttributes(final String pathName, final List attri } } else { - /* Just write each block */ - for (DataBlock dataBlock : dataBlocks) { - writeBlock(datasetPath, datasetAttributes, dataBlock); - } + GsonN5Writer.super.writeBlocks(datasetPath, datasetAttributes, dataBlocks); } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java index 
010e757f..932768a6 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java @@ -293,6 +293,34 @@ DataBlock readBlock( final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception; + /** + * Reads multiple {@link DataBlock}s. + *

      + * Implementations may optimize / batch read operations when possible, e.g. + * in the case that the datasets are sharded. + * + * @param pathName + * dataset path + * @param datasetAttributes + * the dataset attributes + * @param gridPositions + * a list of grid positions + * @return a list of data blocks + * @throws N5Exception + * the exception + */ + default List> readBlocks( + final String pathName, + final DatasetAttributes datasetAttributes, + final List gridPositions) throws N5Exception { + + final ArrayList> blocks = new ArrayList<>(); + for( final long[] p : gridPositions ) + blocks.add(readBlock(pathName, datasetAttributes, p)); + + return blocks; + } + /** * Load a {@link DataBlock} as a {@link Serializable}. The offset is given * in From aca03d43e959c8fa627a9478cf6f7becaaf8540c Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 14 Jan 2025 09:02:53 -0500 Subject: [PATCH 113/124] demo: BlockIterators --- .../saalfeldlab/n5/demo/BlockIterators.java | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java diff --git a/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java b/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java new file mode 100644 index 00000000..1631b945 --- /dev/null +++ b/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java @@ -0,0 +1,95 @@ +package org.janelia.saalfeldlab.n5.demo; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.janelia.saalfeldlab.n5.DataType; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.RawCompression; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import 
org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.GridIterator; + +public class BlockIterators { + + public static void main(String[] args) { + +// blockIterator(); + shardBlockIterator(); + } + + public static void shardBlockIterator() { + + final ShardedDatasetAttributes attrs = new ShardedDatasetAttributes( + new long[] {12, 8}, // image size + new int[] {6, 4}, // shard size + new int[] {2, 2}, // block size + DataType.UINT8, + new Codec[] { new BytesCodec() }, + new DeterministicSizeCodec[] { new BytesCodec() }, + IndexLocation.END); + + shardPositions(attrs) + .forEach(x -> System.out.println(Arrays.toString(x))); + } + + public static void blockIterator() { + + final DatasetAttributes attrs = new DatasetAttributes( + new long[] {12, 8}, + new int[] {2, 2}, + DataType.UINT8, + new RawCompression()); + + blockPositions(attrs).forEach(x -> System.out.println(Arrays.toString(x))); + } + + public static long[] blockGridSize(final DatasetAttributes attrs ) { + // this could be a nice method for DatasetAttributes + + return IntStream.range(0, attrs.getNumDimensions()).mapToLong(i -> { + return (long)Math.ceil(attrs.getDimensions()[i] / attrs.getBlockSize()[i]); + }).toArray(); + + } + + public static long[] shardGridSize(final ShardedDatasetAttributes attrs ) { + // this could be a nice method for DatasetAttributes + + return IntStream.range(0, attrs.getNumDimensions()).mapToLong(i -> { + return (long)Math.ceil(attrs.getDimensions()[i] / attrs.getShardSize()[i]); + }).toArray(); + + } + + public static Stream blockPositions( DatasetAttributes attrs ) { + return toStream(new GridIterator(blockGridSize(attrs))); + } + + public static Stream shardPositions( ShardedDatasetAttributes attrs ) { + + final int[] blocksPerShard = attrs.getBlocksPerShard(); + return toStream( new 
GridIterator(shardGridSize(attrs))) + .flatMap( shardPosition -> { + + final int nd = attrs.getNumDimensions(); + final long[] min = attrs.getBlockPositionFromShardPosition(shardPosition, new long[nd]); + return toStream(new GridIterator(GridIterator.int2long(blocksPerShard), min)); + }); + } + + public static Stream toStream( final Iterator it ) { + return StreamSupport.stream( Spliterators.spliteratorUnknownSize( + it, Spliterator.ORDERED), + false); + } + +} From 367cadbe712473cb901ae984857216eb454bb73f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Wed, 15 Jan 2025 16:29:23 -0500 Subject: [PATCH 114/124] fix: ShardIndex.getOffsetIndex --- .../saalfeldlab/n5/shard/ShardIndex.java | 8 ++- .../saalfeldlab/n5/shard/ShardIndexTest.java | 66 +++++++++++++++---- 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index c05e3ffd..1a4836b7 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -97,16 +97,18 @@ public void set(long offset, long nbytes, int[] gridPosition) { data[i + 1] = nbytes; } - private int getOffsetIndex(int... gridPosition) { + protected int getOffsetIndex(int... gridPosition) { int idx = (int) gridPosition[0]; + int cumulativeSize = 1; for (int i = 1; i < gridPosition.length; i++) { - idx += gridPosition[i] * size[i]; + cumulativeSize *= size[i]; + idx += gridPosition[i] * cumulativeSize; } return idx * 2; } - private int getNumBytesIndex(int... gridPosition) { + protected int getNumBytesIndex(int... 
gridPosition) { return getOffsetIndex(gridPosition) + 1; } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java index 39d6ab3e..f260b708 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java @@ -3,11 +3,14 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Paths; +import org.apache.commons.io.output.ByteArrayOutputStream; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5FSTest; import org.janelia.saalfeldlab.n5.N5KeyValueWriter; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; @@ -17,6 +20,7 @@ import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.GridIterator; import org.junit.After; import org.junit.Ignore; import org.junit.Test; @@ -30,6 +34,38 @@ public void removeTempWriters() { tempN5Factory.removeTempWriters(); } + @Test + public void testOffsetIndex() throws IOException { + + int[] shardBlockGridSize = new int[]{5,4,3}; + ShardIndex index = new ShardIndex( + shardBlockGridSize, + IndexLocation.END, new BytesCodec()); + + GridIterator it = new GridIterator(shardBlockGridSize); + int i = 0; + while( it.hasNext()) { + int j = index.getOffsetIndex(GridIterator.long2int(it.next())); + assertEquals(i, j); + i+=2; + } + + + shardBlockGridSize = new int[]{5,4,3,13}; + index = new ShardIndex( + shardBlockGridSize, + IndexLocation.END, new BytesCodec()); + + it = new GridIterator(shardBlockGridSize); + i = 0; + while( it.hasNext()) { + 
int j = index.getOffsetIndex(GridIterator.long2int(it.next())); + assertEquals(i, j); + i+=2; + } + + } + @Test public void testReadVirtual() throws IOException { @@ -57,7 +93,6 @@ public void testReadVirtual() throws IOException { } @Test - @Ignore public void testReadInMemory() throws IOException { final N5KeyValueWriter writer = (N5KeyValueWriter) tempN5Factory.createTempN5Writer(); @@ -65,9 +100,10 @@ public void testReadInMemory() throws IOException { final int[] shardBlockGridSize = new int[] { 6, 5 }; final IndexLocation indexLocation = IndexLocation.END; - final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { new BytesCodec(), + final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { + new BytesCodec(), new Crc32cChecksumCodec() }; - final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), "0").toString(); + final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), "indexTest").toString(); final ShardIndex index = new ShardIndex(shardBlockGridSize, indexLocation, indexCodecs); index.set(0, 6, new int[] { 0, 0 }); @@ -76,18 +112,20 @@ public void testReadInMemory() throws IOException { index.set(143, 1, new int[] { 1, 2 }); ShardIndex.write(index, kva, path); - ShardedDatasetAttributes attrs = new ShardedDatasetAttributes( - new long[]{6,5}, - shardBlockGridSize, - new int[]{1,1}, - DataType.UINT8, - new Codec[]{new N5BlockCodec(), new GzipCompression(4)}, - new DeterministicSizeCodec[]{new BytesCodec(), new Crc32cChecksumCodec()}, - indexLocation - ); + final ShardIndex indexRead = new ShardIndex(shardBlockGridSize, indexLocation, indexCodecs); + ShardIndex.read(rawBytes(kva, path), indexRead); + + assertEquals(index, indexRead); + } - final InMemoryShard shard = InMemoryShard.readShard(kva, path, new long[] {0,0}, attrs); + private static byte[] rawBytes(KeyValueAccess kva, String path) throws IOException { - assertEquals(index, shard.index); + final byte[] 
rawBytes = new byte[(int) kva.size(path)]; + try (final LockedChannel lockedChannel = kva.lockForReading(path)) { + try (final InputStream is = lockedChannel.newInputStream()) { + is.read(rawBytes); + } + } + return rawBytes; } } From 1a44168191b161def0f6a73d262608b9dab97c0f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 17 Jan 2025 13:48:40 -0500 Subject: [PATCH 115/124] feat: add Position * so that we can index by position * primitive long arrays are not great as keys for maps --- .../saalfeldlab/n5/util/FinalPosition.java | 38 ++++++++++++ .../janelia/saalfeldlab/n5/util/Position.java | 62 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/util/FinalPosition.java create mode 100644 src/main/java/org/janelia/saalfeldlab/n5/util/Position.java diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/FinalPosition.java b/src/main/java/org/janelia/saalfeldlab/n5/util/FinalPosition.java new file mode 100644 index 00000000..1b7076d5 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/FinalPosition.java @@ -0,0 +1,38 @@ +package org.janelia.saalfeldlab.n5.util; + +/* + * An immutable {@Position}. 
+ */ +public class FinalPosition implements Position { + + public final long[] position; + + public FinalPosition(long[] position) { + this.position = position; + } + + public FinalPosition(Position p) { + this.position = p.get().clone(); + } + + @Override + public long[] get() { + return position; + } + + @Override + public long get(int i) { + return position[i]; + } + + @Override + public String toString() { + return Position.toString(this); + } + + @Override + public boolean equals(Object obj) { + return Position.equals(this, obj); + } + +} diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/Position.java b/src/main/java/org/janelia/saalfeldlab/n5/util/Position.java new file mode 100644 index 00000000..5ddb8cf0 --- /dev/null +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/Position.java @@ -0,0 +1,62 @@ +package org.janelia.saalfeldlab.n5.util; + +import java.util.Arrays; + +/* + * A wrapper around a primitive long array that is lexicographically {@link Comparable} + * and for which we can test equality. + */ +public interface Position extends Comparable { + + public long[] get(); + + public long get(int i); + + default int numDimensions() { + return get().length; + } + + @Override + default int compareTo(Position other) { + + // use Arrays.compare when we update to Java 9+ + final int N = numDimensions() > other.numDimensions() ? 
numDimensions() : other.numDimensions(); + for (int i = 0; i < N; i++) { + final long diff = get(i) - other.get(i); + if (diff != 0) + return (int) diff; + } + return 0; + } + + public static boolean equals(final Position a, final Object b) { + + if (a == null && b == null) + return true; + + if (b == null) + return false; + + if (!(b instanceof Position)) + return false; + + final Position other = (Position) b; + if (other.numDimensions() != a.numDimensions()) + return false; + + for (int i = 0; i < a.numDimensions(); i++) + if (other.get(i) != a.get(i)) + return false; + + return true; + } + + public static String toString(Position p) { + return "Position: " + Arrays.toString(p.get()); + } + + public static Position wrap(final long[] p) { + return new FinalPosition(p); + } + +} From 43bc1e04eaa6ff8253439032322c18718da6267f Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 17 Jan 2025 13:51:04 -0500 Subject: [PATCH 116/124] feat: add positionToIndex static methods in GridIterator --- .../saalfeldlab/n5/util/GridIterator.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java index d352f2b2..9880e7b3 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/util/GridIterator.java @@ -108,6 +108,46 @@ final static public void indexToPosition(long index, final int[] dimensions, fin } } + final static public long positionToIndex(final long[] dimensions, final long[] position) { + long idx = position[0]; + int cumulativeSize = 1; + for (int i = 0; i < position.length; i++) { + idx += position[i] * cumulativeSize; + cumulativeSize *= dimensions[i]; + } + return idx; + } + + final static public long positionToIndex(final long[] dimensions, final int[] position) { + long idx = position[0]; + int cumulativeSize = 1; + for (int i = 0; i < position.length; 
i++) { + idx += position[i] * cumulativeSize; + cumulativeSize *= dimensions[i]; + } + return idx; + } + + final static public long positionToIndex(final int[] dimensions, final int[] position) { + long idx = position[0]; + int cumulativeSize = 1; + for (int i = 0; i < position.length; i++) { + idx += position[i] * cumulativeSize; + cumulativeSize *= dimensions[i]; + } + return idx; + } + + final static public long positionToIndex(final int[] dimensions, final long[] position) { + long idx = position[0]; + int cumulativeSize = 1; + for (int i = 0; i < position.length; i++) { + idx += position[i] * cumulativeSize; + cumulativeSize *= dimensions[i]; + } + return idx; + } + final static public int[] long2int(final long[] a) { final int[] i = new int[a.length]; From 0e353e42fc2c6175561ead61258197580fd875c0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 17 Jan 2025 13:54:18 -0500 Subject: [PATCH 117/124] feat: ShardParameters methods * shardsPerImage, blocksPerImage * grouping DataBlocks by shard postion * stream of block positions ordered by shard --- .../saalfeldlab/n5/shard/ShardParameters.java | 85 ++++++++++++++++++- .../n5/shard/ShardPropertiesTests.java | 30 ++++++- 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java index 31d385f8..d1952b4b 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -1,9 +1,25 @@ package org.janelia.saalfeldlab.n5.shard; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import 
java.util.stream.StreamSupport; import org.janelia.saalfeldlab.n5.BlockParameters; +import org.janelia.saalfeldlab.n5.DataBlock; +import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.GridIterator; +import org.janelia.saalfeldlab.n5.util.Position; public interface ShardParameters extends BlockParameters { @@ -38,6 +54,28 @@ default int[] getBlocksPerShard() { return blocksPerShard; } + /** + * Returns the number of blocks per dimension that tile the image. + * + * @return blocks per image + */ + default long[] blocksPerImage() { + return IntStream.range(0, getNumDimensions()).mapToLong(i -> { + return (long) Math.ceil(getDimensions()[i] / getBlockSize()[i]); + }).toArray(); + } + + /** + * Returns the number of shards per dimension that tile the image. + * + * @return shards per image + */ + default long[] shardsPerImage() { + return IntStream.range(0, getNumDimensions()).mapToLong(i -> { + return (long)Math.ceil(getDimensions()[i] / getShardSize()[i]); + }).toArray(); + } + /** * Given a block's position relative to the array, returns the position of the shard containing that block relative to the shard grid. 
* @@ -92,7 +130,6 @@ default int[] getBlockPositionInShard(final long[] shardPosition, final long[] b return blockShardPos; } - /** * Given a block's position relative to a shard, returns its position in pixels @@ -134,7 +171,36 @@ default long[] getBlockPositionFromShardPosition(final long[] shardPosition, fin return blockImagePos; } + + default Map> groupBlockPositions(final List blockPositions) { + + final TreeMap> map = new TreeMap<>(); + for( final long[] blockPos : blockPositions ) { + Position shardPos = Position.wrap(getShardPositionForBlock(blockPos)); + if( !map.containsKey(shardPos)) { + map.put(shardPos, new ArrayList<>()); + } + map.get(shardPos).add(blockPos); + } + + return map; + } + default Map>> groupBlocks(final List> blocks) { + + // figure out how to re-use groupBlockPositions here? + final TreeMap>> map = new TreeMap<>(); + for (final DataBlock block : blocks) { + Position shardPos = Position.wrap(getShardPositionForBlock(block.getGridPosition())); + if (!map.containsKey(shardPos)) { + map.put(shardPos, new ArrayList<>()); + } + map.get(shardPos).add(block); + } + + return map; + } + /** * @return the number of blocks per shard */ @@ -143,4 +209,21 @@ default long getNumBlocks() { return Arrays.stream(getBlocksPerShard()).reduce(1, (x, y) -> x * y); } + default Stream blockPositions() { + + final int[] blocksPerShard = getBlocksPerShard(); + return toStream( new GridIterator(shardsPerImage())) + .flatMap( shardPosition -> { + final int nd = getNumDimensions(); + final long[] min = getBlockPositionFromShardPosition(shardPosition, new long[nd]); + return toStream(new GridIterator(GridIterator.int2long(blocksPerShard), min)); + }); + } + + static Stream toStream( final Iterator it ) { + return StreamSupport.stream( Spliterators.spliteratorUnknownSize( + it, Spliterator.ORDERED), + false); + } + } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java 
b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java index 5f661f03..77d46ac4 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java @@ -2,15 +2,18 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.janelia.saalfeldlab.n5.DataType; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.Position; import org.junit.Test; public class ShardPropertiesTests { @@ -84,7 +87,32 @@ public void testShardBlockPositionIterator() throws Exception { } assertEquals(16,i); assertArrayEquals(new long[]{7,7}, p); + } + + @Test + public void testShardGrouping() { + + final long[] arraySize = new long[]{8, 12}; + final int[] shardSize = new int[]{4, 6}; + final int[] blkSize = new int[]{2, 3}; + + final ShardedDatasetAttributes attrs = new ShardedDatasetAttributes( + arraySize, + shardSize, + blkSize, + DataType.UINT8, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END); + + List blockPositions = attrs.blockPositions().collect(Collectors.toList()); + final Map> result = attrs.groupBlockPositions(blockPositions); + + // there are four shards in this image + assertEquals(4, result.keySet().size()); + // there are four blocks per shard in this image + result.values().stream().forEach( x -> assertEquals(4, x.size())); } } From 709730868111ffcdc584efaffe5379836c5d49a0 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 17 Jan 2025 13:57:17 -0500 Subject: [PATCH 118/124] wip: rm unused flatIndex in Shard * 
methods in GridIterator replaces this --- .../java/org/janelia/saalfeldlab/n5/shard/Shard.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index a8f8ce89..69ed415f 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -1,6 +1,5 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -141,17 +140,6 @@ public static Shard createE return new InMemoryShard(attributes, shardPosition, shardIndex); } - public static long flatIndex(long[] gridPosition, int[] gridSize) { - - long index = gridPosition[0]; - long cumSizes = gridSize[0]; - for (int i = 1; i < gridSize.length; i++) { - index += gridPosition[i] * cumSizes; - cumSizes *= gridSize[i]; - } - return index; - } - public static class DataBlockIterator implements Iterator> { private final GridIterator it; From f67943dcbb804227ca2d1d2ae441fa03dc7e1063 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 17 Jan 2025 13:59:16 -0500 Subject: [PATCH 119/124] refactor: InMemoryShard, read/writeBlocks * Using Position class * using ShardParameters.groupBlocks helper method --- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 45 +++++++------------ .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 35 +++++++-------- .../saalfeldlab/n5/shard/InMemoryShard.java | 20 ++++++--- .../saalfeldlab/n5/shard/ShardParameters.java | 3 -- 4 files changed, 46 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index 61ab4e2b..ba8eb7b4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -29,14 +29,15 
@@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -import org.janelia.saalfeldlab.n5.shard.InMemoryShard; import org.janelia.saalfeldlab.n5.shard.Shard; import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.shard.VirtualShard; +import org.janelia.saalfeldlab.n5.util.Position; import com.google.gson.Gson; import com.google.gson.JsonElement; @@ -93,10 +94,10 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } - @SuppressWarnings("rawtypes") - default Shard getShard(final String pathName, - final A datasetAttributes, - long... shardGridPosition) { + @SuppressWarnings({ "unchecked", "rawtypes" }) + @Override + default Shard readShard(final String pathName, + final A datasetAttributes, long... shardGridPosition) { final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), shardGridPosition); return new VirtualShard(datasetAttributes, shardGridPosition, getKeyValueAccess(), path); @@ -111,7 +112,7 @@ default DataBlock readBlock( if (datasetAttributes instanceof ShardedDatasetAttributes) { final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes) datasetAttributes; final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); - final Shard shard = getShard(pathName, shardedAttrs, shardPosition); + final Shard shard = readShard(pathName, shardedAttrs, shardPosition); return shard.getBlock(gridPosition); } @@ -137,38 +138,24 @@ default List> readBlocks( // TODO which interface should have this implementation? 
if (datasetAttributes instanceof ShardParameters) { - /* Group by shard index */ - final HashMap> shardBlockMap = new HashMap<>(); - final HashMap> shardPositionMap = new HashMap<>(); final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; - for ( long[] blockPosition : blockPositions ) { - final long[] shardPosition = shardAttributes.getShardPositionForBlock(blockPosition); - final int shardHash = Arrays.hashCode(shardPosition); - if (!shardBlockMap.containsKey(shardHash)) { - final Shard shard = getShard(pathName, (DatasetAttributes & ShardParameters)shardAttributes, shardPosition); - shardBlockMap.put(shardHash, shard); + /* Group by shard position */ + final Map> shardBlockMap = shardAttributes.groupBlockPositions(blockPositions); + final ArrayList> blocks = new ArrayList<>(); + for( Entry> e : shardBlockMap.entrySet()) { - final ArrayList positionList = new ArrayList<>(); - positionList.add(blockPosition); - shardPositionMap.put(shardHash, positionList); - } - else - shardPositionMap.get(shardBlockMap.get(shardHash)).add(blockPosition); - } + final Shard shard = readShard(pathName, (DatasetAttributes & ShardParameters) shardAttributes, + e.getKey().get()); - final ArrayList> blocks = new ArrayList<>(); - for (Shard shard : shardBlockMap.values()) { - /* Add existing blocks before overwriting shard */ - final int shardHash = Arrays.hashCode(shard.getGridPosition()); - for( final long[] blkPosition : shardPositionMap.get(shardHash)) { + for (final long[] blkPosition : e.getValue()) { blocks.add(shard.getBlock(blkPosition)); } } + return blocks; } else return GsonN5Reader.super.readBlocks(pathName, datasetAttributes, blockPositions); - } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 9db10575..ac6f733a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -29,9 +29,10 @@ import java.io.OutputStream; import java.io.UncheckedIOException; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.stream.Collectors; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; import org.janelia.saalfeldlab.n5.shard.InMemoryShard; @@ -44,6 +45,7 @@ import com.google.gson.JsonObject; import com.google.gson.JsonSyntaxException; import org.janelia.saalfeldlab.n5.shard.VirtualShard; +import org.janelia.saalfeldlab.n5.util.Position; /** * Default implementation of {@link N5Writer} with JSON attributes parsed with @@ -222,30 +224,25 @@ default boolean removeAttributes(final String pathName, final List attri final DataBlock... dataBlocks) throws N5Exception { if (datasetAttributes instanceof ShardParameters) { - /* Group by shard index */ - final HashMap> shardBlockMap = new HashMap<>(); + final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; - for (DataBlock dataBlock : dataBlocks) { - final long[] shardPosition = shardAttributes.getShardPositionForBlock(dataBlock.getGridPosition()); - final int shardHash = Arrays.hashCode(shardPosition); - if (!shardBlockMap.containsKey(shardHash)) - shardBlockMap.put(shardHash, new InMemoryShard<>((DatasetAttributes & ShardParameters)shardAttributes, shardPosition)); + /* Group blocks by shard index */ + final Map>> shardBlockMap = shardAttributes.groupBlocks( + Arrays.stream(dataBlocks).collect(Collectors.toList())); - final InMemoryShard shard = shardBlockMap.get(shardHash); - shard.addBlock(dataBlock); - } + for( final Entry>> e : shardBlockMap.entrySet()) { - for (InMemoryShard shard : shardBlockMap.values()) { - /* Add existing blocks before overwriting shard */ + final long[] shardPosition = e.getKey().get(); @SuppressWarnings("unchecked") - final Shard currentShard = (Shard)getShard(datasetPath, 
(DatasetAttributes & ShardParameters)shardAttributes, shard.getGridPosition()); - for (DataBlock currentBlock : currentShard.getBlocks()) { - if (shard.getBlock(currentBlock.getGridPosition()) == null) - shard.addBlock(currentBlock); - } + final Shard currentShard = (Shard) readShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, + shardPosition); + + final InMemoryShard newShard = InMemoryShard.fromShard(currentShard); + for( DataBlock blk : e.getValue()) + newShard.addBlock(blk); - writeShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, shard); + writeShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, newShard); } } else { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index 0bd3cd47..b36dc5ac 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -8,6 +8,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.TreeMap; import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.output.ByteArrayOutputStream; @@ -22,13 +24,14 @@ import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.GridIterator; +import org.janelia.saalfeldlab.n5.util.Position; public class InMemoryShard extends AbstractShard { /* Map of a hash of the DataBlocks `gridPosition` to the block */ - private final HashMap> blocks; + private final Map> blocks; private ShardIndexBuilder indexBuilder; - + /* * TODO: * Use morton- or c-ording instead of writing blocks out in the order they're added? 
@@ -45,17 +48,22 @@ public InMemoryShard(final A dat ShardIndex index) { super(datasetAttributes, gridPosition, index); - blocks = new HashMap<>(); + blocks = new TreeMap<>(); } private void storeBlock(DataBlock block) { - blocks.put(Arrays.hashCode(block.getGridPosition()), block); + blocks.put(Position.wrap(block.getGridPosition()), block); } + /* + * Returns the {@link DataBlock} given a block grid position. + *

      + * The block grid position is relative to the image, not relative to this shard. + */ @Override public DataBlock getBlock(long... blockGridPosition) { - return blocks.get(Arrays.hashCode(blockGridPosition)); + return blocks.get(Position.wrap(blockGridPosition)); } @Override @@ -88,7 +96,7 @@ public List> getBlocks( int[] blockIndexes ) { long[] position = new long[ getSize().length ]; for( int idx : blockIndexes ) { GridIterator.indexToPosition(idx, blocksPerShard, position); - DataBlock blk = blocks.get(Arrays.hashCode(position)); + DataBlock blk = getBlock(position); if( blk != null ); out.add(blk); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java index d1952b4b..ddc3ba28 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -8,15 +8,12 @@ import java.util.Spliterator; import java.util.Spliterators; import java.util.TreeMap; -import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.janelia.saalfeldlab.n5.BlockParameters; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.DatasetAttributes; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.GridIterator; import org.janelia.saalfeldlab.n5.util.Position; From 9224215e811bf9f0f637291d86d68ace92a1353a Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 21 Jan 2025 09:31:28 -0500 Subject: [PATCH 120/124] wip/feat: N5Reader.readShard --- .../java/org/janelia/saalfeldlab/n5/N5Reader.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java index 
932768a6..04c9d889 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java @@ -43,6 +43,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; + /** * A simple structured container for hierarchies of chunked * n-dimensional datasets and attributes. @@ -293,6 +296,18 @@ DataBlock readBlock( final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception; + /** + * Reads the {@link Shard} at the corresponding grid position. + * + * @param + * @param datasetPath + * @param datasetAttributes + * @param shardGridPosition + * @return the shard + */ + public Shard readShard(final String datasetPath, + final A datasetAttributes, long... shardGridPosition); + /** * Reads multiple {@link DataBlock}s. *

      From a74d3437d0b5520076e4e2629de4dc50a11db787 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Tue, 21 Jan 2025 10:08:02 -0500 Subject: [PATCH 121/124] feat: add ShardIndex.isEmpty * use it to return early for getBlocks --- .../org/janelia/saalfeldlab/n5/shard/ShardIndex.java | 11 +++++++++++ .../janelia/saalfeldlab/n5/shard/VirtualShard.java | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 1a4836b7..10433c10 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -25,6 +25,7 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.util.Arrays; +import java.util.stream.IntStream; public class ShardIndex extends LongArrayDataBlock { @@ -66,6 +67,16 @@ public boolean exists(int blockNum) { data[blockNum * 2 + 1] != EMPTY_INDEX_NBYTES; } + public int getNumBlocks() { + + return Arrays.stream(getSize()).reduce(1, (x, y) -> x * y); + } + + public boolean isEmpty() { + + return !IntStream.range(0, getNumBlocks()).anyMatch(i -> exists(i)); + } + public IndexLocation getLocation() { return location; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index f0586c8c..88aa6b08 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -63,12 +63,12 @@ public List> getBlocks() { public List> getBlocks(final int[] blockIndexes) { // will not contain nulls - final ShardIndex index = getIndex(); - // TODO if the index is completely empty, can return right away - final ArrayList> blocks = new ArrayList<>(); + if (index.isEmpty()) + return blocks; + // sort index offsets // and keep track of relevant positions 
final long[] indexData = index.getData(); From ce50fa9a746179e4f741ef0c62939586f44fcc82 Mon Sep 17 00:00:00 2001 From: John Bogovic Date: Fri, 24 Jan 2025 13:12:19 -0500 Subject: [PATCH 122/124] test: add a test for nested sharding codecs * but ignore it for now --- .../saalfeldlab/n5/shard/ShardTest.java | 53 +++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 4ace0546..6848b9df 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -15,12 +15,16 @@ import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +import org.janelia.saalfeldlab.n5.util.GridIterator; import org.junit.After; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import static org.junit.Assert.assertArrayEquals; + import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; @@ -121,10 +125,6 @@ public void writeReadBlocksTest() { final KeyValueAccess kva = ((N5KeyValueWriter)writer).getKeyValueAccess(); - final String shardKey = ((N5KeyValueWriter)writer).absoluteDataBlockPath("shard", 2, 2); - final VirtualShard vs = new VirtualShard<>(datasetAttributes, new long[]{2, 2}, kva, shardKey); - final List> blocks = vs.getBlocks(); - final String[][] keys = new String[][]{ {"shard", "0", "0"}, {"shard", "1", "0"}, @@ -272,4 +272,49 @@ public void writeReadShardTest() { } } + @Test + @Ignore + public void writeReadNestedShards() { + + int[] blockSize = new int[]{4, 4}; + int N = Arrays.stream(blockSize).reduce(1, (x,y) -> x*y); + + final N5Writer writer = tempN5Factory.createTempN5Writer(); + final 
ShardedDatasetAttributes datasetAttributes = getNestedShardCodecsAttributes(blockSize); + writer.createDataset("nestedShards", datasetAttributes); + + final byte[] data = new byte[N]; + Arrays.fill(data, (byte)4); + + writer.writeBlocks("nestedShards", datasetAttributes, + new ByteArrayDataBlock(blockSize, new long[] { 1, 1 }, data), + new ByteArrayDataBlock(blockSize, new long[] { 0, 2 }, data), + new ByteArrayDataBlock(blockSize, new long[] { 2, 1 }, data)); + + assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 1, 1).getData()); + assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 0, 2).getData()); + assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 2, 1).getData()); + } + + private ShardedDatasetAttributes getNestedShardCodecsAttributes(int[] blockSize) { + + final int[] innerShardSize = new int[] { 2 * blockSize[0], 2 * blockSize[1] }; + final int[] shardSize = new int[] { 4 * blockSize[0], 4 * blockSize[1] }; + final long[] dimensions = GridIterator.int2long(shardSize); + + // TODO: its not even clear how we build this given + // this constructor. Is the block size of the sharded dataset attributes + // the innermost (block) size or the intermediate shard size? 
+ // probably better to forget about this class - only use DatasetAttributes + // and detect shading in another way + final ShardingCodec innerShard = new ShardingCodec(innerShardSize, + new Codec[] { new BytesCodec() }, + new DeterministicSizeCodec[] { new BytesCodec(indexByteOrder), new Crc32cChecksumCodec() }, + IndexLocation.START); + + return new ShardedDatasetAttributes(dimensions, shardSize, blockSize, DataType.UINT8, + new Codec[] { innerShard }, + new DeterministicSizeCodec[] { new BytesCodec(indexByteOrder), new Crc32cChecksumCodec() }, + IndexLocation.END); + } } From 3bf8fe830747aa4f96e53ae50de374b25649133d Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 4 Feb 2025 16:02:20 -0500 Subject: [PATCH 123/124] refactor: large codecs/shards implementation refactor --- README.md | 5 +- .../janelia/saalfeldlab/n5/Compression.java | 18 +- .../saalfeldlab/n5/DatasetAttributes.java | 238 ++++++++++-------- .../saalfeldlab/n5/DefaultBlockWriter.java | 6 +- .../saalfeldlab/n5/GsonKeyValueN5Reader.java | 103 +++----- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 41 +-- .../org/janelia/saalfeldlab/n5/GsonUtils.java | 4 +- .../org/janelia/saalfeldlab/n5/N5Reader.java | 30 ++- .../org/janelia/saalfeldlab/n5/N5Writer.java | 30 +-- .../n5/ShardedDatasetAttributes.java | 6 +- .../janelia/saalfeldlab/n5/codec/Codec.java | 98 ++++++-- .../FixedLengthConvertedInputStream.java | 2 +- .../saalfeldlab/n5/codec/N5BlockCodec.java | 189 ++++++++------ .../codec/{BytesCodec.java => RawBytes.java} | 23 +- .../saalfeldlab/n5/shard/AbstractShard.java | 6 +- .../saalfeldlab/n5/shard/InMemoryShard.java | 67 ++--- .../janelia/saalfeldlab/n5/shard/Shard.java | 2 +- .../saalfeldlab/n5/shard/ShardIndex.java | 52 ++-- .../saalfeldlab/n5/shard/ShardParameters.java | 31 +-- .../saalfeldlab/n5/shard/ShardingCodec.java | 31 ++- .../saalfeldlab/n5/shard/VirtualShard.java | 87 ++++--- .../saalfeldlab/n5/AbstractN5Test.java | 10 +- .../n5/FileSystemKeyValueAccessTest.java | 10 - 
.../org/janelia/saalfeldlab/n5/N5FSTest.java | 1 - .../saalfeldlab/n5/codec/BytesTests.java | 7 +- .../codec/FixedConvertedOutputStreamTest.java | 69 +++-- .../saalfeldlab/n5/demo/BlockIterators.java | 6 +- .../saalfeldlab/n5/shard/ShardIndexTest.java | 17 +- .../n5/shard/ShardPropertiesTests.java | 74 +++--- .../saalfeldlab/n5/shard/ShardTest.java | 127 +++++----- 30 files changed, 748 insertions(+), 642 deletions(-) rename src/main/java/org/janelia/saalfeldlab/n5/codec/{BytesCodec.java => RawBytes.java} (89%) diff --git a/README.md b/README.md index bc97e9b8..59b25df2 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ N5 group is not a single file but simply a directory on the file system. Meta-d 1. All directories of the file system are N5 groups. 2. A JSON file `attributes.json` in a directory contains arbitrary attributes. A group without attributes may not have an `attributes.json` file. -3. The version of this specification is 4.0.0 and is stored in the "n5" attribute of the root group "/". +3. The version of this specification is 1.0.0 and is stored in the "n5" attribute of the root group "/". 4. A dataset is a group with the mandatory attributes: * dimensions (e.g. [100, 200, 300]), * blockSize (e.g. [64, 64, 64]), @@ -38,7 +38,7 @@ N5 group is not a single file but simply a directory on the file system. Meta-d * xz with parameters * preset (integer, default 6). - Custom compression schemes with arbitrary parameters can be added using [compression annotations](#extensible-compression-schemes), e.g. [N5 Blosc](https://github.com/saalfeldlab/n5-blosc) and [N5 ZStandard](https://github.com/JaneliaSciComp/n5-zstandard/). + Custom compression schemes with arbitrary parameters can be added using [compression annotations](#extensible-compression-schemes), e.g. [N5 Blosc](https://github.com/saalfeldlab/n5-blosc). 5. Chunks are stored in a directory hierarchy that enumerates their positive integer position in the chunk grid (e.g. 
`0/4/1/7` for chunk grid position p=(0, 4, 1, 7)). 6. Datasets are sparse, i.e. there is no guarantee that all chunks of a dataset exist. 7. Chunks cannot be larger than 2GB (2<sup>31</sup>Bytes). @@ -134,3 +134,4 @@ Custom compression schemes can be implemented using the annotation discovery mec HDF5 is a great format that provides a wealth of conveniences that I do not want to miss. Its inefficiency for parallel writing, however, limits its applicability for handling of very large n-dimensional data. N5 uses the native filesystem of the target platform and JSON files to specify basic and custom meta-data as attributes. It aims at preserving the convenience of HDF5 where possible but doesn't try too hard to be a full replacement. +Please do not take this project too seriously, we will see where it will get us and report back when more data is available. diff --git a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java index ac0c49b5..2e8b9cdf 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/Compression.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/Compression.java @@ -39,6 +39,10 @@ import org.scijava.annotations.Indexable; /** + * Deprecated: {@link Compression}s are no longer a special case. + *
      + * Use {@link Codec.BytesCodec} for implementing compressors + *

      * Compression scheme interface. * * @author Stephan Saalfeld @@ -53,7 +57,7 @@ public interface Compression extends Serializable, Codec.BytesCodec { @Inherited @Target(ElementType.TYPE) @Indexable - public static @interface CompressionType { + @interface CompressionType { String value(); } @@ -65,10 +69,10 @@ public interface Compression extends Serializable, Codec.BytesCodec { @Retention(RetentionPolicy.RUNTIME) @Inherited @Target(ElementType.FIELD) - public static @interface CompressionParameter {} + @interface CompressionParameter {} @Override - public default String getType() { + default String getType() { final CompressionType compressionType = getClass().getAnnotation(CompressionType.class); if (compressionType == null) @@ -78,9 +82,9 @@ public default String getType() { } - public BlockReader getReader(); + BlockReader getReader(); - public BlockWriter getWriter(); + BlockWriter getWriter(); /** * Decode an {@link InputStream}. @@ -90,7 +94,7 @@ public default String getType() { * @return the decoded input stream */ @Override - public InputStream decode(InputStream in) throws IOException; + InputStream decode(InputStream in) throws IOException; /** * Encode an {@link OutputStream}. 
@@ -100,6 +104,6 @@ public default String getType() { * @return the encoded output stream */ @Override - public OutputStream encode(OutputStream out) throws IOException; + OutputStream encode(OutputStream out) throws IOException; } \ No newline at end of file diff --git a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java index eca190d9..d2cfa4a5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/DatasetAttributes.java @@ -4,12 +4,13 @@ import java.lang.reflect.Type; import java.util.Arrays; import java.util.HashMap; +import java.util.stream.Stream; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec; +import org.janelia.saalfeldlab.n5.shard.ShardParameters; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; @@ -19,6 +20,9 @@ import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import javax.annotation.CheckForNull; +import javax.annotation.Nullable; + /** * Mandatory dataset attributes: * @@ -26,18 +30,13 @@ *
    3. long[] : dimensions
    4. *
    5. int[] : blockSize
    6. *
    7. {@link DataType} : dataType
    8. - *
    9. {@link Compression} : compression
    10. - *
    - * - * Optional dataset attributes: - *
      - *
    1. {@link Codec}[] : codecs
    2. + *
    3. {@link Codec}... : encode/decode routines
    4. *
    * * @author Stephan Saalfeld - * */ -public class DatasetAttributes implements BlockParameters, Serializable { +//TODO Caleb: try to delete ShardParameters? +public class DatasetAttributes implements BlockParameters, ShardParameters, Serializable { private static final long serialVersionUID = -4521467080388947553L; @@ -58,60 +57,91 @@ public class DatasetAttributes implements BlockParameters, Serializable { private final long[] dimensions; private final int[] blockSize; private final DataType dataType; - private final Compression compression; private final ArrayCodec arrayCodec; private final BytesCodec[] byteCodecs; - + @Nullable private final int[] shardSize; + + /** + * Constructs a DatasetAttributes instance with specified dimensions, block size, data type, + * and array of codecs. + * + * @param dimensions the dimensions of the dataset + * @param blockSize the size of the blocks in the dataset + * @param dataType the data type of the dataset + * @param codecs the codecs used encode/decode the data + */ public DatasetAttributes( final long[] dimensions, + @Nullable final int[] shardSize, final int[] blockSize, final DataType dataType, - final Compression compression, - final Codec[] codecs) { + final Codec... codecs) { this.dimensions = dimensions; + this.shardSize = shardSize; this.blockSize = blockSize; this.dataType = dataType; - if (codecs == null && !(compression instanceof RawCompression)) { + if (codecs == null || codecs.length == 0) { byteCodecs = new BytesCodec[]{}; arrayCodec = new N5BlockCodec(); - } else if (codecs == null || codecs.length == 0) { - byteCodecs = new BytesCodec[]{}; + } else if (codecs.length == 1 && codecs[0] instanceof Compression) { + final BytesCodec compression = (BytesCodec)codecs[0]; + byteCodecs = compression instanceof RawCompression ? 
new BytesCodec[]{} : new BytesCodec[]{compression}; arrayCodec = new N5BlockCodec(); } else { if (!(codecs[0] instanceof ArrayCodec)) - throw new N5Exception("Expected first element of codecs to be ArrayCodec, but was: " + codecs[0]); + throw new N5Exception("Expected first element of codecs to be ArrayCodec, but was: " + codecs[0].getClass()); + + if (Arrays.stream(codecs).filter(c -> c instanceof ArrayCodec).count() > 1) + throw new N5Exception("Multiple ArrayCodecs found. Only one is allowed."); arrayCodec = (ArrayCodec)codecs[0]; - byteCodecs = new BytesCodec[codecs.length - 1]; - for (int i = 0; i < byteCodecs.length; i++) - byteCodecs[i] = (BytesCodec)codecs[i + 1]; + byteCodecs = Stream.of(codecs) + .skip(1) + .filter(c -> !(c instanceof RawCompression)) + .filter(c -> c instanceof BytesCodec) + .toArray(BytesCodec[]::new); } - //TODO Caleb: Do we want to do this? - this.compression = Arrays.stream(byteCodecs) - .filter(codec -> codec instanceof Compression) - .map(codec -> (Compression)codec) - .findFirst() - .orElse(compression == null ? new RawCompression() : compression); } + /** + * Constructs a DatasetAttributes instance with specified dimensions, block size, data type, + * and array of codecs. + * + * @param dimensions the dimensions of the dataset + * @param blockSize the size of the blocks in the dataset + * @param dataType the data type of the dataset + * @param codecs the codecs used encode/decode the data + */ public DatasetAttributes( final long[] dimensions, final int[] blockSize, final DataType dataType, - final Codec[] codecs) { - this(dimensions, blockSize, dataType, null, codecs); + final Codec... codecs) { + this( dimensions, null, blockSize, dataType, codecs ); } + /** + * Deprecated. {@link Compression} are {@link Codec}. Use {@code Code...} constructor instead + * Constructs a DatasetAttributes instance with specified dimensions, block size, data type, + * and compression scheme. 
This constructor is deprecated and redirects to another constructor + * with codec support. + * + * @param dimensions the dimensions of the dataset + * @param blockSize the size of the blocks in the dataset + * @param dataType the data type of the dataset + * @param compression the compression scheme used for storing the dataset + */ + @Deprecated public DatasetAttributes( final long[] dimensions, final int[] blockSize, final DataType dataType, final Compression compression) { - this(dimensions, blockSize, dataType, compression, null); + this(dimensions, blockSize, dataType, (Codec)compression); } @Override @@ -126,15 +156,33 @@ public int getNumDimensions() { return dimensions.length; } + @Override + @CheckForNull + public int[] getShardSize() { + + return shardSize; + } + @Override public int[] getBlockSize() { return blockSize; } + /** + * Deprecated. {@link Compression} is no longer a special case. prefer to reference {@link #getCodecs()} + * Will return {@link RawCompression} if no compression is otherwise provided, for legacy compatibility. 
+ * + * @return compression Codec, if one was present + */ + @Deprecated public Compression getCompression() { - return compression; + return Arrays.stream(byteCodecs) + .filter(it -> it instanceof Compression) + .map(it -> (Compression)it) + .findFirst() + .orElse(new RawCompression()); } public DataType getDataType() { @@ -152,62 +200,22 @@ public BytesCodec[] getCodecs() { return byteCodecs; } + /** + * Deprecated in favor of {@link DatasetAttributesAdapter} for serialization + * + * @return serializable properties of {@link DatasetAttributes} + */ + @Deprecated public HashMap asMap() { final HashMap map = new HashMap<>(); map.put(DIMENSIONS_KEY, dimensions); map.put(BLOCK_SIZE_KEY, blockSize); map.put(DATA_TYPE_KEY, dataType); - map.put(COMPRESSION_KEY, compression); - map.put(CODEC_KEY, concatenateCodecs()); // TODO : consider not adding to map when null + map.put(COMPRESSION_KEY, getCompression()); return map; } - static DatasetAttributes from( - final long[] dimensions, - final DataType dataType, - int[] blockSize, - Compression compression, - final String compressionVersion0Name) { - - return from(dimensions, dataType, blockSize, compression, compressionVersion0Name, null); - } - - static DatasetAttributes from( - final long[] dimensions, - final DataType dataType, - int[] blockSize, - Compression compression, - final String compressionVersion0Name, - Codec[] codecs) { - - if (blockSize == null) - blockSize = Arrays.stream(dimensions).mapToInt(a -> (int)a).toArray(); - - /* version 0 */ - if (compression == null) { - compression = getCompressionVersion0(compressionVersion0Name); - } - - return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); - } - - private static Compression getCompressionVersion0(final String compressionVersion0Name) { - - switch (compressionVersion0Name) { - case "raw": - return new RawCompression(); - case "gzip": - return new GzipCompression(); - case "bzip2": - return new Bzip2Compression(); - case "lz4": - 
return new Lz4Compression(); - case "xz": - return new XzCompression(); - } - return null; - } protected Codec[] concatenateCodecs() { @@ -227,50 +235,57 @@ public static DatasetAttributesAdapter getJsonAdapter() { return adapter; } + public static class InvalidN5DatasetException extends N5Exception { + + public InvalidN5DatasetException(String dataset, String reason, Throwable cause) { + + this(String.format("Invalid dataset %s: %s", dataset, reason), cause); + } + + public InvalidN5DatasetException(String message, Throwable cause) { + + super(message, cause); + } + } public static class DatasetAttributesAdapter implements JsonSerializer, JsonDeserializer { @Override public DatasetAttributes deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { if (json == null || !json.isJsonObject()) return null; final JsonObject obj = json.getAsJsonObject(); - if (!obj.has(DIMENSIONS_KEY) || !obj.has(BLOCK_SIZE_KEY) || !obj.has(DATA_TYPE_KEY) || !obj.has(COMPRESSION_KEY)) + final boolean validKeySet = obj.has(DIMENSIONS_KEY) + && obj.has(BLOCK_SIZE_KEY) + && obj.has(DATA_TYPE_KEY) + && (obj.has(CODEC_KEY) || obj.has(COMPRESSION_KEY) || obj.has(compressionTypeKey)); + + if (!validKeySet) return null; final long[] dimensions = context.deserialize(obj.get(DIMENSIONS_KEY), long[].class); final int[] blockSize = context.deserialize(obj.get(BLOCK_SIZE_KEY), int[].class); int[] shardSize = null; - if (obj.has(SHARD_SIZE_KEY)) { + if (obj.has(SHARD_SIZE_KEY)) shardSize = context.deserialize(obj.get(SHARD_SIZE_KEY), int[].class); - } final DataType dataType = context.deserialize(obj.get(DATA_TYPE_KEY), DataType.class); - Compression compression = null; - if (obj.has(COMPRESSION_KEY)) { - compression = CompressionAdapter.getJsonAdapter().deserialize(obj.get(COMPRESSION_KEY), Compression.class, context); - } else if (obj.has(compressionTypeKey)) { - compression = 
DatasetAttributes.getCompressionVersion0(obj.get(compressionTypeKey).getAsString()); - } - if (compression == null) - return null; final Codec[] codecs; if (obj.has(CODEC_KEY)) { codecs = context.deserialize(obj.get(CODEC_KEY), Codec[].class); - } else codecs = null; - - if (codecs != null && codecs.length == 1 && codecs[0] instanceof ShardingCodec) { - final ShardingCodec shardingCodec = (ShardingCodec)codecs[0]; - return new ShardedDatasetAttributes( - dimensions, - shardSize, - blockSize, - dataType, - shardingCodec - ); + } else if (obj.has(COMPRESSION_KEY)) { + final Compression compression = CompressionAdapter.getJsonAdapter().deserialize(obj.get(COMPRESSION_KEY), Compression.class, context); + final N5BlockCodec n5BlockCodec = dataType == DataType.UINT8 || dataType == DataType.INT8 ? new N5BlockCodec(null) : new N5BlockCodec(); + codecs = new Codec[]{compression, n5BlockCodec}; + } else if (obj.has(compressionTypeKey)) { + final Compression compression = getCompressionVersion0(obj.get(compressionTypeKey).getAsString()); + final N5BlockCodec n5BlockCodec = dataType == DataType.UINT8 || dataType == DataType.INT8 ? 
new N5BlockCodec(null) : new N5BlockCodec(); + codecs = new Codec[]{compression, n5BlockCodec}; + } else { + return null; } - return new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs); + return new DatasetAttributes(dimensions, shardSize, blockSize, dataType, codecs); } @Override public JsonElement serialize(DatasetAttributes src, Type typeOfSrc, JsonSerializationContext context) { @@ -279,15 +294,32 @@ public static class DatasetAttributesAdapter implements JsonSerializer void writeBlock( final ArrayCodec arrayCodec = datasetAttributes.getArrayCodec(); final DataBlockOutputStream dataBlockOutput = arrayCodec.encode(datasetAttributes, dataBlock, out); - OutputStream stream = dataBlockOutput; - for (final BytesCodec codec : codecs) - stream = codec.encode(stream); + OutputStream stream = encode(dataBlockOutput, codecs); dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); stream.close(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java index ba8eb7b4..78e10811 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Reader.java @@ -25,23 +25,20 @@ */ package org.janelia.saalfeldlab.n5; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.shard.Shard; +import org.janelia.saalfeldlab.n5.shard.VirtualShard; +import org.janelia.saalfeldlab.n5.util.Position; + import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -import org.janelia.saalfeldlab.n5.shard.Shard; -import org.janelia.saalfeldlab.n5.shard.ShardParameters; -import 
org.janelia.saalfeldlab.n5.shard.VirtualShard; -import org.janelia.saalfeldlab.n5.util.Position; - -import com.google.gson.Gson; -import com.google.gson.JsonElement; - /** * {@link N5Reader} implementation through {@link KeyValueAccess} with JSON * attributes parsed with {@link Gson}. @@ -95,58 +92,46 @@ default JsonElement getAttributes(final String pathName) throws N5Exception { } @SuppressWarnings({ "unchecked", "rawtypes" }) - @Override - default
    Shard readShard(final String pathName, - final A datasetAttributes, long... shardGridPosition) { + default Shard readShard( + final String keyPath, + final DatasetAttributes datasetAttributes, + long... shardGridPosition) { - final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), shardGridPosition); - return new VirtualShard(datasetAttributes, shardGridPosition, getKeyValueAccess(), path); + final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(keyPath), shardGridPosition); + return new VirtualShard<>(datasetAttributes, shardGridPosition, getKeyValueAccess(), path); } @Override - default DataBlock readBlock( + default DataBlock readBlock( final String pathName, final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception { - if (datasetAttributes instanceof ShardedDatasetAttributes) { - final ShardedDatasetAttributes shardedAttrs = (ShardedDatasetAttributes) datasetAttributes; - final long[] shardPosition = shardedAttrs.getShardPositionForBlock(gridPosition); - final Shard shard = readShard(pathName, shardedAttrs, shardPosition); - return shard.getBlock(gridPosition); - } - - final String path = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), gridPosition); + final long[] keyPos = datasetAttributes.getArrayCodec().getPositionForBlock(datasetAttributes, gridPosition); + final String keyPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(pathName), keyPos); - try (final LockedChannel lockedChannel = getKeyValueAccess().lockForReading(path)) { - return DefaultBlockReader.readBlock(lockedChannel.newInputStream(), datasetAttributes, gridPosition); - } catch (final N5Exception.N5NoSuchKeyException e) { - return null; - } catch (final IOException | UncheckedIOException e) { - throw new N5IOException( - "Failed to read block " + Arrays.toString(gridPosition) + " from dataset " + path, - e); - } + return datasetAttributes.getArrayCodec().readBlock( + getKeyValueAccess(), + keyPath, + 
datasetAttributes, + gridPosition + ); } @Override - default List> readBlocks( + default List> readBlocks( final String pathName, final DatasetAttributes datasetAttributes, final List blockPositions) throws N5Exception { // TODO which interface should have this implementation? - if (datasetAttributes instanceof ShardParameters) { - - final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; - + if (datasetAttributes.getShardSize() != null) { /* Group by shard position */ - final Map> shardBlockMap = shardAttributes.groupBlockPositions(blockPositions); - final ArrayList> blocks = new ArrayList<>(); + final Map> shardBlockMap = datasetAttributes.groupBlockPositions(blockPositions); + final ArrayList> blocks = new ArrayList<>(); for( Entry> e : shardBlockMap.entrySet()) { - final Shard shard = readShard(pathName, (DatasetAttributes & ShardParameters) shardAttributes, - e.getKey().get()); + final Shard shard = readShard(pathName, datasetAttributes, e.getKey().get()); for (final long[] blkPosition : e.getValue()) { blocks.add(shard.getBlock(blkPosition)); @@ -154,8 +139,8 @@ default List> readBlocks( } return blocks; - } else - return GsonN5Reader.super.readBlocks(pathName, datasetAttributes, blockPositions); + } + return GsonN5Reader.super.readBlocks(pathName, datasetAttributes, blockPositions); } @Override @@ -171,6 +156,9 @@ default String[] list(final String pathName) throws N5Exception { /** * Constructs the path for a data block in a dataset at a given grid * position. + *
    + * If the gridPosition passed in refers to a shard position + * in a sharded dataset, this will return the path to the shard key *

    * The returned path is * @@ -198,33 +186,6 @@ default String absoluteDataBlockPath( return getKeyValueAccess().compose(getURI(), components); } - /** - * Constructs the path for a shard in a dataset at a given grid position. - *

    - * The returned path is - * - *

    -	 * $basePath/datasetPathName/$shardPosition[0]/$shardPosition[1]/.../$shardPosition[n]
    -	 * 
    - *

    - * This is the file into which the shard will be stored. - * - * @param normalPath normalized dataset path - * @param shardGridPosition to the target shard - * @return the absolute path to the shard at shardGridPosition - */ - default String absoluteShardPath( - final String normalPath, - final long... shardGridPosition) { - - final String[] components = new String[shardGridPosition.length + 1]; - components[0] = normalPath; - int i = 0; - for (final long p : shardGridPosition) - components[++i] = Long.toString(p); - - return getKeyValueAccess().compose(getURI(), components); - } /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index ac6f733a..36f5ef1d 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -82,7 +82,7 @@ default void createGroup(final String path) throws N5Exception { try { getKeyValueAccess().createDirectories(absoluteGroupPath(normalPath)); } catch (final IOException | UncheckedIOException e) { - throw new N5Exception.N5IOException("Failed to create group " + path, e); + throw new N5IOException("Failed to create group " + path, e); } } @@ -106,7 +106,7 @@ default void writeAttributes( try (final LockedChannel lock = getKeyValueAccess().lockForWriting(absoluteAttributesPath(normalGroupPath))) { GsonUtils.writeAttributes(lock.newWriter(), attributes, getGson()); } catch (final IOException | UncheckedIOException e) { - throw new N5Exception.N5IOException("Failed to write attributes into " + normalGroupPath, e); + throw new N5IOException("Failed to write attributes into " + normalGroupPath, e); } } @@ -223,26 +223,23 @@ default boolean removeAttributes(final String pathName, final List attri final DatasetAttributes datasetAttributes, final DataBlock... 
dataBlocks) throws N5Exception { - if (datasetAttributes instanceof ShardParameters) { - - final ShardParameters shardAttributes = (ShardParameters)datasetAttributes; + if (datasetAttributes.getShardSize() != null) { /* Group blocks by shard index */ - final Map>> shardBlockMap = shardAttributes.groupBlocks( + final Map>> shardBlockMap = datasetAttributes.groupBlocks( Arrays.stream(dataBlocks).collect(Collectors.toList())); for( final Entry>> e : shardBlockMap.entrySet()) { final long[] shardPosition = e.getKey().get(); - @SuppressWarnings("unchecked") - final Shard currentShard = (Shard) readShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, + final Shard currentShard = readShard(datasetPath, datasetAttributes, shardPosition); final InMemoryShard newShard = InMemoryShard.fromShard(currentShard); for( DataBlock blk : e.getValue()) newShard.addBlock(blk); - writeShard(datasetPath, (DatasetAttributes & ShardParameters)shardAttributes, newShard); + writeShard(datasetPath, datasetAttributes, newShard); } } else { @@ -256,26 +253,14 @@ default void writeBlock( final DatasetAttributes datasetAttributes, final DataBlock dataBlock) throws N5Exception { - /* Delegate to shard for writing block? How to know what type of shard? 
*/ - if (datasetAttributes instanceof ShardParameters) { - ShardParameters shardDatasetAttrs = (ShardParameters)datasetAttributes; - final long[] shardPos = shardDatasetAttrs.getShardPositionForBlock(dataBlock.getGridPosition()); - final String shardPath = absoluteShardPath(N5URI.normalizeGroupPath(path), shardPos); - final VirtualShard shard = new VirtualShard<>((DatasetAttributes & ShardParameters)shardDatasetAttrs, shardPos, getKeyValueAccess(), shardPath); - shard.writeBlock(dataBlock); - return; - } + final long[] keyPos = datasetAttributes.getArrayCodec().getPositionForBlock(datasetAttributes, dataBlock); + final String keyPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), keyPos); - final String blockPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), dataBlock.getGridPosition()); - try (final LockedChannel lock = getKeyValueAccess().lockForWriting(blockPath)) { - try (final OutputStream out = lock.newOutputStream()) { - DefaultBlockWriter.writeBlock(out, datasetAttributes, dataBlock); - } - } catch (final IOException | UncheckedIOException e) { - throw new N5IOException( - "Failed to write block " + Arrays.toString(dataBlock.getGridPosition()) + " into dataset " + path, - e); - } + datasetAttributes.getArrayCodec().writeBlock( + getKeyValueAccess(), + keyPath, + datasetAttributes, + dataBlock); } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java index 157c9bdd..03741476 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonUtils.java @@ -35,7 +35,7 @@ import java.util.Map; import java.util.regex.Matcher; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.RawBytes; import org.janelia.saalfeldlab.n5.codec.Codec; import com.google.gson.Gson; @@ -61,7 +61,7 @@ static Gson registerGson(final GsonBuilder gsonBuilder) { 
gsonBuilder.registerTypeHierarchyAdapter(Compression.class, CompressionAdapter.getJsonAdapter()); gsonBuilder.registerTypeHierarchyAdapter(DatasetAttributes.class, DatasetAttributes.getJsonAdapter()); gsonBuilder.registerTypeHierarchyAdapter(Codec.class, NameConfigAdapter.getJsonAdapter(Codec.class)); - gsonBuilder.registerTypeHierarchyAdapter(ByteOrder.class, BytesCodec.byteOrderAdapter); + gsonBuilder.registerTypeHierarchyAdapter(ByteOrder.class, RawBytes.byteOrderAdapter); gsonBuilder.registerTypeHierarchyAdapter(ShardingCodec.IndexLocation.class, ShardingCodec.indexLocationAdapter); gsonBuilder.disableHtmlEscaping(); return gsonBuilder.create(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java index 04c9d889..20a04b33 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Reader.java @@ -25,6 +25,8 @@ */ package org.janelia.saalfeldlab.n5; +import org.janelia.saalfeldlab.n5.shard.Shard; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.ObjectInputStream; @@ -43,9 +45,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.janelia.saalfeldlab.n5.shard.Shard; -import org.janelia.saalfeldlab.n5.shard.ShardParameters; - /** * A simple structured container for hierarchies of chunked * n-dimensional datasets and attributes. @@ -55,7 +54,7 @@ */ public interface N5Reader extends AutoCloseable { - public static class Version { + class Version { private final int major; private final int minor; @@ -192,12 +191,12 @@ public boolean isCompatible(final Version version) { /** * SemVer version of this N5 spec. */ - public static final Version VERSION = new Version(4, 0, 0); + Version VERSION = new Version(4, 0, 0); /** * Version attribute key. 
*/ - public static final String VERSION_KEY = "n5"; + String VERSION_KEY = "n5"; /** * Get the SemVer version of this container as specified in the 'version' @@ -223,7 +222,7 @@ default Version getVersion() throws N5Exception { * @return the base path URI */ // TODO: should this throw URISyntaxException or can we assume that this is - // never possible if we were able to instantiate this N5Reader? + // never possible if we were able to instantiate this N5Reader? URI getURI(); /** @@ -291,7 +290,7 @@ T getAttribute( * @throws N5Exception * the exception */ - DataBlock readBlock( + DataBlock readBlock( final String pathName, final DatasetAttributes datasetAttributes, final long... gridPosition) throws N5Exception; @@ -299,14 +298,13 @@ DataBlock readBlock( /** * Reads the {@link Shard} at the corresponding grid position. * - * @param - * @param datasetPath - * @param datasetAttributes - * @param shardGridPosition + * @param the data access type for the blocks in the shard + * @param datasetPath to read the shard from + * @param datasetAttributes for the shard + * @param shardGridPosition of the shard we are reading * @return the shard */ - public Shard readShard(final String datasetPath, - final A datasetAttributes, long... shardGridPosition); + Shard readShard(final String datasetPath, final DatasetAttributes datasetAttributes, long... shardGridPosition); /** * Reads multiple {@link DataBlock}s. 
@@ -324,12 +322,12 @@ public Shard readShard(final * @throws N5Exception * the exception */ - default List> readBlocks( + default List> readBlocks( final String pathName, final DatasetAttributes datasetAttributes, final List gridPositions) throws N5Exception { - final ArrayList> blocks = new ArrayList<>(); + final ArrayList> blocks = new ArrayList<>(); for( final long[] p : gridPositions ) blocks.add(readBlock(pathName, datasetAttributes, p)); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 74df1da3..2471135c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -30,15 +30,15 @@ import java.io.ObjectOutputStream; import java.io.Serializable; import java.io.UncheckedIOException; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.shard.Shard; import org.janelia.saalfeldlab.n5.shard.ShardParameters; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; /** * A simple structured container API for hierarchies of chunked @@ -212,18 +212,6 @@ default void createDataset( setDatasetAttributes(normalPath, datasetAttributes); } - default void createDataset( - final String datasetPath, - final long[] dimensions, - final int[] shardSize, - final int[] blockSize, - final DataType dataType, - final Compression compression) throws N5Exception { - - final Codec[] codecs = new Codec[]{new ShardingCodec(blockSize, null, null, IndexLocation.END)}; - - createDataset(datasetPath, new DatasetAttributes(dimensions, shardSize, dataType, compression, codecs)); - } /** * Creates a dataset. 
This does not create any data but the path and @@ -233,8 +221,7 @@ default void createDataset( * @param dimensions the dataset dimensions * @param blockSize the block size * @param dataType the data type - * @param compression the compression - * @param codecs optional codecs (may be null) + * @param codecs codecs to encode/decode with * @throws N5Exception the exception */ default void createDataset( @@ -242,13 +229,15 @@ default void createDataset( final long[] dimensions, final int[] blockSize, final DataType dataType, - final Compression compression, - final Codec[] codecs) throws N5Exception { + final Codec... codecs) throws N5Exception { - createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, compression, codecs)); + createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, codecs)); } /** + * DEPRECATED. {@link Compression}s are {@link Codec}s. + * Use {@link #createDataset(String, long[], int[], DataType, Codec...)} + *

    * Creates a dataset. This does not create any data but the path and * mandatory attributes only. * @@ -259,6 +248,7 @@ default void createDataset( * @param compression the compression * @throws N5Exception the exception */ + @Deprecated default void createDataset( final String datasetPath, final long[] dimensions, @@ -266,7 +256,7 @@ default void createDataset( final DataType dataType, final Compression compression) throws N5Exception { - createDataset(datasetPath, new DatasetAttributes(dimensions, blockSize, dataType, compression, null)); + createDataset(datasetPath, dimensions, blockSize, dataType, new N5BlockCodec(), compression); } /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java index 04a8a037..335c252e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/ShardedDatasetAttributes.java @@ -6,11 +6,11 @@ import org.janelia.saalfeldlab.n5.codec.Codec.ArrayCodec; import org.janelia.saalfeldlab.n5.codec.Codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; -import org.janelia.saalfeldlab.n5.shard.ShardIndex; import org.janelia.saalfeldlab.n5.shard.ShardParameters; import org.janelia.saalfeldlab.n5.shard.ShardingCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; +@Deprecated public class ShardedDatasetAttributes extends DatasetAttributes implements ShardParameters { private static final long serialVersionUID = -4559068841006651814L; @@ -28,7 +28,8 @@ public ShardedDatasetAttributes ( final DeterministicSizeCodec[] indexCodecs, final IndexLocation indexLocation ) { - super(dimensions, blockSize, dataType, null, blocksCodecs); + //TODO Caleb: Can we just let the super codecs() return this ShardCodec? 
+ super(dimensions, blockSize, dataType, blocksCodecs); if (!validateShardBlockSize(shardSize, blockSize)) { throw new N5Exception(String.format("Invalid shard %s / block size %s", @@ -96,7 +97,6 @@ protected Codec[] concatenateCodecs() { return new Codec[] { shardingCodec }; } - @Override public IndexLocation getIndexLocation() { return getShardingCodec().getIndexLocation(); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java index 209d169b..ed544257 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/Codec.java @@ -6,24 +6,29 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; +import java.io.UncheckedIOException; +import java.util.Arrays; import org.apache.commons.io.input.ProxyInputStream; import org.apache.commons.io.output.ProxyOutputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.KeyValueAccess; +import org.janelia.saalfeldlab.n5.LockedChannel; +import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.serialization.NameConfig; /** * Interface representing a filter can encode a {@link OutputStream}s when writing data, and decode * the {@link InputStream}s when reading data. - * + *

    * Modeled after Filters in * Zarr. */ @NameConfig.Prefix("codec") public interface Codec extends Serializable { - public static OutputStream encode(OutputStream out, Codec.BytesCodec... bytesCodecs) throws IOException { + static OutputStream encode(OutputStream out, Codec.BytesCodec... bytesCodecs) throws IOException { OutputStream stream = out; for (final BytesCodec codec : bytesCodecs) stream = codec.encode(stream); @@ -31,7 +36,7 @@ public static OutputStream encode(OutputStream out, Codec.BytesCodec... bytesCod return stream; } - public static InputStream decode(InputStream out, Codec.BytesCodec... bytesCodecs) throws IOException { + static InputStream decode(InputStream out, Codec.BytesCodec... bytesCodecs) throws IOException { InputStream stream = out; for (final BytesCodec codec : bytesCodecs) stream = codec.decode(stream); @@ -39,44 +44,55 @@ public static InputStream decode(InputStream out, Codec.BytesCodec... bytesCodec return stream; } - public interface BytesCodec extends Codec { + interface BytesCodec extends Codec { /** * Decode an {@link InputStream}. * - * @param in - * input stream + * @param in input stream * @return the decoded input stream */ - public InputStream decode(final InputStream in) throws IOException; + InputStream decode(final InputStream in) throws IOException; /** * Encode an {@link OutputStream}. * - * @param out - * the output stream + * @param out the output stream * @return the encoded output stream */ - public OutputStream encode(final OutputStream out) throws IOException; + OutputStream encode(final OutputStream out) throws IOException; } interface ArrayCodec extends DeterministicSizeCodec { + default long[] getPositionForBlock(final DatasetAttributes attributes, final DataBlock datablock) { + + return datablock.getGridPosition(); + } + + default long[] getPositionForBlock(final DatasetAttributes attributes, final long... blockPosition) { + + return blockPosition; + } /** * Decode an {@link InputStream}. 
* - * @param in - * input stream + * @param in input stream * @return the DataBlock corresponding to the input stream */ - public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, final InputStream in) throws IOException; + DataBlockInputStream decode( + final DatasetAttributes attributes, + final long[] gridPosition, + final InputStream in) throws IOException; /** * Encode a {@link DataBlock}. * * @param datablock the datablock to encode */ - public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock datablock, + DataBlockOutputStream encode( + final DatasetAttributes attributes, + final DataBlock datablock, final OutputStream out) throws IOException; @Override default long encodedSize(long size) { @@ -88,9 +104,55 @@ public DataBlockOutputStream encode(final DatasetAttributes attributes, final Da return size; } + default void writeBlock( + final KeyValueAccess kva, + final String keyPath, + final DatasetAttributes datasetAttributes, + final DataBlock dataBlock) { + + try (final LockedChannel lock = kva.lockForWriting(keyPath)) { + try (final OutputStream out = lock.newOutputStream()) { + final DataBlockOutputStream dataBlockOutput = encode(datasetAttributes, dataBlock, out); + try (final OutputStream stream = Codec.encode(dataBlockOutput, datasetAttributes.getCodecs())) { + dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); + } + } + } catch (final IOException | UncheckedIOException e) { + final String msg = "Failed to write block " + Arrays.toString(dataBlock.getGridPosition()) + " into dataset " + keyPath; + throw new N5Exception.N5IOException( msg, e); + } + } + + default DataBlock readBlock( + final KeyValueAccess kva, + final String keyPath, + final DatasetAttributes datasetAttributes, + final long[] gridPosition) { + + try (final LockedChannel lockedChannel = kva.lockForReading(keyPath)) { + try(final InputStream in = lockedChannel.newInputStream()) { + + final BytesCodec[] 
codecs = datasetAttributes.getCodecs(); + final ArrayCodec arrayCodec = datasetAttributes.getArrayCodec(); + final DataBlockInputStream dataBlockStream = arrayCodec.decode(datasetAttributes, gridPosition, in); + InputStream stream = Codec.decode(dataBlockStream, codecs); + + final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); + dataBlock.readData(dataBlockStream.getDataInput(stream)); + stream.close(); + + return dataBlock; + } + } catch (final N5Exception.N5NoSuchKeyException e) { + return null; + } catch (final IOException | UncheckedIOException e) { + final String msg = "Failed to read block " + Arrays.toString(gridPosition) + " from dataset " + keyPath; + throw new N5Exception.N5IOException( msg, e); + } + } } - public abstract class DataBlockInputStream extends ProxyInputStream { + abstract class DataBlockInputStream extends ProxyInputStream { protected DataBlockInputStream(InputStream in) { @@ -98,12 +160,12 @@ protected DataBlockInputStream(InputStream in) { super(in); } - public abstract DataBlock allocateDataBlock() throws IOException; + public abstract DataBlock allocateDataBlock() throws IOException; public abstract DataInput getDataInput(final InputStream inputStream); } - public abstract class DataBlockOutputStream extends ProxyOutputStream { + abstract class DataBlockOutputStream extends ProxyOutputStream { protected DataBlockOutputStream(final OutputStream out) { @@ -113,6 +175,6 @@ protected DataBlockOutputStream(final OutputStream out) { public abstract DataOutput getDataOutput(final OutputStream outputStream); } - public String getType(); + String getType(); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java index f9d65a87..78d6313a 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java +++ 
b/src/main/java/org/janelia/saalfeldlab/n5/codec/FixedLengthConvertedInputStream.java @@ -5,7 +5,7 @@ import java.nio.ByteBuffer; import java.util.function.BiConsumer; -/* +/** * An {@link InputStream} that converts between two fixed-length types. */ public class FixedLengthConvertedInputStream extends InputStream { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java index 7232d9ac..6b83f466 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/N5BlockCodec.java @@ -17,13 +17,20 @@ import com.google.common.io.LittleEndianDataInputStream; import com.google.common.io.LittleEndianDataOutputStream; +import javax.annotation.CheckForNull; +import javax.annotation.Nullable; + @NameConfig.Name(value = N5BlockCodec.TYPE) public class N5BlockCodec implements Codec.ArrayCodec { private static final long serialVersionUID = 3523505403978222360L; public static final String TYPE = "n5bytes"; + public static final int MODE_DEFAULT = 0; + public static final int MODE_VARLENGTH = 1; + public static final int MODE_OBJECT = 2; + @Nullable @NameConfig.Parameter(value = "endian", optional = true) protected final ByteOrder byteOrder; @@ -32,18 +39,103 @@ public N5BlockCodec() { this(ByteOrder.BIG_ENDIAN); } - public N5BlockCodec(final ByteOrder byteOrder) { + public N5BlockCodec(@Nullable final ByteOrder byteOrder) { this.byteOrder = byteOrder; } + /** + * ByteOrder used to encode/decode this block of data.
    + * Will be `null` when {@link DatasetAttributes#getDataType()} refers to a single-byte type, + * + * @return the byte order for this codec + */ + @CheckForNull public ByteOrder getByteOrder() { return byteOrder; } @Override public DataBlockInputStream decode(final DatasetAttributes attributes, final long[] gridPosition, InputStream in) throws IOException { - return new DataBlockInputStream(in) { + return new N5DataBlockInputStream(in, attributes, gridPosition, byteOrder); + } + + + @Override + public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, + final OutputStream out) + throws IOException { + + return new N5DataBlockOutputStream(out, attributes, dataBlock, byteOrder); + } + + @Override + public String getType() { + + return TYPE; + } + + private static class N5DataBlockOutputStream extends DataBlockOutputStream { + + private final DatasetAttributes attributes; + private final DataBlock dataBlock; + private final ByteOrder byteOrder; + boolean start = true; + + + public N5DataBlockOutputStream(final OutputStream out, final DatasetAttributes attributes, final DataBlock dataBlock, ByteOrder byteOrder) { + super(out); + this.attributes = attributes; + this.dataBlock = dataBlock; + this.byteOrder = byteOrder; + } + + @Override + protected void beforeWrite(int n) throws IOException { + + if (start) { + writeHeader(); + start = false; + } + } + + private void writeHeader() throws IOException { + final DataOutput dos = getDataOutput(out); + + final int mode; + if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) + mode = MODE_OBJECT; + else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) + mode = MODE_DEFAULT; + else + mode = MODE_VARLENGTH; + + dos.writeShort(mode); + + if (mode != MODE_OBJECT) { + dos.writeShort(attributes.getNumDimensions()); + for (final int size : dataBlock.getSize()) + dos.writeInt(size); + } + + if (mode != MODE_DEFAULT) + 
dos.writeInt(dataBlock.getNumElements()); + } + + @Override + public DataOutput getDataOutput(final OutputStream outputStream) { + + if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) + return new DataOutputStream(outputStream); + else + return new LittleEndianDataOutputStream(outputStream); + } + } + + private static class N5DataBlockInputStream extends DataBlockInputStream { + private final DatasetAttributes attributes; + private final long[] gridPosition; + private final ByteOrder byteOrder; private short mode = -1; private int[] blockSize = null; @@ -51,6 +143,12 @@ public ByteOrder getByteOrder() { private boolean start = true; + N5DataBlockInputStream(final InputStream in, final DatasetAttributes attributes, final long[] gridPosition, ByteOrder byteOrder) { + super(in); + this.attributes = attributes; + this.gridPosition = gridPosition; + this.byteOrder = byteOrder; + } @Override protected void beforeRead(int n) throws IOException { if (start) { @@ -60,32 +158,31 @@ public ByteOrder getByteOrder() { } @Override - public DataBlock allocateDataBlock() throws IOException { + public DataBlock allocateDataBlock() throws IOException { if (start) { readHeader(); start = false; } - if (mode != 2) { - return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); - } else { - return attributes.getDataType().createDataBlock(null, gridPosition, numElements); + if (mode == MODE_OBJECT) { + return (DataBlock) attributes.getDataType().createDataBlock(null, gridPosition, numElements); } + return (DataBlock) attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); } private void readHeader() throws IOException { final DataInput dis = getDataInput(in); mode = dis.readShort(); - if (mode != 2) { - final int nDim = dis.readShort(); - blockSize = new int[nDim]; - for (int d = 0; d < nDim; ++d) - blockSize[d] = dis.readInt(); - if (mode == 0) { - numElements = DataBlock.getNumElements(blockSize); - } else { - numElements = dis.readInt(); - 
} + if (mode == MODE_OBJECT) { + numElements = dis.readInt(); + return; + } + final int nDim = dis.readShort(); + blockSize = new int[nDim]; + for (int d = 0; d < nDim; ++d) + blockSize[d] = dis.readInt(); + if (mode == MODE_DEFAULT) { + numElements = DataBlock.getNumElements(blockSize); } else { numElements = dis.readInt(); } @@ -99,65 +196,7 @@ public DataInput getDataInput(final InputStream inputStream) { else return new LittleEndianDataInputStream(inputStream); } - }; - } - - - @Override - public DataBlockOutputStream encode(final DatasetAttributes attributes, final DataBlock dataBlock, - final OutputStream out) - throws IOException { - - return new DataBlockOutputStream(out) { - - boolean start = true; - - @Override - protected void beforeWrite(int n) throws IOException { - - if (start) { - writeHeader(); - start = false; - } - } - - private void writeHeader() throws IOException { - final DataOutput dos = getDataOutput(out); - final int mode; - if (attributes.getDataType() == DataType.OBJECT || dataBlock.getSize() == null) - mode = 2; - else if (dataBlock.getNumElements() == DataBlock.getNumElements(dataBlock.getSize())) - mode = 0; - else - mode = 1; - dos.writeShort(mode); - - if (mode != 2) { - dos.writeShort(attributes.getNumDimensions()); - for (final int size : dataBlock.getSize()) - dos.writeInt(size); - } - - if (mode != 0) - dos.writeInt(dataBlock.getNumElements()); - } - - @Override - public DataOutput getDataOutput(final OutputStream outputStream) { - - if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) - return new DataOutputStream(outputStream); - else - return new LittleEndianDataOutputStream(outputStream); - } - }; - } - - @Override - public String getType() { - - return TYPE; } } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/codec/RawBytes.java similarity index 89% rename from src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java rename to 
src/main/java/org/janelia/saalfeldlab/n5/codec/RawBytes.java index 66e1a8b2..bb3232e4 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/codec/BytesCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/codec/RawBytes.java @@ -24,8 +24,10 @@ import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; -@NameConfig.Name(value = BytesCodec.TYPE) -public class BytesCodec implements Codec.ArrayCodec { +import javax.annotation.Nullable; + +@NameConfig.Name(value = RawBytes.TYPE) +public class RawBytes implements Codec.ArrayCodec { private static final long serialVersionUID = 3282569607795127005L; @@ -34,16 +36,17 @@ public class BytesCodec implements Codec.ArrayCodec { @NameConfig.Parameter(value = "endian", optional = true) protected final ByteOrder byteOrder; - public BytesCodec() { + public RawBytes() { this(ByteOrder.LITTLE_ENDIAN); } - public BytesCodec(final ByteOrder byteOrder) { + public RawBytes(final ByteOrder byteOrder) { this.byteOrder = byteOrder; } + @Nullable public ByteOrder getByteOrder() { return byteOrder; } @@ -55,15 +58,13 @@ public DataBlockInputStream decode(final DatasetAttributes attributes, final lon return new DataBlockInputStream(in) { private int[] blockSize = attributes.getBlockSize(); - private int numElements = Arrays.stream(blockSize).reduce(1, (x, y) -> { - return x * y; - }); + private int numElements = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); @Override - protected void beforeRead(int n) throws IOException {} + protected void beforeRead(int n) {} @Override - public DataBlock allocateDataBlock() throws IOException { + public DataBlock allocateDataBlock() { return attributes.getDataType().createDataBlock(blockSize, gridPosition, numElements); } @@ -73,8 +74,8 @@ public DataInput getDataInput(final InputStream inputStream) { if (byteOrder.equals(ByteOrder.BIG_ENDIAN)) return new DataInputStream(inputStream); - else - return new LittleEndianDataInputStream(inputStream); + + return new 
LittleEndianDataInputStream(inputStream); } }; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java index 2cdb392f..4e2ac831 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/AbstractShard.java @@ -11,7 +11,7 @@ public abstract class AbstractShard implements Shard { private final long[] gridPosition; - public AbstractShard(final A datasetAttributes, final long[] gridPosition, + public AbstractShard(final DatasetAttributes datasetAttributes, final long[] gridPosition, final ShardIndex index) { this.datasetAttributes = datasetAttributes; @@ -20,9 +20,9 @@ public AbstractShard(final A dat } @Override - public A getDatasetAttributes() { + public DatasetAttributes getDatasetAttributes() { - return (A)datasetAttributes; + return datasetAttributes; } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index b36dc5ac..f7a01fc8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -1,21 +1,9 @@ package org.janelia.saalfeldlab.n5.shard; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.io.output.CountingOutputStream; import org.apache.commons.io.output.ProxyOutputStream; -import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; import 
org.janelia.saalfeldlab.n5.DefaultBlockReader; @@ -26,6 +14,15 @@ import org.janelia.saalfeldlab.n5.util.GridIterator; import org.janelia.saalfeldlab.n5.util.Position; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + public class InMemoryShard extends AbstractShard { /* Map of a hash of the DataBlocks `gridPosition` to the block */ @@ -37,14 +34,15 @@ public class InMemoryShard extends AbstractShard { * Use morton- or c-ording instead of writing blocks out in the order they're added? * (later) */ - public InMemoryShard(final A datasetAttributes, final long[] shardPosition) { + public InMemoryShard(final DatasetAttributes datasetAttributes, final long[] shardPosition) { this( datasetAttributes, shardPosition, null); indexBuilder = new ShardIndexBuilder(this); - indexBuilder.indexLocation(datasetAttributes.getIndexLocation()); + final IndexLocation indexLocation = ((ShardingCodec)datasetAttributes.getArrayCodec()).getIndexLocation(); + indexBuilder.indexLocation(indexLocation); } - public InMemoryShard(final A datasetAttributes, final long[] gridPosition, + public InMemoryShard(final DatasetAttributes datasetAttributes, final long[] gridPosition, ShardIndex index) { super(datasetAttributes, gridPosition, index); @@ -137,8 +135,8 @@ public void write(final OutputStream out) throws IOException { writeShardStart(out, this); } - public static InMemoryShard readShard( - final KeyValueAccess kva, final String key, final long[] gridPosition, final A attributes) + public static InMemoryShard readShard( + final KeyValueAccess kva, final String key, final long[] gridPosition, final DatasetAttributes attributes) throws IOException { try (final LockedChannel lockedChannel = kva.lockForReading(key)) { @@ -152,8 +150,8 @@ public static InMemoryShard InMemoryShard readShard( - final InputStream 
inputStream, final long[] gridPosition, final A attributes) throws IOException { + public static InMemoryShard readShard( + final InputStream inputStream, final long[] gridPosition, final DatasetAttributes attributes) throws IOException { try (ByteArrayOutputStream result = new ByteArrayOutputStream()) { byte[] buffer = new byte[1024]; @@ -164,11 +162,11 @@ public static InMemoryShard InMemoryShard readShard( + public static InMemoryShard readShard( final byte[] data, - long[] shardPosition, final A attributes) throws IOException { + long[] shardPosition, final DatasetAttributes attributes) throws IOException { - final ShardIndex index = attributes.createIndex(); + final ShardIndex index = ((ShardingCodec)attributes.getArrayCodec()).createIndex(attributes); ShardIndex.read(data, index); final InMemoryShard shard = new InMemoryShard(attributes, shardPosition, index); @@ -224,17 +222,18 @@ public static InMemoryShard fromShard(Shard shard) { return inMemoryShard; } - protected static void writeShardEndStream( + protected static void writeShardEndStream( final OutputStream out, InMemoryShard shard ) throws IOException { - final A datasetAttributes = shard.getDatasetAttributes(); + final DatasetAttributes datasetAttributes = shard.getDatasetAttributes(); final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.END); - indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); + final ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + indexBuilder.setCodecs(shardingCodec.getIndexCodecs()); - // Neccesary to stop `close()` when writing blocks from closing out base OutputStream + // Necessary to stop `close()` when writing blocks from closing out base OutputStream final ProxyOutputStream nop = new ProxyOutputStream(out) { @Override public void close() { //nop @@ -255,15 +254,15 @@ protected static void writeSh ShardIndex.write(indexBuilder.build(), out); } - protected 
static void writeShardEnd( + protected static void writeShardEnd( final OutputStream out, InMemoryShard shard ) throws IOException { - final A datasetAttributes = shard.getDatasetAttributes(); - final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.END); - indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); + final DatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + indexBuilder.setCodecs(shardingCodec.getIndexCodecs()); for (DataBlock block : shard.getBlocks()) { final ByteArrayOutputStream os = new ByteArrayOutputStream(); @@ -276,14 +275,16 @@ protected static void writeSh ShardIndex.write(indexBuilder.build(), out); } - protected static void writeShardStart( + protected static void writeShardStart( final OutputStream out, InMemoryShard shard ) throws IOException { - final A datasetAttributes = shard.getDatasetAttributes(); + final DatasetAttributes datasetAttributes = shard.getDatasetAttributes(); + final ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + final ShardIndexBuilder indexBuilder = new ShardIndexBuilder(shard); indexBuilder.indexLocation(IndexLocation.START); - indexBuilder.setCodecs(datasetAttributes.getShardingCodec().getIndexCodecs()); + indexBuilder.setCodecs(shardingCodec.getIndexCodecs()); final List blockData = new ArrayList<>(shard.numBlocks()); for (DataBlock block : shard.getBlocks()) { diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 69ed415f..2e1d7e7e 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -24,7 +24,7 @@ default int[] getBlockGridSize() { return getDatasetAttributes().getBlocksPerShard(); } - public A getDatasetAttributes(); + 
DatasetAttributes getDatasetAttributes(); /** * Returns the size of shards in pixel units. diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index 10433c10..f50a6d62 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -14,6 +14,7 @@ import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; +import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import java.io.ByteArrayInputStream; @@ -137,43 +138,50 @@ public long numBytes() { return totalNumBytes; } - public static ShardIndex read(byte[] data, final ShardIndex index) throws IOException { + public static boolean read(byte[] data, final ShardIndex index) { final IndexByteBounds byteBounds = byteBounds(index, data.length); final ByteArrayInputStream is = new ByteArrayInputStream(data); is.skip(byteBounds.start); + try { BoundedInputStream bIs = BoundedInputStream.builder() .setInputStream(is) .setMaxCount(byteBounds.size).get(); - return read(bIs, index); + read(bIs, index); + return true; + } catch (IOException e) { + return false; + } } - public static ShardIndex read(InputStream in, final ShardIndex index) throws IOException { + public static void read(InputStream in, final ShardIndex index) throws IOException { @SuppressWarnings("unchecked") final DataBlock indexBlock = (DataBlock) DefaultBlockReader.readBlock(in, index.getIndexAttributes(), index.gridPosition); final long[] indexData = indexBlock.getData(); System.arraycopy(indexData, 0, index.data, 0, index.data.length); - return index; } - public static ShardIndex read( + public static boolean read( final KeyValueAccess keyValueAccess, final String key, final ShardIndex index - ) throws IOException 
{ + ) { - final IndexByteBounds byteBounds = byteBounds(index, keyValueAccess.size(key)); - try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { - try (final InputStream in = lockedChannel.newInputStream()) { - return read(in,index); + try { + final IndexByteBounds byteBounds = byteBounds(index, keyValueAccess.size(key)); + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(key, byteBounds.start, byteBounds.end)) { + try (final InputStream in = lockedChannel.newInputStream()) { + read(in,index); + return true; + } + } catch (final IOException | UncheckedIOException e) { + throw new N5IOException("Failed to read shard index from " + key, e); } - } catch (final N5Exception.N5NoSuchKeyException e) { - return null; - } catch (final IOException | UncheckedIOException e) { - throw new N5IOException("Failed to read shard index from " + key, e); + } catch (final IOException | N5Exception.N5NoSuchKeyException e) { + return false; } } @@ -213,16 +221,18 @@ private DatasetAttributes getIndexAttributes() { Arrays.stream(getSize()).mapToLong(it -> it).toArray(), getSize(), DataType.UINT64, - null, codecs ); return indexAttributes; } - public static IndexByteBounds byteBounds(ShardedDatasetAttributes datasetAttributes, final long objectSize) { + public static IndexByteBounds byteBounds(DatasetAttributes datasetAttributes, final long objectSize) { + + ShardingCodec shardCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + final ShardIndex index = shardCodec.createIndex(datasetAttributes); - final long indexSize = datasetAttributes.createIndex().numBytes(); - return byteBounds(indexSize, datasetAttributes.getIndexLocation(), objectSize); + final long indexSize = index.numBytes(); + return byteBounds(indexSize, index.location, objectSize); } public static IndexByteBounds byteBounds(final ShardIndex index, long objectSize) { @@ -252,15 +262,17 @@ public IndexByteBounds(long start, long end) { } } 
- public static ShardIndex read(FileChannel channel, ShardedDatasetAttributes datasetAttributes) throws IOException { + //TODO Caleb: Probably don't need to keep this eventually + public static ShardIndex read(FileChannel channel, DatasetAttributes datasetAttributes) throws IOException { // TODO need codecs // TODO FileChannel is too specific - generalize + ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); final int[] indexShape = prepend(2, datasetAttributes.getBlocksPerShard()); final int indexSize = (int)Arrays.stream(indexShape).reduce(1, (x, y) -> x * y); final int indexBytes = BYTES_PER_LONG * indexSize; - if (datasetAttributes.getIndexLocation() == IndexLocation.END) { + if (shardingCodec.getIndexLocation() == IndexLocation.END) { channel.position(channel.size() - indexBytes); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java index ddc3ba28..1791c944 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardParameters.java @@ -5,6 +5,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Spliterator; import java.util.Spliterators; import java.util.TreeMap; @@ -14,26 +15,23 @@ import org.janelia.saalfeldlab.n5.BlockParameters; import org.janelia.saalfeldlab.n5.DataBlock; -import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.GridIterator; import org.janelia.saalfeldlab.n5.util.Position; +import javax.annotation.CheckForNull; + +@Deprecated public interface ShardParameters extends BlockParameters { - public ShardingCodec getShardingCodec(); /** * The size of the blocks in pixel units. * * @return the number of pixels per dimension for this shard. 
*/ - public int[] getShardSize(); - - public IndexLocation getIndexLocation(); + @CheckForNull + int[] getShardSize(); - default ShardIndex createIndex() { - return new ShardIndex(getBlocksPerShard(), getIndexLocation(), getShardingCodec().getIndexCodecs()); - } /** * Returns the number of blocks per dimension for a shard. @@ -42,11 +40,13 @@ default ShardIndex createIndex() { */ default int[] getBlocksPerShard() { + final int[] shardSize = getShardSize(); + Objects.requireNonNull(shardSize, "getShardSize() must not be null"); final int nd = getNumDimensions(); final int[] blocksPerShard = new int[nd]; final int[] blockSize = getBlockSize(); for (int i = 0; i < nd; i++) - blocksPerShard[i] = getShardSize()[i] / blockSize[i]; + blocksPerShard[i] = shardSize[i] / blockSize[i]; return blocksPerShard; } @@ -57,9 +57,9 @@ default int[] getBlocksPerShard() { * @return blocks per image */ default long[] blocksPerImage() { - return IntStream.range(0, getNumDimensions()).mapToLong(i -> { - return (long) Math.ceil(getDimensions()[i] / getBlockSize()[i]); - }).toArray(); + return IntStream.range(0, getNumDimensions()) + .mapToLong(i -> (long) Math.ceil(getDimensions()[i] / getBlockSize()[i])) + .toArray(); } /** @@ -68,9 +68,9 @@ default long[] blocksPerImage() { * @return shards per image */ default long[] shardsPerImage() { - return IntStream.range(0, getNumDimensions()).mapToLong(i -> { - return (long)Math.ceil(getDimensions()[i] / getShardSize()[i]); - }).toArray(); + return IntStream.range(0, getNumDimensions()) + .mapToLong(i -> (long)Math.ceil(getDimensions()[i] / getShardSize()[i])) + .toArray(); } /** @@ -141,6 +141,7 @@ default long[] getBlockMinFromShardPosition(final long[] shardPosition, final lo // is this useful? 
final int[] blockSize = getBlockSize(); final int[] shardSize = getShardSize(); + Objects.requireNonNull(shardSize, "getShardSize() must not be null"); final long[] blockImagePos = new long[shardSize.length]; for (int i = 0; i < shardSize.length; i++) { blockImagePos[i] = (shardPosition[i] * shardSize[i]) + (blockPosition[i] * blockSize[i]); diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index cb65f1a4..79c609b8 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -9,6 +9,7 @@ import com.google.gson.JsonSerializer; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; +import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.serialization.N5Annotations; @@ -99,14 +100,40 @@ public DeterministicSizeCodec[] getIndexCodecs() { return indexCodecs; } + @Override public long[] getPositionForBlock(DatasetAttributes attributes, DataBlock datablock) { + + final long[] blockPosition = datablock.getGridPosition(); + return attributes.getShardPositionForBlock(blockPosition); + } + + @Override public long[] getPositionForBlock(DatasetAttributes attributes, final long... 
blockPosition) { + + return attributes.getShardPositionForBlock(blockPosition); + } @Override public DataBlockInputStream decode(DatasetAttributes attributes, long[] gridPosition, InputStream in) throws IOException { return getArrayCodec().decode(attributes, gridPosition, in); } - @Override public DataBlockOutputStream encode(DatasetAttributes attributes, DataBlock datablock, OutputStream out) throws IOException { + @Override public DataBlockOutputStream encode(DatasetAttributes attributes, DataBlock dataBlock, OutputStream out) throws IOException { + + return getArrayCodec().encode(attributes, dataBlock, out); + } + + @Override public void writeBlock(KeyValueAccess kva, String keyPath, DatasetAttributes datasetAttributes, DataBlock dataBlock) { + + final long[] shardPos = datasetAttributes.getShardPositionForBlock(dataBlock.getGridPosition()); + new VirtualShard(datasetAttributes, shardPos, kva, keyPath).writeBlock(dataBlock); + } + + @Override public DataBlock readBlock(final KeyValueAccess kva, final String keyPath, final DatasetAttributes datasetAttributes, final long... 
gridPosition) { + + final long[] shardPosition = datasetAttributes.getShardPositionForBlock(gridPosition); + return new VirtualShard(datasetAttributes, shardPosition, kva, keyPath).getBlock(gridPosition); + } - return getArrayCodec().encode(attributes, datablock, out); + ShardIndex createIndex(final DatasetAttributes attributes) { + return new ShardIndex(attributes.getBlocksPerShard(), getIndexLocation(), getIndexCodecs()); } @Override diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index 88aa6b08..ad70e4d5 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -15,12 +16,11 @@ import org.apache.commons.io.input.ProxyInputStream; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DatasetAttributes; -import org.janelia.saalfeldlab.n5.DefaultBlockReader; -import org.janelia.saalfeldlab.n5.DefaultBlockWriter; import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; +import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.util.GridIterator; public class VirtualShard extends AbstractShard { @@ -28,7 +28,7 @@ public class VirtualShard extends AbstractShard { final private KeyValueAccess keyValueAccess; final private String path; - public VirtualShard(final A datasetAttributes, long[] gridPosition, + public VirtualShard(final DatasetAttributes datasetAttributes, long[] gridPosition, final KeyValueAccess keyValueAccess, final String path) { super(datasetAttributes, 
gridPosition, null); @@ -36,23 +36,34 @@ public VirtualShard(final A data this.path = path; } - public VirtualShard(final A datasetAttributes, long[] gridPosition) { + public VirtualShard(final DatasetAttributes datasetAttributes, long[] gridPosition) { this(datasetAttributes, gridPosition, null, null); } @SuppressWarnings("unchecked") - public DataBlock getBlock(InputStream inputStream, long... blockGridPosition) throws IOException { - - // TODO this method is just a wrapper around readBlock - // is it worth keeping/ - return (DataBlock) DefaultBlockReader.readBlock( - new ProxyInputStream( inputStream ) { - @Override - public void close( ) { - //nop - } - }, datasetAttributes, blockGridPosition); + public DataBlock getBlock(InputStream in, long... blockGridPosition) throws IOException { + + ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + final Codec.BytesCodec[] codecs = shardingCodec.getCodecs(); + final Codec.ArrayCodec arrayCodec = shardingCodec.getArrayCodec(); + + final ProxyInputStream proxyIn = new ProxyInputStream(in) { + @Override + public void close() { + //nop + } + }; + final Codec.DataBlockInputStream dataBlockStream = arrayCodec.decode(datasetAttributes, blockGridPosition, proxyIn); + + final InputStream stream = Codec.decode(in, codecs); + final DataBlock dataBlock = dataBlockStream.allocateDataBlock(); + dataBlock.readData(dataBlockStream.getDataInput(stream)); + stream.close(); + + return dataBlock; + + } @Override @@ -72,21 +83,19 @@ public List> getBlocks(final int[] blockIndexes) { // sort index offsets // and keep track of relevant positions final long[] indexData = index.getData(); - List sortedOffsets = Arrays.stream(blockIndexes).mapToObj(i -> { - return new long[] { indexData[i * 2], i }; - }).filter(x -> { - return x[0] != ShardIndex.EMPTY_INDEX_NBYTES; - }).collect(Collectors.toList()); - - Collections.sort(sortedOffsets, (a, b) -> Long.compare(((long[]) a)[0], ((long[]) b)[0])); + List sortedOffsets = 
Arrays.stream(blockIndexes) + .mapToObj(i -> new long[]{indexData[i * 2], i}) + .filter(x -> x[0] != ShardIndex.EMPTY_INDEX_NBYTES) + .sorted(Comparator.comparingLong(a -> ((long[])a)[0])) + .collect(Collectors.toList()); final int nd = getDatasetAttributes().getNumDimensions(); long[] position = new long[nd]; final int[] blocksPerShard = getDatasetAttributes().getBlocksPerShard(); - final long[] blockGridMin = IntStream.range(0, nd).mapToLong(i -> { - return blocksPerShard[i] * getGridPosition()[i]; - }).toArray(); + final long[] blockGridMin = IntStream.range(0, nd) + .mapToLong(i -> blocksPerShard[i] * getGridPosition()[i]) + .toArray(); long streamPosition = 0; try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path)) { @@ -166,8 +175,8 @@ public void writeBlock(final DataBlock block) { try (final LockedChannel lockedChannel = keyValueAccess.lockForWriting(path, startByte, size)) { try ( final OutputStream channelOut = lockedChannel.newOutputStream()) { - try (final CountingOutputStream out = new CountingOutputStream(channelOut)) { - DefaultBlockWriter.writeBlock(out, datasetAttributes, block); + try (final CountingOutputStream out = new CountingOutputStream(channelOut)) {; + writeBlock(out, datasetAttributes, block); /* Update and write the index to the shard*/ index.set(startByte, out.getNumBytes(), relativePosition); @@ -184,23 +193,31 @@ public void writeBlock(final DataBlock block) { } } + void writeBlock( + final OutputStream out, + final DatasetAttributes datasetAttributes, + final DataBlock dataBlock) throws IOException { + + ShardingCodec shardingCodec = (ShardingCodec)datasetAttributes.getArrayCodec(); + final Codec.BytesCodec[] codecs = shardingCodec.getCodecs(); + final Codec.ArrayCodec arrayCodec = shardingCodec.getArrayCodec(); + final Codec.DataBlockOutputStream dataBlockOutput = arrayCodec.encode(datasetAttributes, dataBlock, out); + final OutputStream stream = Codec.encode(dataBlockOutput, codecs); + + 
dataBlock.writeData(dataBlockOutput.getDataOutput(stream)); + stream.close(); + } public ShardIndex createIndex() { // Empty index of the correct size - return getDatasetAttributes().createIndex(); + return ((ShardingCodec)getDatasetAttributes().getArrayCodec()).createIndex(getDatasetAttributes()); } @Override public ShardIndex getIndex() { - try { - final ShardIndex readIndex = ShardIndex.read(keyValueAccess, path, getDatasetAttributes().createIndex()); - index = readIndex == null ? createIndex() : readIndex; - } catch (final N5Exception.N5NoSuchKeyException e) { index = createIndex(); - } catch (IOException e) { - throw new N5IOException("Failed to read index at " + path, e); - } + ShardIndex.read(keyValueAccess, path, index); return index; } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java index a1916327..9b1e0831 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/AbstractN5Test.java @@ -216,7 +216,7 @@ public void testCreateDataset() { final DatasetAttributes info; try (N5Writer writer = createTempN5Writer()) { - writer.createDataset(datasetName, dimensions, blockSize, DataType.UINT64, new RawCompression()); + writer.createDataset(datasetName, dimensions, blockSize, DataType.UINT64); assertTrue("Dataset does not exist", writer.exists(datasetName)); @@ -264,12 +264,12 @@ public void testWriteReadByteBlockMultipleCodecs() { final long[] longBlock1 = new long[]{1,2,3,4,5,6,7,8}; final long[] dimensions1 = new long[]{2,2,2}; final int[] blockSize1 = new int[]{2,2,2}; - n5.createDataset(datasetName, dimensions1, blockSize1, DataType.INT8, new RawCompression(), codecs); + n5.createDataset(datasetName, dimensions1, blockSize1, DataType.INT8, codecs); final DatasetAttributes attributes = n5.getDatasetAttributes(datasetName); final LongArrayDataBlock dataBlock = new LongArrayDataBlock(blockSize1, new long[]{0, 0, 
0}, longBlock1); n5.writeBlock(datasetName, attributes, dataBlock); - final DatasetAttributes fakeAttributes = new DatasetAttributes(dimensions1, blockSize1, DataType.INT64, new RawCompression(), codecs); + final DatasetAttributes fakeAttributes = new DatasetAttributes(dimensions1, blockSize1, DataType.INT64, codecs); final DataBlock loadedDataBlock = n5.readBlock(datasetName, fakeAttributes, 0, 0, 0); assertArrayEquals(longBlock1, (long[])loadedDataBlock.getData()); assertTrue(n5.remove(datasetName)); @@ -336,7 +336,7 @@ public void testWriteReadIntBlock() { DataType.INT32}) { try (final N5Writer n5 = createTempN5Writer()) { - n5.createDataset(datasetName, dimensions, blockSize, dataType, compression); + n5.createDataset(datasetName, dimensions, blockSize, dataType, (Codec)compression); final DatasetAttributes attributes = n5.getDatasetAttributes(datasetName); final IntArrayDataBlock dataBlock = new IntArrayDataBlock(blockSize, new long[]{0, 0, 0}, intBlock); n5.writeBlock(datasetName, attributes, dataBlock); @@ -910,7 +910,7 @@ public void testDeepList() throws ExecutionException, InterruptedException { for (final String subGroup : subGroupNames) assertTrue("deepList contents", Arrays.asList(n5.deepList("")).contains(groupName.replaceFirst("/", "") + "/" + subGroup)); - final DatasetAttributes datasetAttributes = new DatasetAttributes(dimensions, blockSize, DataType.UINT64, new RawCompression()); + final DatasetAttributes datasetAttributes = new DatasetAttributes(dimensions, blockSize, DataType.UINT64); final LongArrayDataBlock dataBlock = new LongArrayDataBlock(blockSize, new long[]{0, 0, 0}, new long[blockNumElements]); n5.createDataset(datasetName, datasetAttributes); n5.writeBlock(datasetName, datasetAttributes, dataBlock); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccessTest.java b/src/test/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccessTest.java index e902eb88..a42f8e07 100644 --- 
a/src/test/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccessTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/FileSystemKeyValueAccessTest.java @@ -7,9 +7,6 @@ import java.nio.file.FileSystems; import java.nio.file.Paths; -import java.util.Arrays; - -import org.junit.BeforeClass; import org.junit.Test; @@ -47,12 +44,6 @@ public class FileSystemKeyValueAccessTest { {""} }; - /** - * @throws java.lang.Exception - */ - @BeforeClass - public static void setUpBeforeClass() throws Exception {} - @Test public void testComponents() { @@ -61,7 +52,6 @@ public void testComponents() { for (int i = 0; i < testPaths.length; ++i) { final String[] components = access.components(testPaths[i]); - System.out.println(String.format("%d: %s -> %s", i, testPaths[i], Arrays.toString(components))); assertArrayEquals(testPathComponents[i], components); } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/N5FSTest.java b/src/test/java/org/janelia/saalfeldlab/n5/N5FSTest.java index bd1e43aa..da0a38ec 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/N5FSTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/N5FSTest.java @@ -199,7 +199,6 @@ public void testWriteLock() throws IOException { @Test public void testLockReleaseByReader() throws IOException, ExecutionException, InterruptedException, TimeoutException { - System.out.println("Testing lock release by Reader."); final Path path = Paths.get(tempN5PathName(), "lock"); final LockedChannel lock = access.lockForWriting(path); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java index 66ef8632..ea24a7d9 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/BytesTests.java @@ -24,12 +24,12 @@ public void testSerialization() { factory.cacheAttributes(false); final GsonBuilder gsonBuilder = new GsonBuilder(); gsonBuilder.registerTypeHierarchyAdapter(Codec.class, 
NameConfigAdapter.getJsonAdapter(Codec.class)); - gsonBuilder.registerTypeAdapter(ByteOrder.class, BytesCodec.byteOrderAdapter); + gsonBuilder.registerTypeAdapter(ByteOrder.class, RawBytes.byteOrderAdapter); factory.gsonBuilder(gsonBuilder); final N5Writer reader = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); final Codec bytes = reader.getAttribute("mid_sharded", "codecs[0]/configuration/codecs[0]", Codec.class); - assertTrue("as BytesCodec", bytes instanceof BytesCodec); + assertTrue("as RawBytes", bytes instanceof RawBytes); final N5Writer writer = factory.openWriter("n5:src/test/resources/shardExamples/test.n5"); @@ -37,11 +37,8 @@ public void testSerialization() { new long[]{8, 8}, new int[]{4, 4}, DataType.UINT8, - new RawCompression(), - new Codec[]{ new N5BlockCodec(ByteOrder.LITTLE_ENDIAN), new IdentityCodec() - } ); writer.createGroup("shard"); //Should already exist, but this will ensure. writer.setAttribute("shard", "/", datasetAttributes); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java index 1035e271..f8cf5215 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/codec/FixedConvertedOutputStreamTest.java @@ -1,8 +1,11 @@ package org.janelia.saalfeldlab.n5.codec; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -38,9 +41,7 @@ public void testLengthOne() throws IOException final ByteArrayOutputStream outPlusOne = new ByteArrayOutputStream(N); final FixedLengthConvertedOutputStream convertedPlusOne = new FixedLengthConvertedOutputStream(1, 1, - (x, y) -> { - y.put((byte)(x.get() + 
1)); - }, + (x, y) -> y.put((byte)(x.get() + 1)), outPlusOne); convertedPlusOne.write(expectedData); @@ -54,14 +55,10 @@ public void testIntToByte() throws IOException final int N = 16; final ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES * N); - IntStream.range(0, N).forEach( x -> { - buf.putInt(x); - }); + IntStream.range(0, N).forEach(buf::putInt); final ByteBuffer expected = ByteBuffer.allocate(N); - IntStream.range(0, N).forEach( x -> { - expected.put((byte)x); - }); + IntStream.range(0, N).forEach( x -> expected.put((byte)x)); final ByteArrayOutputStream outStream = new ByteArrayOutputStream(N); final FixedLengthConvertedOutputStream intToByte = new FixedLengthConvertedOutputStream( @@ -72,38 +69,28 @@ public void testIntToByte() throws IOException intToByte.write(buf.array()); intToByte.close(); - System.out.println(Arrays.toString(buf.array())); - System.out.println(Arrays.toString(expected.array())); - System.out.println(Arrays.toString(outStream.toByteArray())); -// -// assertArrayEquals(expected.array(), outStream.toByteArray()); + assertArrayEquals(expected.array(), outStream.toByteArray()); + } + @Test + public void testByteToInt() throws IOException + { + + final int N = 16; + final byte[] data = new byte[16]; + for( int i = 0; i < N; i++ ) + data[i] = (byte)i; + + FixedLengthConvertedInputStream byteToInt = new FixedLengthConvertedInputStream( + 1, 4, + (input, output) -> output.putInt(input.get()), + new ByteArrayInputStream(data)); + + final DataInputStream dataStream = new DataInputStream(byteToInt); + for( int i = 0; i < N; i++ ) + assertEquals(i, dataStream.readInt()); + + dataStream.close(); + byteToInt.close(); } -// -// @Test -// public void testByteToInt() throws IOException -// { -// -// final int N = 16; -// final byte[] data = new byte[16]; -// for( int i = 0; i < N; i++ ) -// data[i] = (byte)i; -// -// FixedLengthConvertedInputStream byteToInt = new FixedLengthConvertedInputStream( -// 1, 4, -// (x, y) -> { -// y[0] = 0; // the 
setting to zero is not strictly necessary in this case -// y[1] = 0; -// y[2] = 0; -// y[3] = x[0]; -// }, -// new ByteArrayInputStream(data)); -// -// final DataInputStream dataStream = new DataInputStream(byteToInt); -// for( int i = 0; i < N; i++ ) -// assertEquals(i, dataStream.readInt()); -// -// dataStream.close(); -// byteToInt.close(); -// } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java b/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java index 1631b945..c96edc07 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/demo/BlockIterators.java @@ -12,7 +12,7 @@ import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.RawCompression; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; +import org.janelia.saalfeldlab.n5.codec.RawBytes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; @@ -33,8 +33,8 @@ public static void shardBlockIterator() { new int[] {6, 4}, // shard size new int[] {2, 2}, // block size DataType.UINT8, - new Codec[] { new BytesCodec() }, - new DeterministicSizeCodec[] { new BytesCodec() }, + new Codec[] { new RawBytes() }, + new DeterministicSizeCodec[] { new RawBytes() }, IndexLocation.END); shardPositions(attrs) diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java index f260b708..0c8ee24a 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardIndexTest.java @@ -6,23 +6,16 @@ import java.io.InputStream; import java.nio.file.Paths; -import org.apache.commons.io.output.ByteArrayOutputStream; -import 
org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.GzipCompression; import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.LockedChannel; import org.janelia.saalfeldlab.n5.N5FSTest; import org.janelia.saalfeldlab.n5.N5KeyValueWriter; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; -import org.janelia.saalfeldlab.n5.codec.Codec; +import org.janelia.saalfeldlab.n5.codec.RawBytes; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; -import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.GridIterator; import org.junit.After; -import org.junit.Ignore; import org.junit.Test; public class ShardIndexTest { @@ -40,7 +33,7 @@ public void testOffsetIndex() throws IOException { int[] shardBlockGridSize = new int[]{5,4,3}; ShardIndex index = new ShardIndex( shardBlockGridSize, - IndexLocation.END, new BytesCodec()); + IndexLocation.END, new RawBytes()); GridIterator it = new GridIterator(shardBlockGridSize); int i = 0; @@ -54,7 +47,7 @@ public void testOffsetIndex() throws IOException { shardBlockGridSize = new int[]{5,4,3,13}; index = new ShardIndex( shardBlockGridSize, - IndexLocation.END, new BytesCodec()); + IndexLocation.END, new RawBytes()); it = new GridIterator(shardBlockGridSize); i = 0; @@ -74,7 +67,7 @@ public void testReadVirtual() throws IOException { final int[] shardBlockGridSize = new int[] { 6, 5 }; final IndexLocation indexLocation = IndexLocation.END; - final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { new BytesCodec(), + final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { new RawBytes(), new Crc32cChecksumCodec() }; final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), 
"0").toString(); @@ -101,7 +94,7 @@ public void testReadInMemory() throws IOException { final int[] shardBlockGridSize = new int[] { 6, 5 }; final IndexLocation indexLocation = IndexLocation.END; final DeterministicSizeCodec[] indexCodecs = new DeterministicSizeCodec[] { - new BytesCodec(), + new RawBytes(), new Crc32cChecksumCodec() }; final String path = Paths.get(Paths.get(writer.getURI()).toAbsolutePath().toString(), "indexTest").toString(); diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java index 77d46ac4..48652cd1 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java @@ -1,21 +1,21 @@ package org.janelia.saalfeldlab.n5.shard; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.Position; import org.junit.Test; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + public class ShardPropertiesTests { @Test @@ -26,17 +26,20 @@ public void testShardProperties() throws Exception { final long[] shardPosition = new long[]{1, 1}; final int[] blkSize = new int[]{4, 4}; - final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( + final DatasetAttributes 
dsetAttrs = new DatasetAttributes( arraySize, shardSize, blkSize, DataType.UINT8, - new Codec[]{}, - new DeterministicSizeCodec[]{}, - IndexLocation.END); + new ShardingCodec( + blkSize, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END + ) + ); - @SuppressWarnings({"rawtypes", "unchecked"}) - final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); + @SuppressWarnings({"rawtypes", "unchecked"}) final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); assertArrayEquals(new int[]{4, 4}, shard.getBlockGridSize()); @@ -45,8 +48,8 @@ public void testShardProperties() throws Exception { assertArrayEquals(new long[]{1, 0}, shard.getShard(5, 0)); assertArrayEquals(new long[]{0, 1}, shard.getShard(0, 5)); -// assertNull(shard.getBlockPosition(0, 0)); -// assertNull(shard.getBlockPosition(3, 3)); + // assertNull(shard.getBlockPosition(0, 0)); + // assertNull(shard.getBlockPosition(3, 3)); assertArrayEquals(new int[]{0, 0}, shard.getBlockPosition(4, 4)); assertArrayEquals(new int[]{1, 1}, shard.getBlockPosition(5, 5)); @@ -62,17 +65,20 @@ public void testShardBlockPositionIterator() throws Exception { final long[] shardPosition = new long[]{1, 1}; final int[] blkSize = new int[]{4, 4}; - final ShardedDatasetAttributes dsetAttrs = new ShardedDatasetAttributes( + final DatasetAttributes dsetAttrs = new DatasetAttributes( arraySize, shardSize, blkSize, DataType.UINT8, - new Codec[]{}, - new DeterministicSizeCodec[]{}, - IndexLocation.END); + new ShardingCodec( + blkSize, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END + ) + ); - @SuppressWarnings({"rawtypes", "unchecked"}) - final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); + @SuppressWarnings({"rawtypes", "unchecked"}) final InMemoryShard shard = new InMemoryShard(dsetAttrs, shardPosition, null); int i = 0; Iterator it = shard.blockPositionIterator(); @@ -80,13 +86,13 @@ public void 
testShardBlockPositionIterator() throws Exception { while (it.hasNext()) { p = it.next(); - if( i == 0 ) - assertArrayEquals(new long[]{4,4}, p); + if (i == 0) + assertArrayEquals(new long[]{4, 4}, p); i++; } - assertEquals(16,i); - assertArrayEquals(new long[]{7,7}, p); + assertEquals(16, i); + assertArrayEquals(new long[]{7, 7}, p); } @Test @@ -96,14 +102,18 @@ public void testShardGrouping() { final int[] shardSize = new int[]{4, 6}; final int[] blkSize = new int[]{2, 3}; - final ShardedDatasetAttributes attrs = new ShardedDatasetAttributes( + final DatasetAttributes attrs = new DatasetAttributes( arraySize, shardSize, blkSize, DataType.UINT8, - new Codec[]{}, - new DeterministicSizeCodec[]{}, - IndexLocation.END); + new ShardingCodec( + blkSize, + new Codec[]{}, + new DeterministicSizeCodec[]{}, + IndexLocation.END + ) + ); List blockPositions = attrs.blockPositions().collect(Collectors.toList()); final Map> result = attrs.groupBlockPositions(blockPositions); @@ -112,7 +122,7 @@ public void testShardGrouping() { assertEquals(4, result.keySet().size()); // there are four blocks per shard in this image - result.values().stream().forEach( x -> assertEquals(4, x.size())); + result.values().stream().forEach(x -> assertEquals(4, x.size())); } } diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java index 6848b9df..b19e07e5 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardTest.java @@ -3,16 +3,16 @@ import org.janelia.saalfeldlab.n5.ByteArrayDataBlock; import org.janelia.saalfeldlab.n5.DataBlock; import org.janelia.saalfeldlab.n5.DataType; -import org.janelia.saalfeldlab.n5.GzipCompression; +import org.janelia.saalfeldlab.n5.DatasetAttributes; import org.janelia.saalfeldlab.n5.KeyValueAccess; import org.janelia.saalfeldlab.n5.N5FSTest; import org.janelia.saalfeldlab.n5.N5KeyValueWriter; import 
org.janelia.saalfeldlab.n5.N5Writer; import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; -import org.janelia.saalfeldlab.n5.codec.BytesCodec; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; +import org.janelia.saalfeldlab.n5.codec.RawBytes; import org.janelia.saalfeldlab.n5.codec.checksum.Crc32cChecksumCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import org.janelia.saalfeldlab.n5.util.GridIterator; @@ -23,17 +23,15 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import static org.junit.Assert.assertArrayEquals; - import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; -import java.util.Iterator; -import java.util.List; import java.util.Map; +import static org.junit.Assert.assertArrayEquals; + @RunWith(Parameterized.class) public class ShardTest { @@ -66,33 +64,39 @@ public static Collection data() { @After public void removeTempWriters() { + tempN5Factory.removeTempWriters(); } - private ShardedDatasetAttributes getTestAttributes(long[] dimensions, int[] shardSize, int[] blockSize) { - return new ShardedDatasetAttributes( + private DatasetAttributes getTestAttributes(long[] dimensions, int[] shardSize, int[] blockSize) { + + return new DatasetAttributes( dimensions, shardSize, blockSize, DataType.UINT8, - new Codec[]{new N5BlockCodec(dataByteOrder), new GzipCompression(4)}, - new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, - indexLocation + new ShardingCodec( + blockSize, + new Codec[]{new N5BlockCodec(dataByteOrder)}, //, new GzipCompression(4)}, + new DeterministicSizeCodec[]{new RawBytes(indexByteOrder), new Crc32cChecksumCodec()}, + indexLocation + ) ); } - private ShardedDatasetAttributes getTestAttributes() { - return getTestAttributes(new long[]{8, 8}, new 
int[]{4, 4}, new int[]{2, 2}); + private DatasetAttributes getTestAttributes() { + + return getTestAttributes(new long[]{8, 8}, new int[]{4, 4}, new int[]{2, 2}); } @Test public void writeReadBlocksTest() { final N5Writer writer = tempN5Factory.createTempN5Writer(); - final ShardedDatasetAttributes datasetAttributes = getTestAttributes( - new long[]{24,24}, - new int[]{8,8}, - new int[]{2,2} + final DatasetAttributes datasetAttributes = getTestAttributes( + new long[]{24, 24}, + new int[]{8, 8}, + new int[]{2, 2} ); writer.createDataset("shard", datasetAttributes); @@ -105,22 +109,21 @@ public void writeReadBlocksTest() { data[i] = (byte)((100) + (10) + i); } - writer.writeBlocks( "shard", datasetAttributes, /* shard (0, 0) */ - new ByteArrayDataBlock(blockSize, new long[]{0,0}, data), - new ByteArrayDataBlock(blockSize, new long[]{0,1}, data), - new ByteArrayDataBlock(blockSize, new long[]{1,0}, data), - new ByteArrayDataBlock(blockSize, new long[]{1,1}, data), + new ByteArrayDataBlock(blockSize, new long[]{0, 0}, data), + new ByteArrayDataBlock(blockSize, new long[]{0, 1}, data), + new ByteArrayDataBlock(blockSize, new long[]{1, 0}, data), + new ByteArrayDataBlock(blockSize, new long[]{1, 1}, data), /* shard (1, 0) */ - new ByteArrayDataBlock(blockSize, new long[]{4,0}, data), - new ByteArrayDataBlock(blockSize, new long[]{5,0}, data), + new ByteArrayDataBlock(blockSize, new long[]{4, 0}, data), + new ByteArrayDataBlock(blockSize, new long[]{5, 0}, data), /* shard (2, 2) */ - new ByteArrayDataBlock(blockSize, new long[]{11,11}, data) + new ByteArrayDataBlock(blockSize, new long[]{11, 11}, data) ); final KeyValueAccess kva = ((N5KeyValueWriter)writer).getKeyValueAccess(); @@ -135,7 +138,7 @@ public void writeReadBlocksTest() { Assert.assertTrue("Shard at" + Arrays.toString(key) + "Does not exist", kva.exists(shard)); } - final long[][] blockIndices = new long[][]{ {0,0}, {0,1}, {1,0}, {1,1}, {4,0}, {5,0}, {11,11}}; + final long[][] blockIndices = new long[][]{{0, 
0}, {0, 1}, {1, 0}, {1, 1}, {4, 0}, {5, 0}, {11, 11}}; for (long[] blockIndex : blockIndices) { final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); @@ -149,15 +152,15 @@ public void writeReadBlocksTest() { "shard", datasetAttributes, /* shard (0, 0) */ - new ByteArrayDataBlock(blockSize, new long[]{0,0}, data2), - new ByteArrayDataBlock(blockSize, new long[]{1,1}, data2), + new ByteArrayDataBlock(blockSize, new long[]{0, 0}, data2), + new ByteArrayDataBlock(blockSize, new long[]{1, 1}, data2), /* shard (0, 1) */ - new ByteArrayDataBlock(blockSize, new long[]{0,4}, data2), - new ByteArrayDataBlock(blockSize, new long[]{0,5}, data2), + new ByteArrayDataBlock(blockSize, new long[]{0, 4}, data2), + new ByteArrayDataBlock(blockSize, new long[]{0, 5}, data2), /* shard (2, 2) */ - new ByteArrayDataBlock(blockSize, new long[]{10,10}, data2) + new ByteArrayDataBlock(blockSize, new long[]{10, 10}, data2) ); final String[][] keys2 = new String[][]{ @@ -171,13 +174,13 @@ public void writeReadBlocksTest() { Assert.assertTrue("Shard at" + Arrays.toString(key) + "Does not exist", kva.exists(shard)); } - final long[][] oldBlockIndices = new long[][]{{0,1}, {1,0}, {4,0}, {5,0}, {11,11}}; + final long[][] oldBlockIndices = new long[][]{{0, 1}, {1, 0}, {4, 0}, {5, 0}, {11, 11}}; for (long[] blockIndex : oldBlockIndices) { final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); Assert.assertArrayEquals("Read from shard doesn't match", data, (byte[])block.getData()); } - final long[][] newBlockIndices = new long[][]{{0,0}, {1,1}, {0,4}, {0,5}, {10,10}}; + final long[][] newBlockIndices = new long[][]{{0, 0}, {1, 1}, {0, 4}, {0, 5}, {10, 10}}; for (long[] blockIndex : newBlockIndices) { final DataBlock block = writer.readBlock("shard", datasetAttributes, blockIndex); Assert.assertArrayEquals("Read from shard doesn't match", data2, 
(byte[])block.getData()); @@ -188,7 +191,7 @@ public void writeReadBlocksTest() { public void writeReadBlockTest() { final N5Writer writer = tempN5Factory.createTempN5Writer(); - final ShardedDatasetAttributes datasetAttributes = getTestAttributes(); + final DatasetAttributes datasetAttributes = getTestAttributes(); writer.createDataset("shard", datasetAttributes); writer.deleteBlock("shard", 0, 0); @@ -229,15 +232,7 @@ public void writeReadShardTest() { final N5Writer writer = tempN5Factory.createTempN5Writer(); - final ShardedDatasetAttributes datasetAttributes = new ShardedDatasetAttributes( - new long[]{4, 4}, - new int[]{4, 4}, - new int[]{2, 2}, - DataType.UINT8, - new Codec[]{new N5BlockCodec(dataByteOrder)}, - new DeterministicSizeCodec[]{new BytesCodec(indexByteOrder), new Crc32cChecksumCodec()}, - indexLocation - ); + final DatasetAttributes datasetAttributes = getTestAttributes(); writer.createDataset("wholeShard", datasetAttributes); writer.deleteBlock("wholeShard", 0, 0); @@ -277,44 +272,48 @@ public void writeReadShardTest() { public void writeReadNestedShards() { int[] blockSize = new int[]{4, 4}; - int N = Arrays.stream(blockSize).reduce(1, (x,y) -> x*y); + int N = Arrays.stream(blockSize).reduce(1, (x, y) -> x * y); final N5Writer writer = tempN5Factory.createTempN5Writer(); - final ShardedDatasetAttributes datasetAttributes = getNestedShardCodecsAttributes(blockSize); + final DatasetAttributes datasetAttributes = getNestedShardCodecsAttributes(blockSize); writer.createDataset("nestedShards", datasetAttributes); final byte[] data = new byte[N]; Arrays.fill(data, (byte)4); writer.writeBlocks("nestedShards", datasetAttributes, - new ByteArrayDataBlock(blockSize, new long[] { 1, 1 }, data), - new ByteArrayDataBlock(blockSize, new long[] { 0, 2 }, data), - new ByteArrayDataBlock(blockSize, new long[] { 2, 1 }, data)); + new ByteArrayDataBlock(blockSize, new long[]{1, 1}, data), + new ByteArrayDataBlock(blockSize, new long[]{0, 2}, data), + new 
ByteArrayDataBlock(blockSize, new long[]{2, 1}, data)); - assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 1, 1).getData()); - assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 0, 2).getData()); - assertArrayEquals(data, (byte[]) writer.readBlock("nestedShards", datasetAttributes, 2, 1).getData()); + assertArrayEquals(data, (byte[])writer.readBlock("nestedShards", datasetAttributes, 1, 1).getData()); + assertArrayEquals(data, (byte[])writer.readBlock("nestedShards", datasetAttributes, 0, 2).getData()); + assertArrayEquals(data, (byte[])writer.readBlock("nestedShards", datasetAttributes, 2, 1).getData()); } - private ShardedDatasetAttributes getNestedShardCodecsAttributes(int[] blockSize) { + private DatasetAttributes getNestedShardCodecsAttributes(int[] blockSize) { - final int[] innerShardSize = new int[] { 2 * blockSize[0], 2 * blockSize[1] }; - final int[] shardSize = new int[] { 4 * blockSize[0], 4 * blockSize[1] }; + final int[] innerShardSize = new int[]{2 * blockSize[0], 2 * blockSize[1]}; + final int[] shardSize = new int[]{4 * blockSize[0], 4 * blockSize[1]}; final long[] dimensions = GridIterator.int2long(shardSize); // TODO: its not even clear how we build this given - // this constructor. Is the block size of the sharded dataset attributes - // the innermost (block) size or the intermediate shard size? - // probably better to forget about this class - only use DatasetAttributes - // and detect shading in another way + // this constructor. Is the block size of the sharded dataset attributes + // the innermost (block) size or the intermediate shard size? 
+ // probably better to forget about this class - only use DatasetAttributes + // and detect shading in another way final ShardingCodec innerShard = new ShardingCodec(innerShardSize, - new Codec[] { new BytesCodec() }, - new DeterministicSizeCodec[] { new BytesCodec(indexByteOrder), new Crc32cChecksumCodec() }, + new Codec[]{new RawBytes()}, + new DeterministicSizeCodec[]{new RawBytes(indexByteOrder), new Crc32cChecksumCodec()}, IndexLocation.START); - return new ShardedDatasetAttributes(dimensions, shardSize, blockSize, DataType.UINT8, - new Codec[] { innerShard }, - new DeterministicSizeCodec[] { new BytesCodec(indexByteOrder), new Crc32cChecksumCodec() }, - IndexLocation.END); + return new DatasetAttributes( + dimensions, shardSize, blockSize, DataType.UINT8, + new ShardingCodec( + blockSize, + new Codec[]{innerShard}, + new DeterministicSizeCodec[]{new RawBytes(indexByteOrder), new Crc32cChecksumCodec()}, + IndexLocation.END) + ); } } From 0d1eb525c8065f77fa73280f25629e2ff73a0d25 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 4 Feb 2025 17:10:40 -0500 Subject: [PATCH 124/124] refactor: more from refactorShard branch --- .../saalfeldlab/n5/GsonKeyValueN5Writer.java | 8 ++++---- .../org/janelia/saalfeldlab/n5/N5Writer.java | 6 +++--- .../saalfeldlab/n5/shard/InMemoryShard.java | 2 +- .../org/janelia/saalfeldlab/n5/shard/Shard.java | 11 +++++++---- .../saalfeldlab/n5/shard/ShardIndex.java | 7 ++----- .../saalfeldlab/n5/shard/ShardingCodec.java | 2 +- .../saalfeldlab/n5/shard/VirtualShard.java | 17 ++++++++--------- .../n5/shard/ShardPropertiesTests.java | 8 ++++---- 8 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java index 36f5ef1d..b5e4cdf2 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/GsonKeyValueN5Writer.java @@ -264,15 +264,15 
@@ default void writeBlock( } @Override - default void writeShard( + default void writeShard( final String path, - final A datasetAttributes, + final DatasetAttributes datasetAttributes, final Shard shard) throws N5Exception { final String shardPath = absoluteDataBlockPath(N5URI.normalizeGroupPath(path), shard.getGridPosition()); try (final LockedChannel lock = getKeyValueAccess().lockForWriting(shardPath)) { - try (final OutputStream out = lock.newOutputStream()) { - InMemoryShard.fromShard(shard).write(out); + try (final OutputStream shardOut = lock.newOutputStream()) { + InMemoryShard.fromShard(shard).write(shardOut); } } catch (final IOException | UncheckedIOException e) { throw new N5IOException( diff --git a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java index 2471135c..01606204 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/N5Writer.java @@ -35,6 +35,7 @@ import java.util.List; import java.util.Map; +import org.checkerframework.checker.units.qual.A; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.shard.Shard; @@ -299,12 +300,11 @@ default void writeBlocks( * @param datasetAttributes the dataset attributes * @param shard the shard * @param the data block data type - * @param the attribute type * @throws N5Exception the exception */ - void writeShard( + void writeShard( final String datasetPath, - final A datasetAttributes, + final DatasetAttributes datasetAttributes, final Shard shard) throws N5Exception; /** diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java index f7a01fc8..c7274d85 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/InMemoryShard.java @@ -95,7 +95,7 @@ 
public List> getBlocks( int[] blockIndexes ) { for( int idx : blockIndexes ) { GridIterator.indexToPosition(idx, blocksPerShard, position); DataBlock blk = getBlock(position); - if( blk != null ); + if( blk != null ) out.add(blk); } return out; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java index 2e1d7e7e..3f55dfbc 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/Shard.java @@ -84,7 +84,7 @@ default long[] getShardMinPosition(long... shardPosition) { * * @return the shard position */ - default long[] getShard(long... blockPosition) { + default long[] getShardPosition(long... blockPosition) { final int[] shardBlockDimensions = getBlockGridSize(); final long[] shardGridPosition = new long[shardBlockDimensions.length]; @@ -99,6 +99,8 @@ default long[] getShard(long... blockPosition) { public void writeBlock(DataBlock block); + //TODO Caleb: add writeBlocks that does NOT always expect to overwrite the entire existing Shard + default Iterator> iterator() { return new DataBlockIterator<>(this); @@ -130,9 +132,9 @@ default Iterator blockPositionIterator() { return new GridIterator(GridIterator.int2long(getBlockGridSize()), min); } - public ShardIndex getIndex(); + ShardIndex getIndex(); - public static Shard createEmpty(final A attributes, long... shardPosition) { + static Shard createEmpty(final A attributes, long... shardPosition) { final long[] emptyIndex = new long[(int)(2 * attributes.getNumBlocks())]; Arrays.fill(emptyIndex, ShardIndex.EMPTY_INDEX_NBYTES); @@ -140,11 +142,12 @@ public static Shard createE return new InMemoryShard(attributes, shardPosition, shardIndex); } - public static class DataBlockIterator implements Iterator> { + class DataBlockIterator implements Iterator> { private final GridIterator it; private final Shard shard; private final ShardIndex index; + // TODO ShardParameters is deprecated? 
private final ShardParameters attributes; private int blockIndex = 0; diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java index f50a6d62..655bf62c 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardIndex.java @@ -11,10 +11,8 @@ import org.janelia.saalfeldlab.n5.LongArrayDataBlock; import org.janelia.saalfeldlab.n5.N5Exception; import org.janelia.saalfeldlab.n5.N5Exception.N5IOException; -import org.janelia.saalfeldlab.n5.ShardedDatasetAttributes; import org.janelia.saalfeldlab.n5.codec.Codec; import org.janelia.saalfeldlab.n5.codec.DeterministicSizeCodec; -import org.janelia.saalfeldlab.n5.codec.N5BlockCodec; import org.janelia.saalfeldlab.n5.shard.ShardingCodec.IndexLocation; import java.io.ByteArrayInputStream; @@ -75,7 +73,7 @@ public int getNumBlocks() { public boolean isEmpty() { - return !IntStream.range(0, getNumBlocks()).anyMatch(i -> exists(i)); + return !IntStream.range(0, getNumBlocks()).anyMatch(this::exists); } public IndexLocation getLocation() { @@ -146,7 +144,7 @@ public static boolean read(byte[] data, final ShardIndex index) { try { BoundedInputStream bIs = BoundedInputStream.builder() .setInputStream(is) - .setMaxCount(byteBounds.size).get(); + .setMaxCount(index.numBytes()).get(); read(bIs, index); return true; @@ -321,6 +319,5 @@ public boolean equals(Object other) { } return true; } - } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java index 79c609b8..4da31eff 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/ShardingCodec.java @@ -132,7 +132,7 @@ public DeterministicSizeCodec[] getIndexCodecs() { return new VirtualShard(datasetAttributes, shardPosition, kva, keyPath).getBlock(gridPosition); } - 
ShardIndex createIndex(final DatasetAttributes attributes) { + public ShardIndex createIndex(final DatasetAttributes attributes) { return new ShardIndex(attributes.getBlocksPerShard(), getIndexLocation(), getIndexCodecs()); } diff --git a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java index ad70e4d5..3b115a90 100644 --- a/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java +++ b/src/main/java/org/janelia/saalfeldlab/n5/shard/VirtualShard.java @@ -136,17 +136,16 @@ public DataBlock getBlock(long... blockGridPosition) { throw new N5IOException("Attempted to read a block from the wrong shard."); final ShardIndex idx = getIndex(); - - final long startByte = idx.getOffset(relativePosition); - - if (startByte == ShardIndex.EMPTY_INDEX_NBYTES ) + if (!idx.exists(relativePosition)) return null; - final long size = idx.getNumBytes(relativePosition); - try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, startByte, size)) { - try ( final InputStream channelIn = lockedChannel.newInputStream()) { + final long blockOffset = idx.getOffset(relativePosition); + final long blockSize = idx.getNumBytes(relativePosition); + + try (final LockedChannel lockedChannel = keyValueAccess.lockForReading(path, blockOffset, blockSize)) { + try ( final InputStream in = lockedChannel.newInputStream()) { final long[] blockPosInImg = getDatasetAttributes().getBlockPositionFromShardPosition(getGridPosition(), blockGridPosition); - return getBlock( channelIn, blockPosInImg ); + return getBlock( in, blockPosInImg ); } } catch (final N5Exception.N5NoSuchKeyException e) { return null; @@ -216,7 +215,7 @@ public ShardIndex createIndex() { @Override public ShardIndex getIndex() { - index = createIndex(); + index = createIndex(); ShardIndex.read(keyValueAccess, path, index); return index; diff --git a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java 
b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java index 48652cd1..eb0d6de4 100644 --- a/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java +++ b/src/test/java/org/janelia/saalfeldlab/n5/shard/ShardPropertiesTests.java @@ -43,10 +43,10 @@ public void testShardProperties() throws Exception { assertArrayEquals(new int[]{4, 4}, shard.getBlockGridSize()); - assertArrayEquals(new long[]{0, 0}, shard.getShard(0, 0)); - assertArrayEquals(new long[]{1, 1}, shard.getShard(5, 5)); - assertArrayEquals(new long[]{1, 0}, shard.getShard(5, 0)); - assertArrayEquals(new long[]{0, 1}, shard.getShard(0, 5)); + assertArrayEquals(new long[]{0, 0}, shard.getShardPosition(0, 0)); + assertArrayEquals(new long[]{1, 1}, shard.getShardPosition(5, 5)); + assertArrayEquals(new long[]{1, 0}, shard.getShardPosition(5, 0)); + assertArrayEquals(new long[]{0, 1}, shard.getShardPosition(0, 5)); // assertNull(shard.getBlockPosition(0, 0)); // assertNull(shard.getBlockPosition(3, 3));