From de1cf3abf81f45f1e83151ffd350fe1e7881598c Mon Sep 17 00:00:00 2001
From: Larry Booker
Date: Tue, 7 Jan 2025 10:52:01 -0800
Subject: [PATCH 1/4] Test index retention through GC events.

---
 .../engine/table/impl/QueryTableTest.java     | 45 +++++++++++++++
 .../table/ParquetTableReadWriteTest.java      | 55 +++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
index 5aa0832ebd1..cd3f72df863 100644
--- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
+++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
@@ -964,6 +964,51 @@ public void testStringContainsFilter() {
         }
     }
 
+    public void testIndexRetentionThroughGC() {
+        final Table childTable;
+
+        try (final SafeCloseable scope = LivenessScopeStack.open()) {
+
+            final Random random = new Random(0);
+
+            final int size = 500;
+
+            final ColumnInfo[] columnInfo;
+            QueryTable parentTable = getTable(false, size, random,
+                    columnInfo = initColumnInfos(new String[] {"S1", "S2"},
+                            new SetGenerator<>("aa", "bb", "cc", "dd", "AA", "BB", "CC", "DD"),
+                            new SetGenerator<>("aaa", "bbb", "ccc", "ddd", "AAA", "BBB", "CCC", "DDD")));
+
+            // Explicitly retain the index references.
+            DataIndex di1 = DataIndexer.getOrCreateDataIndex(parentTable, "S1");
+            DataIndex di2 = DataIndexer.getOrCreateDataIndex(parentTable, "S2");
+
+            childTable = parentTable.update("isEven = ii % 2 == 0");
+
+            // While retained, the indexes will survive GC
+            System.gc();
+
+            // While the references are held, the parent and child tables should have the indexes.
+            Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S1"));
+            Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S2"));
+
+            Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S1"));
+            Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S2"));
+
+            // Explicitly release the references.
+            parentTable = null;
+            di1 = null;
+            di2 = null;
+        }
+
+        // After a GC, the child table should not have the indexes.
+        System.gc();
+
+        Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S1"));
+        Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S2"));
+    }
+
     public void testStringMatchFilterIndexed() {
         // MatchFilters (currently) only use indexes on initial creation but this incremental test will recreate
         // index-enabled match filtered tables and compare them against incremental non-indexed filtered tables.
diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index e7faf5be88c..ea64e20ca99 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -10,6 +10,7 @@
 import io.deephaven.base.FileUtils;
 import io.deephaven.base.verify.Assert;
 import io.deephaven.engine.context.ExecutionContext;
+import io.deephaven.engine.liveness.LivenessScopeStack;
 import io.deephaven.engine.primitive.function.ByteConsumer;
 import io.deephaven.engine.primitive.function.CharConsumer;
 import io.deephaven.engine.primitive.function.FloatConsumer;
@@ -58,6 +59,7 @@
 import io.deephaven.test.types.OutOfBandTest;
 import io.deephaven.time.DateTimeUtils;
 import io.deephaven.util.QueryConstants;
+import io.deephaven.util.SafeCloseable;
 import io.deephaven.util.codec.SimpleByteArrayCodec;
 import io.deephaven.util.compare.DoubleComparisons;
 import io.deephaven.util.compare.FloatComparisons;
@@ -88,6 +90,7 @@
 import java.math.BigInteger;
 import java.net.URI;
 import java.nio.file.Files;
+import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.time.Instant;
 import java.time.LocalDate;
@@ -337,6 +340,58 @@ public void vectorParquetFormat() {
         groupedTable("largeAggParquet", LARGE_TABLE_SIZE, false);
     }
 
+    @Test
+    public void indexRetentionThroughGC() {
+        final String destPath = Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString();
+        final int tableSize = 10_000;
+
+        final Table testTable = TableTools.emptyTable(tableSize).update(
+                "symbol = randomInt(0,4)",
+                "price = randomInt(0,10000) * 0.01",
+                "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
+                "indexed_val = ii % 10_000");
+
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .addIndexColumns("indexed_val")
+                .build();
+
+        final PartitionedTable partitionedTable = testTable.partitionBy("symbol");
+        ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions);
+
+        final Table child;
+
+        // Read from disk and validate the indexes through GC.
+        try (final SafeCloseable scope = LivenessScopeStack.open()) {
+            Table parent = ParquetTools.readTable(destPath);
+
+            child = parent.update("new_val = indexed_val + 1")
+                    .update("new_val = new_val + 1")
+                    .update("new_val = new_val + 1")
+                    .update("new_val = new_val + 1");
+
+            // These indexes will survive GC because the parent table is holding strong references.
+            System.gc();
+
+            // The parent table should have the indexes.
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // The child table should have the indexes while the parent is retained.
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // Explicitly release the parent table to encourage GC.
+            parent = null;
+        }
+
+        // After a GC, the child table should still have access to the indexes.
+        System.gc();
+
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+    }
+
     @Test
     public void indexByLongKey() {
         final TableDefinition definition = TableDefinition.of(

From 8e41bfc298e26f1496f7f2e2faee54eafe61aec0 Mon Sep 17 00:00:00 2001
From: Larry Booker
Date: Tue, 7 Jan 2025 11:16:23 -0800
Subject: [PATCH 2/4] Added test of RemappedDataIndex (using select())

---
 .../table/ParquetTableReadWriteTest.java | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index ea64e20ca99..00a72e36907 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -392,6 +392,56 @@ public void indexRetentionThroughGC() {
         Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
     }
 
+    @Test
+    public void remappedIndexRetentionThroughGC() {
+        final String destPath = Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
+        final int tableSize = 10_000;
+
+        final Table testTable = TableTools.emptyTable(tableSize).update(
+                "symbol = randomInt(0,4)",
+                "price = randomInt(0,10000) * 0.01",
+                "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
+                "indexed_val = ii % 10_000");
+
+        final ParquetInstructions writeInstructions = ParquetInstructions.builder()
+                .setGenerateMetadataFiles(true)
+                .addIndexColumns("symbol")
+                .addIndexColumns("indexed_val")
+                .build();
+
+        ParquetTools.writeTable(testTable, destPath, writeInstructions);
+
+        final Table child;
+
+        // Read from disk and validate the indexes through GC.
+        try (final SafeCloseable scope = LivenessScopeStack.open()) {
+            Table parent = ParquetTools.readTable(destPath);
+
+            // select() produces in-memory column sources, triggering the remapping of the indexes.
+            child = parent.select();
+
+            // These indexes will survive GC because the parent table is holding strong references.
+            System.gc();
+
+            // The parent table should have the indexes.
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // The child table should have the indexes while the parent is retained.
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+            Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+
+            // Explicitly release the parent table to encourage GC.
+            parent = null;
+        }
+
+        // After a GC, the child table should still have access to the indexes.
+        System.gc();
+
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
+        Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
+    }
+
     @Test
     public void indexByLongKey() {
         final TableDefinition definition = TableDefinition.of(

From 08e1b519d8a6ebc7c2ec351c246573025a3f8cab Mon Sep 17 00:00:00 2001
From: Larry Booker
Date: Tue, 7 Jan 2025 11:48:27 -0800
Subject: [PATCH 3/4] Spotless

---
 .../io/deephaven/parquet/table/ParquetTableReadWriteTest.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index 00a72e36907..6ef5ff4545a 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -394,7 +394,8 @@ public void indexRetentionThroughGC() {
 
     @Test
     public void remappedIndexRetentionThroughGC() {
-        final String destPath = Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
+        final String destPath =
+                Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
         final int tableSize = 10_000;
 
         final Table testTable = TableTools.emptyTable(tableSize).update(

From 0e9c7787024a161065c2059b13b860373ef690fb Mon Sep 17 00:00:00 2001
From: Larry Booker
Date: Tue, 7 Jan 2025 15:13:44 -0800
Subject: [PATCH 4/4] Updated tests for clarity and correctness.

---
 .../engine/table/impl/QueryTableTest.java     | 26 +++++++-----------
 .../table/ParquetTableReadWriteTest.java      | 27 +++++++------------
 2 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
index cd3f72df863..be2b9f54678 100644
--- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
+++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java
@@ -967,22 +967,20 @@ public void testStringContainsFilter() {
     public void testIndexRetentionThroughGC() {
         final Table childTable;
 
-        try (final SafeCloseable scope = LivenessScopeStack.open()) {
-
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            final Map retained = new HashMap<>();
             final Random random = new Random(0);
-
             final int size = 500;
-
-            final ColumnInfo[] columnInfo;
-            QueryTable parentTable = getTable(false, size, random,
-                    columnInfo = initColumnInfos(new String[] {"S1", "S2"},
+            final QueryTable parentTable = getTable(false, size, random,
+                    initColumnInfos(new String[] {"S1", "S2"},
                             new SetGenerator<>("aa", "bb", "cc", "dd", "AA", "BB", "CC", "DD"),
                             new SetGenerator<>("aaa", "bbb", "ccc", "ddd", "AAA", "BBB", "CCC", "DDD")));
 
             // Explicitly retain the index references.
-            DataIndex di1 = DataIndexer.getOrCreateDataIndex(parentTable, "S1");
-            DataIndex di2 = DataIndexer.getOrCreateDataIndex(parentTable, "S2");
-
+            retained.put("di1", DataIndexer.getOrCreateDataIndex(parentTable, "S1"));
+            retained.put("di2", DataIndexer.getOrCreateDataIndex(parentTable, "S2"));
             childTable = parentTable.update("isEven = ii % 2 == 0");
 
             // While retained, the indexes will survive GC
@@ -991,24 +989,18 @@ public void testIndexRetentionThroughGC() {
             // While the references are held, the parent and child tables should have the indexes.
             Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S1"));
             Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S2"));
-
             Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S1"));
             Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S2"));
 
             // Explicitly release the references.
-            parentTable = null;
-            di1 = null;
-            di2 = null;
+            retained.clear();
         }
-
         // After a GC, the child table should not have the indexes.
         System.gc();
-
         Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S1"));
         Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S2"));
     }
-
     public void testStringMatchFilterIndexed() {
         // MatchFilters (currently) only use indexes on initial creation but this incremental test will recreate
         // index-enabled match filtered tables and compare them against incremental non-indexed filtered tables.
diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
index 6ef5ff4545a..530be1c5d6b 100644
--- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
+++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java
@@ -344,27 +344,24 @@ public void vectorParquetFormat() {
     public void indexRetentionThroughGC() {
         final String destPath = Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString();
         final int tableSize = 10_000;
-
         final Table testTable = TableTools.emptyTable(tableSize).update(
                 "symbol = randomInt(0,4)",
                 "price = randomInt(0,10000) * 0.01",
                 "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
                 "indexed_val = ii % 10_000");
-
         final ParquetInstructions writeInstructions = ParquetInstructions.builder()
                 .setGenerateMetadataFiles(true)
                 .addIndexColumns("indexed_val")
                 .build();
-
         final PartitionedTable partitionedTable = testTable.partitionBy("symbol");
         ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions);
-
         final Table child;
 
-        // Read from disk and validate the indexes through GC.
-        try (final SafeCloseable scope = LivenessScopeStack.open()) {
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            // Read from disk and validate the indexes through GC.
             Table parent = ParquetTools.readTable(destPath);
-
             child = parent.update("new_val = indexed_val + 1")
                     .update("new_val = new_val + 1")
                     .update("new_val = new_val + 1")
                     .update("new_val = new_val + 1");
@@ -381,13 +378,12 @@ public void indexRetentionThroughGC() {
             Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
            Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
 
-            // Explicitly release the parent table to encourage GC.
+            // Force the parent to null to allow GC to collect it.
             parent = null;
         }
 
         // After a GC, the child table should still have access to the indexes.
         System.gc();
-
         Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
         Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
     }
@@ -397,25 +393,23 @@ public void remappedIndexRetentionThroughGC() {
         final String destPath =
                 Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
         final int tableSize = 10_000;
-
         final Table testTable = TableTools.emptyTable(tableSize).update(
                 "symbol = randomInt(0,4)",
                 "price = randomInt(0,10000) * 0.01",
                 "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
                 "indexed_val = ii % 10_000");
-
         final ParquetInstructions writeInstructions = ParquetInstructions.builder()
                 .setGenerateMetadataFiles(true)
                 .addIndexColumns("symbol")
                 .addIndexColumns("indexed_val")
                 .build();
-
         ParquetTools.writeTable(testTable, destPath, writeInstructions);
-
         final Table child;
 
-        // Read from disk and validate the indexes through GC.
-        try (final SafeCloseable scope = LivenessScopeStack.open()) {
+        // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
+        // enforceStrongReachability
+        try (final SafeCloseable ignored = LivenessScopeStack.open()) {
+            // Read from disk and validate the indexes through GC.
             Table parent = ParquetTools.readTable(destPath);
 
             // select() produces in-memory column sources, triggering the remapping of the indexes.
             child = parent.select();
@@ -432,13 +426,12 @@ public void remappedIndexRetentionThroughGC() {
             Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
             Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
 
-            // Explicitly release the parent table to encourage GC.
+            // Force the parent to null to allow GC to collect it.
             parent = null;
         }
 
         // After a GC, the child table should still have access to the indexes.
         System.gc();
-
         Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
         Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
     }