Skip to content

Commit

Permalink
Updated tests for clarity and correctness.
Browse files Browse the repository at this point in the history
  • Loading branch information
lbooker42 committed Jan 7, 2025
1 parent 08e1b51 commit 0e9c778
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -967,22 +967,20 @@ public void testStringContainsFilter() {
public void testIndexRetentionThroughGC() {
final Table childTable;

try (final SafeCloseable scope = LivenessScopeStack.open()) {

// We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
// enforceStrongReachability
try (final SafeCloseable ignored = LivenessScopeStack.open()) {
final Map<String, Object> retained = new HashMap<>();
final Random random = new Random(0);

final int size = 500;

final ColumnInfo<?, ?>[] columnInfo;
QueryTable parentTable = getTable(false, size, random,
columnInfo = initColumnInfos(new String[] {"S1", "S2"},
final QueryTable parentTable = getTable(false, size, random,
initColumnInfos(new String[] {"S1", "S2"},
new SetGenerator<>("aa", "bb", "cc", "dd", "AA", "BB", "CC", "DD"),
new SetGenerator<>("aaa", "bbb", "ccc", "ddd", "AAA", "BBB", "CCC", "DDD")));

// Explicitly retain the index references.
DataIndex di1 = DataIndexer.getOrCreateDataIndex(parentTable, "S1");
DataIndex di2 = DataIndexer.getOrCreateDataIndex(parentTable, "S2");

retained.put("di1", DataIndexer.getOrCreateDataIndex(parentTable, "S1"));
retained.put("di2", DataIndexer.getOrCreateDataIndex(parentTable, "S2"));
childTable = parentTable.update("isEven = ii % 2 == 0");

// While retained, the indexes will survive GC
Expand All @@ -991,24 +989,18 @@ public void testIndexRetentionThroughGC() {
// While the references are held, the parent and child tables should have the indexes.
Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S1"));
Assert.assertTrue(DataIndexer.hasDataIndex(parentTable, "S2"));

Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S1"));
Assert.assertTrue(DataIndexer.hasDataIndex(childTable, "S2"));

// Explicitly release the references.
parentTable = null;
di1 = null;
di2 = null;
retained.clear();
}

// After a GC, the child table should not have the indexes.
System.gc();

Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S1"));
Assert.assertFalse(DataIndexer.hasDataIndex(childTable, "S2"));
}


public void testStringMatchFilterIndexed() {
// MatchFilters (currently) only use indexes on initial creation but this incremental test will recreate
// index-enabled match filtered tables and compare them against incremental non-indexed filtered tables.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,27 +344,24 @@ public void vectorParquetFormat() {
public void indexRetentionThroughGC() {
final String destPath = Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString();
final int tableSize = 10_000;

final Table testTable = TableTools.emptyTable(tableSize).update(
"symbol = randomInt(0,4)",
"price = randomInt(0,10000) * 0.01",
"str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
"indexed_val = ii % 10_000");

final ParquetInstructions writeInstructions = ParquetInstructions.builder()
.setGenerateMetadataFiles(true)
.addIndexColumns("indexed_val")
.build();

final PartitionedTable partitionedTable = testTable.partitionBy("symbol");
ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions);

final Table child;

// Read from disk and validate the indexes through GC.
try (final SafeCloseable scope = LivenessScopeStack.open()) {
// We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
// enforceStrongReachability
try (final SafeCloseable ignored = LivenessScopeStack.open()) {
// Read from disk and validate the indexes through GC.
Table parent = ParquetTools.readTable(destPath);

child = parent.update("new_val = indexed_val + 1")
.update("new_val = new_val + 1")
.update("new_val = new_val + 1")
Expand All @@ -381,13 +378,12 @@ public void indexRetentionThroughGC() {
Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");

// Explicitly release the parent table to encourage GC.
// Force the parent to null to allow GC to collect it.
parent = null;
}

// After a GC, the child table should still have access to the indexes.
System.gc();

Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
}
Expand All @@ -397,25 +393,23 @@ public void remappedIndexRetentionThroughGC() {
final String destPath =
Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString();
final int tableSize = 10_000;

final Table testTable = TableTools.emptyTable(tableSize).update(
"symbol = randomInt(0,4)",
"price = randomInt(0,10000) * 0.01",
"str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))",
"indexed_val = ii % 10_000");

final ParquetInstructions writeInstructions = ParquetInstructions.builder()
.setGenerateMetadataFiles(true)
.addIndexColumns("symbol")
.addIndexColumns("indexed_val")
.build();

ParquetTools.writeTable(testTable, destPath, writeInstructions);

final Table child;

// Read from disk and validate the indexes through GC.
try (final SafeCloseable scope = LivenessScopeStack.open()) {
// We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's
// enforceStrongReachability
try (final SafeCloseable ignored = LivenessScopeStack.open()) {
// Read from disk and validate the indexes through GC.
Table parent = ParquetTools.readTable(destPath);

// select() produces in-memory column sources, triggering the remapping of the indexes.
Expand All @@ -432,13 +426,12 @@ public void remappedIndexRetentionThroughGC() {
Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");

// Explicitly release the parent table to encourage GC.
// Force the parent to null to allow GC to collect it.
parent = null;
}

// After a GC, the child table should still have access to the indexes.
System.gc();

Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol");
Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val");
}
Expand Down

0 comments on commit 0e9c778

Please sign in to comment.