diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java index 9ad5cec0d06..58898307b88 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java @@ -113,6 +113,18 @@ private List checkForInvalidFilters() { "Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: " + filter); } + if (filter.isRefreshing()) { + // TODO: DH-18052: updateView and view should support refreshing Filter Expressions + // + // This would enable us to use a whereIn or whereNotIn for things like conditional formatting; which could + // be attractive. However, a join or actually wouldMatch gets you there without the additional complexity. + // + // Supporting this requires SelectColumn dependencies, which have not previously existed. Additionally, + // if we were to support these for select and update (as opposed to view and updateView), then the filter + // could require recomputing the entire result table whenever anything changes. + throw new UncheckedTableException( + "Cannot use a refreshing filter in select, view, update, or updateView: " + filter); + } return filter.getColumns(); } @@ -258,24 +270,50 @@ private void doFill(@NotNull final RowSequence rowSequence, final WritableChunk< final WritableObjectChunk booleanDestination = destination.asWritableObjectChunk(); booleanDestination.setSize(rowSequence.intSize()); final RowSet fullSet = usePrev ? 
tableToFilter.getRowSet().prev() : tableToFilter.getRowSet(); + try (final RowSet inputRowSet = rowSequence.asRowSet(); - final RowSet filtered = filter.filter(inputRowSet, fullSet, tableToFilter, usePrev); - final RowSet.Iterator inputIt = inputRowSet.iterator(); - final RowSet.Iterator trueIt = filtered.iterator()) { - long nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1; + final RowSet filtered = filter.filter(inputRowSet, fullSet, tableToFilter, usePrev)) { + if (filtered.size() == inputRowSet.size()) { + // if everything matches, short circuit the iteration + booleanDestination.fillWithValue(0, booleanDestination.size(), true); + return; + } + int offset = 0; - while (nextTrue >= 0) { - // the input iterator is a superset of the true iterator, so we can always find out what - // the next value is without needing to check hasNext - final long nextInput = inputIt.nextLong(); - final boolean found = nextInput == nextTrue; - booleanDestination.set(offset++, found); - if (found) { - nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1; + + try (final RowSet.Iterator inputRows = inputRowSet.iterator(); + final RowSet.Iterator trueRows = filtered.iterator()) { + long nextTrue = trueRows.hasNext() ? trueRows.nextLong() : -1; + while (nextTrue >= 0) { + // the input iterator is a superset of the true iterator, so we can always find out what + // the next value is without needing to check hasNext + final long nextInput = inputRows.nextLong(); + final boolean found = nextInput == nextTrue; + booleanDestination.set(offset++, found); + if (found) { + nextTrue = trueRows.hasNext() ? trueRows.nextLong() : -1; + } } } - // fill everything else up with false, because nothing else can match - booleanDestination.fillWithValue(offset, booleanDestination.size() - offset, false); + + /* + * This alternative formulation from Ryan is fairly close in terms of performance. It might be very + * slightly worse on the dense cases, and slightly better on the sparse cases. 
+ */ + /* + * try (final RowSequence.Iterator inputRows = inputRowSet.getRowSequenceIterator(); final + * RowSet.Iterator trueRows = filtered.iterator()) { while (trueRows.hasNext()) { final long nextTrue = + * trueRows.nextLong(); // Find all the false rows between the last consumed input row and the next true + * row final int falsesSkipped = (int) inputRows.advanceAndGetPositionDistance(nextTrue + 1) - 1; if + * (falsesSkipped > 0) { booleanDestination.fillWithValue(offset, falsesSkipped, false); offset += + * falsesSkipped; } booleanDestination.set(offset++, true); } } + */ + + final int remainingFalses = booleanDestination.size() - offset; + // Fill everything else up with false, because we've exhausted the trues + if (remainingFalses > 0) { + booleanDestination.fillWithValue(offset, remainingFalses, false); + } } } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java index cce17482150..10e1b015cf5 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java @@ -3,15 +3,11 @@ // package io.deephaven.engine.table.impl; -import io.deephaven.api.ColumnName; -import io.deephaven.api.JoinMatch; -import io.deephaven.api.Selectable; -import io.deephaven.api.TableOperations; +import io.deephaven.api.*; import io.deephaven.api.filter.Filter; import io.deephaven.api.filter.FilterIn; import io.deephaven.api.literal.Literal; import io.deephaven.base.testing.BaseArrayTestCase; -import io.deephaven.chunk.Chunk; import io.deephaven.chunk.ObjectChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.configuration.Configuration; @@ -23,11 +19,7 @@ import io.deephaven.engine.primitive.iterator.CloseableIterator; import io.deephaven.engine.rowset.*; import io.deephaven.engine.table.*; 
-import io.deephaven.engine.table.impl.select.DhFormulaColumn; -import io.deephaven.engine.table.impl.select.FormulaCompilationException; -import io.deephaven.engine.table.impl.select.WhereFilterFactory; -import io.deephaven.engine.table.impl.select.SelectColumn; -import io.deephaven.engine.table.impl.select.SelectColumnFactory; +import io.deephaven.engine.table.impl.select.*; import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; import io.deephaven.engine.table.impl.sources.LongSparseArraySource; import io.deephaven.engine.table.impl.sources.RedirectedColumnSource; @@ -51,10 +43,12 @@ import org.junit.Rule; import org.junit.Test; +import java.text.DecimalFormat; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.IntStream; import static io.deephaven.engine.testutil.TstUtils.*; import static io.deephaven.engine.util.TableTools.*; @@ -1398,7 +1392,8 @@ public void testFilterExpressionGetPrev() { // with a chunk try (final ChunkSource.GetContext fc = resultColumn.makeGetContext(4)) { - final ObjectChunk prevValues = resultColumn.getPrevChunk(fc, prevRowset).asObjectChunk(); + final ObjectChunk prevValues = + resultColumn.getPrevChunk(fc, prevRowset).asObjectChunk(); assertEquals(false, prevValues.get(0)); assertEquals(true, prevValues.get(1)); assertEquals(false, prevValues.get(2)); @@ -1416,7 +1411,44 @@ public void testFilterExpressionGetPrev() { assertEquals(false, resultColumn.get(rs.get(3))); assertEquals(true, resultColumn.get(rs.get(4))); }); + } + @Test + public void testFilterExpressionFillChunkPerformance() { + testFilterExpressionFillChunkPerformance(1.0); + testFilterExpressionFillChunkPerformance(.9999); + testFilterExpressionFillChunkPerformance(.999); + testFilterExpressionFillChunkPerformance(.8725); + testFilterExpressionFillChunkPerformance(.75); + testFilterExpressionFillChunkPerformance(.5); + 
testFilterExpressionFillChunkPerformance(.25); + testFilterExpressionFillChunkPerformance(.125); + testFilterExpressionFillChunkPerformance(0.001); + testFilterExpressionFillChunkPerformance(0.0001); + } + + public void testFilterExpressionFillChunkPerformance(final double density) { + final int numIterations = 1; + final int size = 100_000; + final Filter filter = FilterIn.of(ColumnName.of("A"), Literal.of(1)); + + final Random random = new Random(20241120); + final List values = IntStream.range(0, size).mapToObj(ignored -> random.nextDouble() < density) + .collect(Collectors.toList()); + QueryScope.addParam("values", values); + final Table t = TableTools.emptyTable(size).update("A=(Boolean)(values[i]) ? 1: 0"); + QueryScope.addParam("values", null); + + final Table upv = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter))); + final long startTime = System.nanoTime(); + for (int iters = 0; iters < numIterations; ++iters) { + final long trueValues = upv.columnIterator("AWM").stream().filter(x -> (Boolean) x).count(); + assertEquals(values.stream().filter(x -> x).count(), trueValues); + } + final long endTime = System.nanoTime(); + final double duration = endTime - startTime; + System.out.println("Density: " + new DecimalFormat("0.0000").format(density) + ", Nanos: " + (long) duration + + ", per cell=" + new DecimalFormat("0.00").format(duration / (size * numIterations))); } @Test @@ -1424,6 +1456,8 @@ public void testFilterExpressionArray() { final Filter filter = WhereFilterFactory.getExpression("A=A_[i-1]"); final Filter filterArrayOnly = WhereFilterFactory.getExpression("A=A_.size() = 1"); final Filter filterKonly = WhereFilterFactory.getExpression("A=k+1"); + final QueryTable setTable = TstUtils.testRefreshingTable(intCol("B")); + final Filter whereIn = new DynamicWhereFilter(setTable, true, MatchPairFactory.getExpression("A=B")); final QueryTable table = TstUtils.testRefreshingTable(intCol("A", 1, 1, 2, 3, 5, 8, 9, 9)); final 
UncheckedTableException wme = Assert.assertThrows(UncheckedTableException.class, @@ -1446,7 +1480,7 @@ public void testFilterExpressionArray() { () -> table.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter)))); Assert.assertEquals( "Cannot use a filter with column Vectors (_ syntax) in select, view, update, or updateView: A=A_[i-1]", - upe.getMessage()); + uve.getMessage()); final UncheckedTableException se = Assert.assertThrows(UncheckedTableException.class, () -> table.select(List.of(Selectable.of(ColumnName.of("AWM"), filterKonly)))); @@ -1460,6 +1494,12 @@ public void testFilterExpressionArray() { "Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: A=k+1", ve.getMessage()); + final UncheckedTableException dw = Assert.assertThrows(UncheckedTableException.class, + () -> table.view(List.of(Selectable.of(ColumnName.of("AWM"), whereIn)))); + Assert.assertEquals( + "Cannot use a refreshing filter in select, view, update, or updateView: DynamicWhereFilter([A=B])", + dw.getMessage()); + } @Test