Skip to content

Commit

Permalink
[BUG]: fix binary search impl of LimitOperator (#3489)
Browse files Browse the repository at this point in the history
  • Loading branch information
codetheweb authored Jan 15, 2025
1 parent 4442978 commit babded1
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 2 deletions.
34 changes: 33 additions & 1 deletion chromadb/test/property/test_filtering.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Optional, cast
from hypothesis import given, settings, HealthCheck
import uuid
from hypothesis import example, given, settings, HealthCheck
import pytest
from chromadb.api import ClientAPI
from chromadb.test.property import invariants
Expand Down Expand Up @@ -230,6 +231,37 @@ def test_filterable_metadata_get(
offset=st.integers(min_value=0, max_value=10),
should_compact=st.booleans(),
)
# Repro of a former off-by-one error in distributed Chroma. Fixed in https://github.com/chroma-core/chroma/pull/3489.
@example(
collection=strategies.Collection(
name="test",
metadata={"test": "test"},
embedding_function=None,
id=uuid.uuid4(),
dimension=2,
dtype="float32",
known_metadata_keys={},
known_document_keywords=[],
),
record_set=strategies.RecordSet(
ids=[str(i) for i in range(11)],
embeddings=[np.random.rand(2).tolist() for _ in range(11)],
metadatas=[{"test": "test"} for _ in range(11)],
documents=None,
),
filters=[
strategies.Filter(
{
"where_document": {"$not_contains": "foo"},
"ids": None,
"where": None,
}
)
],
limit=10,
offset=10,
should_compact=True,
)
def test_filterable_metadata_get_limit_offset(
caplog,
client: ClientAPI,
Expand Down
23 changes: 22 additions & 1 deletion rust/worker/src/execution/operators/limit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ impl SeekScanner<'_> {
size -= half;
}

Ok(base)
// The above loop tests all midpoints. However, it does not test the very last element.
// We want the greatest offset such that self.join_rank(offset) <= skip, so we need to test the last element as well.
let cmp = self.joint_rank(base).await?.cmp(&skip);
Ok(base + (cmp == Ordering::Less) as u32)
}

// Seek the start in the log and record segment, then scan for the specified number of offset ids
Expand Down Expand Up @@ -421,4 +424,22 @@ mod tests {
.collect()
);
}

#[tokio::test]
async fn test_returns_last_offset() {
let limit_input =
setup_limit_input(SignedRoaringBitmap::empty(), SignedRoaringBitmap::full()).await;

let limit_operator = LimitOperator {
skip: 99,
fetch: Some(1),
};

let limit_output = limit_operator
.run(&limit_input)
.await
.expect("LimitOperator should not fail");

assert_eq!(limit_output.offset_ids, (100..=100).collect());
}
}

0 comments on commit babded1

Please sign in to comment.