diff --git a/README.md b/README.md index 9609045..665e43c 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Install the package with: pip install vicinity ``` + The following code snippet demonstrates how to use Vicinity for nearest neighbor search: ```python import numpy as np @@ -68,7 +69,6 @@ vicinity = Vicinity.load('my_vector_store') Vicinity provides the following features: - Lightweight: Minimal dependencies and fast performance. - Flexible Backend Support: Use different backends for vector storage and search. -- Dynamic Updates: Insert and delete items in the vector store. - Serialization: Save and load vector stores for persistence. - Easy to Use: Simple and intuitive API. @@ -81,6 +81,8 @@ The following backends are supported: - [PYNNDescent](https://github.com/lmcinnes/pynndescent): ANN search using PyNNDescent. - [USEARCH](https://github.com/unum-cloud/usearch): ANN search using Usearch. This uses a highly optimized version of the HNSW algorithm. +NOTE: The ANN backends do not support dynamic deletion. To delete items, you need to recreate the index. Insertion is supported in the following backends: `FAISS`, `HNSW`, and `USEARCH`. The `BASIC` backend supports both insertion and deletion. + ### Backend Parameters diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index 400a2ed..ea07e5e 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -156,6 +156,9 @@ def test_vicinity_delete_nonexistent(vicinity_instance: Vicinity) -> None: :param vicinity_instance: A Vicinity instance. :raises ValueError: If deleting items that do not exist. 
""" + if vicinity_instance.backend.backend_type != Backend.BASIC: + # Skip delete for non-basic backends + return with pytest.raises(ValueError): vicinity_instance.delete(["item10002"]) diff --git a/vicinity/backends/annoy.py b/vicinity/backends/annoy.py index 75a95ce..6a86f0b 100644 --- a/vicinity/backends/annoy.py +++ b/vicinity/backends/annoy.py @@ -113,11 +113,11 @@ def query(self, vectors: npt.NDArray, k: int) -> QueryResult: def insert(self, vectors: npt.NDArray) -> None: """Insert vectors into the backend.""" - raise NotImplementedError("Annoy does not support insertion.") + raise NotImplementedError("Insertion is not supported in ANNOY backend.") def delete(self, indices: list[int]) -> None: """Delete vectors from the backend.""" - raise NotImplementedError("Annoy does not support deletion.") + raise NotImplementedError("Deletion is not supported in ANNOY backend.") def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]: """Threshold the backend.""" diff --git a/vicinity/backends/faiss.py b/vicinity/backends/faiss.py index 35e0516..d499f17 100644 --- a/vicinity/backends/faiss.py +++ b/vicinity/backends/faiss.py @@ -124,10 +124,6 @@ def from_vectors( # noqa: C901 index.add(vectors) - # Enable DirectMap for IVF indexes so they can be used with delete - if isinstance(index, faiss.IndexIVF): - index.set_direct_map_type(faiss.DirectMap.Hashtable) - arguments = FaissArgs( dim=dim, index_type=index_type, @@ -169,17 +165,8 @@ def insert(self, vectors: npt.NDArray) -> None: self.index.add(vectors) def delete(self, indices: list[int]) -> None: - """Delete vectors from the backend, if supported.""" - if hasattr(self.index, "remove_ids"): - if isinstance(self.index, faiss.IndexIVF): - # Use IDSelectorArray for IVF indexes - id_selector = faiss.IDSelectorArray(np.array(indices, dtype=np.int64)) - else: - # Use IDSelectorBatch for other indexes - id_selector = faiss.IDSelectorBatch(np.array(indices, dtype=np.int64)) - 
self.index.remove_ids(id_selector) - else: - raise NotImplementedError("This FAISS index type does not support deletion.") + """Delete vectors from the backend.""" + raise NotImplementedError("Deletion is not supported in FAISS backends.") def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]: """Query vectors within a distance threshold, using range_search if supported.""" diff --git a/vicinity/backends/hnsw.py b/vicinity/backends/hnsw.py index 2bc55d3..827adab 100644 --- a/vicinity/backends/hnsw.py +++ b/vicinity/backends/hnsw.py @@ -87,8 +87,7 @@ def insert(self, vectors: npt.NDArray) -> None: def delete(self, indices: list[int]) -> None: """Delete vectors from the backend.""" - for index in indices: - self.index.mark_deleted(index) + raise NotImplementedError("Deletion is not supported in HNSW backend.") def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]: """Threshold the backend.""" diff --git a/vicinity/backends/pynndescent.py b/vicinity/backends/pynndescent.py index 07079bd..092a1b1 100644 --- a/vicinity/backends/pynndescent.py +++ b/vicinity/backends/pynndescent.py @@ -69,11 +69,11 @@ def query(self, vectors: npt.NDArray, k: int) -> QueryResult: def insert(self, vectors: npt.NDArray) -> None: """Insert vectors into the index (not supported by pynndescent).""" - raise NotImplementedError("Dynamic insertion is not supported by pynndescent.") + raise NotImplementedError("Insertion is not supported by pynndescent.") def delete(self, indices: list[int]) -> None: """Delete vectors from the index (not supported by pynndescent).""" - raise NotImplementedError("Dynamic deletion is not supported by pynndescent.") + raise NotImplementedError("Deletion is not supported in pynndescent backend.") def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]: """Find neighbors within a distance threshold."""