Skip to content

Commit

Permalink
Deprecated deletion support for ANN backends
Browse files Browse the repository at this point in the history
  • Loading branch information
Pringled committed Nov 19, 2024
1 parent 031db4f commit b451458
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 22 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Install the package with:
pip install vicinity
```


The following code snippet demonstrates how to use Vicinity for nearest neighbor search:
```python
import numpy as np
Expand Down Expand Up @@ -68,7 +69,6 @@ vicinity = Vicinity.load('my_vector_store')
Vicinity provides the following features:
- Lightweight: Minimal dependencies and fast performance.
- Flexible Backend Support: Use different backends for vector storage and search.
- Dynamic Updates: Insert and delete items in the vector store.
- Serialization: Save and load vector stores for persistence.
- Easy to Use: Simple and intuitive API.

Expand All @@ -81,6 +81,8 @@ The following backends are supported:
- [PYNNDescent](https://github.com/lmcinnes/pynndescent): ANN search using PyNNDescent.
- [USEARCH](https://github.com/unum-cloud/usearch): ANN search using Usearch. This uses a highly optimized version of the HNSW algorithm.

NOTE: The ANN backends do not support dynamic deletion; to delete items, you need to recreate the index. Among the ANN backends, insertion is supported by `FAISS`, `HNSW`, and `USEARCH`. The `BASIC` backend supports both insertion and deletion.

### Backend Parameters


Expand Down
3 changes: 3 additions & 0 deletions tests/test_vicinity.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ def test_vicinity_delete_nonexistent(vicinity_instance: Vicinity) -> None:
:param vicinity_instance: A Vicinity instance.
:raises ValueError: If deleting items that do not exist.
"""
if vicinity_instance.backend.backend_type != Backend.BASIC:
# Skip delete for non-basic backends
return
with pytest.raises(ValueError):
vicinity_instance.delete(["item10002"])

Expand Down
4 changes: 2 additions & 2 deletions vicinity/backends/annoy.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ def query(self, vectors: npt.NDArray, k: int) -> QueryResult:

def insert(self, vectors: npt.NDArray) -> None:
"""Insert vectors into the backend."""
raise NotImplementedError("Annoy does not support insertion.")
raise NotImplementedError("Insertion is not supported in ANNOY backend.")

def delete(self, indices: list[int]) -> None:
"""Delete vectors from the backend."""
raise NotImplementedError("Annoy does not support deletion.")
raise NotImplementedError("Deletion is not supported in ANNOY backend.")

def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]:
"""Threshold the backend."""
Expand Down
17 changes: 2 additions & 15 deletions vicinity/backends/faiss.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ def from_vectors( # noqa: C901

index.add(vectors)

# Enable DirectMap for IVF indexes so they can be used with delete
if isinstance(index, faiss.IndexIVF):
index.set_direct_map_type(faiss.DirectMap.Hashtable)

arguments = FaissArgs(
dim=dim,
index_type=index_type,
Expand Down Expand Up @@ -169,17 +165,8 @@ def insert(self, vectors: npt.NDArray) -> None:
self.index.add(vectors)

def delete(self, indices: list[int]) -> None:
"""Delete vectors from the backend, if supported."""
if hasattr(self.index, "remove_ids"):
if isinstance(self.index, faiss.IndexIVF):
# Use IDSelectorArray for IVF indexes
id_selector = faiss.IDSelectorArray(np.array(indices, dtype=np.int64))
else:
# Use IDSelectorBatch for other indexes
id_selector = faiss.IDSelectorBatch(np.array(indices, dtype=np.int64))
self.index.remove_ids(id_selector)
else:
raise NotImplementedError("This FAISS index type does not support deletion.")
"""Delete vectors from the backend."""
raise NotImplementedError("Deletion is not supported in FAISS backends.")

def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]:
"""Query vectors within a distance threshold, using range_search if supported."""
Expand Down
3 changes: 1 addition & 2 deletions vicinity/backends/hnsw.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ def insert(self, vectors: npt.NDArray) -> None:

def delete(self, indices: list[int]) -> None:
"""Delete vectors from the backend."""
for index in indices:
self.index.mark_deleted(index)
raise NotImplementedError("Deletion is not supported in HNSW backend.")

def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]:
"""Threshold the backend."""
Expand Down
4 changes: 2 additions & 2 deletions vicinity/backends/pynndescent.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ def query(self, vectors: npt.NDArray, k: int) -> QueryResult:

def insert(self, vectors: npt.NDArray) -> None:
"""Insert vectors into the index (not supported by pynndescent)."""
raise NotImplementedError("Dynamic insertion is not supported by pynndescent.")
raise NotImplementedError("Insertion is not supported by pynndescent.")

def delete(self, indices: list[int]) -> None:
"""Delete vectors from the index (not supported by pynndescent)."""
raise NotImplementedError("Dynamic deletion is not supported by pynndescent.")
raise NotImplementedError("Deletion is not supported in pynndescent backend.")

def threshold(self, vectors: npt.NDArray, threshold: float) -> list[npt.NDArray]:
"""Find neighbors within a distance threshold."""
Expand Down

0 comments on commit b451458

Please sign in to comment.