-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Rajvaibhav Rahane <[email protected]>
- Loading branch information
Showing
11 changed files
with
309 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
remote_vector_index_builder/core/common/models/index_builder/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from .gpu_index_cagra_config import GPUIndexCagraConfig | ||
from .ivf_pq_search_cagra_config import IVFPQSearchCagraConfig | ||
from .ivf_pq_build_cagra_config import IVFPQBuildCagraConfig | ||
from .index_hnsw_cagra_config import IndexHNSWCagraConfig | ||
from .gpu_index_build_config import GPUIndexBuildConfig | ||
from .graph_build_algo import GraphBuildAlgo | ||
|
||
__all__ = [ | ||
"GPUIndexCagraConfig", | ||
"IVFPQSearchCagraConfig", | ||
"IVFPQBuildCagraConfig", | ||
"IndexHNSWCagraConfig", | ||
"GPUIndexBuildConfig", | ||
"GraphBuildAlgo", | ||
] |
26 changes: 26 additions & 0 deletions
26
remote_vector_index_builder/core/common/models/index_builder/gpu_index_build_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from dataclasses import dataclass, field | ||
|
||
from ..index_build_parameters import SpaceType | ||
|
||
from .gpu_index_cagra_config import GPUIndexCagraConfig | ||
from .index_hnsw_cagra_config import IndexHNSWCagraConfig | ||
|
||
|
||
@dataclass
class GPUIndexBuildConfig:
    """Top-level bundle of settings for a GPU index build.

    Combines the HNSW-side options, the GPU CAGRA options and the distance
    metric into one object. Every field has a default, so the class can be
    instantiated without arguments.
    """

    # Options for the HNSW (IndexHNSWCagra) side. default_factory gives each
    # GPUIndexBuildConfig its own instance instead of a shared one.
    index_hnsw_cagra_config: IndexHNSWCagraConfig = field(
        default_factory=IndexHNSWCagraConfig
    )

    # Options for the GPU CAGRA index, likewise created per instance.
    gpu_index_cagra_config: GPUIndexCagraConfig = field(
        default_factory=GPUIndexCagraConfig
    )

    # Distance metric the GPU index is created with.
    metric: SpaceType = SpaceType.L2
34 changes: 34 additions & 0 deletions
34
remote_vector_index_builder/core/common/models/index_builder/gpu_index_cagra_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from dataclasses import dataclass, field | ||
|
||
from .graph_build_algo import GraphBuildAlgo | ||
from .ivf_pq_build_cagra_config import IVFPQBuildCagraConfig | ||
from .ivf_pq_search_cagra_config import IVFPQSearchCagraConfig | ||
|
||
|
||
@dataclass
class GPUIndexCagraConfig:
    """Options used when building the GPU CAGRA graph index."""

    # Degree of the input (intermediate) graph that is later pruned.
    intermediate_graph_degree: int = 64

    # Degree of the final output graph.
    graph_degree: int = 32

    # ANN algorithm used to build the kNN graph.
    graph_build_algo: GraphBuildAlgo = GraphBuildAlgo.IVF_PQ

    # NOTE(review): not documented in the original; presumably controls
    # whether the dataset is kept with the index -- confirm against the
    # FAISS GpuIndexCagraConfig documentation.
    store_dataset: bool = False

    # GPU device on which the index is resident.
    device: int = 0

    # IVF-PQ build parameters; default_factory creates a fresh instance for
    # every GPUIndexCagraConfig.
    ivf_pq_build_config: IVFPQBuildCagraConfig = field(
        default_factory=IVFPQBuildCagraConfig
    )

    # IVF-PQ search parameters; also created fresh per instance.
    ivf_pq_search_config: IVFPQSearchCagraConfig = field(
        default_factory=IVFPQSearchCagraConfig
    )
13 changes: 13 additions & 0 deletions
13
remote_vector_index_builder/core/common/models/index_builder/graph_build_algo.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from enum import Enum | ||
|
||
|
||
class GraphBuildAlgo(Enum):
    """Algorithms that can be selected to build the kNN graph.

    Each member's value is the same string as its name, so a member can be
    looked up from its string form, e.g. ``GraphBuildAlgo("IVF_PQ")``.
    """

    # Build the graph with IVF-PQ.
    IVF_PQ = "IVF_PQ"

    # Build the graph with NN-Descent.
    NN_DESCENT = "NN_DESCENT"
28 changes: 28 additions & 0 deletions
28
remote_vector_index_builder/core/common/models/index_builder/index_hnsw_cagra_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class IndexHNSWCagraConfig:
    """Options for the HNSW index that wraps a CAGRA-built graph."""

    # Expansion factor used at search time.
    ef_search: int = 256

    # Expansion factor used at construction time.
    ef_construction: int = 40

    # When True the index is immutable: the kNN graph from GpuIndexCagra is
    # copied into the base level of IndexHNSWCagra and no upper levels are
    # added. The HNSW index can then be searched, but vectors can no longer
    # be added to it.
    base_level_only: bool = True

    # When True, the internal storage (the `Index` member) is deleted when
    # the destructor is called.
    own_fields: bool = True
50 changes: 50 additions & 0 deletions
50
remote_vector_index_builder/core/common/models/index_builder/ivf_pq_build_cagra_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class IVFPQBuildCagraConfig:
    """IVF-PQ parameters used while building the CAGRA kNN graph."""

    # Number of inverted lists (clusters).
    # Hint: aim for roughly 1,000 to 10,000 vectors per cluster
    # (`n_rows / n_lists`).
    n_lists: int = 1000

    # Number of iterations spent searching for k-means centers during
    # index building.
    kmeans_n_iters: int = 10

    # Fraction of the data used during iterative k-means building.
    kmeans_trainset_fraction: float = 0.1

    # Bit length of a vector element after PQ compression.
    # Possible values: [4, 5, 6, 7, 8].
    # Hint: a smaller 'pq_bits' gives a smaller index and better search
    # performance, at the cost of lower recall.
    pq_bits: int = 8

    # Dimensionality of the vector after PQ compression. When zero, an
    # optimal value is selected using a heuristic.
    # NB: `pq_dim * pq_bits` must be a multiple of 8.
    # Hint: a smaller 'pq_dim' gives a smaller index and better search
    # performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be any
    # number, though multiples of 8 are desirable for good performance.
    # If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good
    # performance it is desirable that 'pq_dim' is a multiple of 32.
    # Ideally 'pq_dim' should also be a divisor of the dataset dimension.
    pq_dim: int = 16

    # By default the algorithm allocates more space than necessary for
    # individual clusters. This amortizes the cost of memory allocation and
    # reduces the number of data copies during repeated calls to `extend`
    # (extending the database).
    #
    # The alternative is conservative allocation: when enabled, the
    # algorithm always allocates the minimum amount of memory required to
    # store the given number of records. Set this flag to `true` to use as
    # little GPU memory for the database as possible.
    conservative_memory_allocation: bool = True
14 changes: 14 additions & 0 deletions
14
remote_vector_index_builder/core/common/models/index_builder/ivf_pq_search_cagra_config.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. | ||
|
||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class IVFPQSearchCagraConfig:
    """IVF-PQ search parameters used while building the CAGRA kNN graph."""

    # Number of clusters to search.
    n_probes: int = 30
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Copyright OpenSearch Contributors | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# The OpenSearch Contributors require contributions made to | ||
# this file be licensed under the Apache-2.0 license or a | ||
# compatible open source license. |
73 changes: 73 additions & 0 deletions
73
remote_vector_index_builder/core/index_builder/index_config_builder.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from typing import Any, Dict, Optional | ||
from remote_vector_index_builder.core.common.models import ( | ||
IndexHNSWCagraConfig, | ||
GPUIndexCagraConfig, | ||
SpaceType, | ||
IVFPQBuildCagraConfig, | ||
IVFPQSearchCagraConfig, | ||
GraphBuildAlgo, | ||
GPUIndexBuildConfig, | ||
) | ||
|
||
|
||
class IndexConfigBuilder:
    """Fluent builder that assembles a ``GPUIndexBuildConfig``.

    Each ``set_*`` method stores one part of the configuration and returns
    the builder so calls can be chained; ``build`` fills in defaults for any
    part that was never set and returns the final config object.
    """

    def __init__(self):
        self._hnsw_config: Optional[IndexHNSWCagraConfig] = None
        self._gpu_config: Optional[GPUIndexCagraConfig] = None
        self._metric: SpaceType = SpaceType("l2")  # default metric

    def set_hnsw_config(self, params: Dict[str, Any]) -> "IndexConfigBuilder":
        """Set the HNSW options from a dict of keyword arguments.

        Args:
            params: Keyword arguments for ``IndexHNSWCagraConfig``. An empty
                or ``None`` value selects the default configuration.

        Returns:
            This builder, to allow chaining.
        """
        self._hnsw_config = (
            IndexHNSWCagraConfig(**params) if params else IndexHNSWCagraConfig()
        )
        return self

    def set_gpu_config(self, params: Dict[str, Any]) -> "IndexConfigBuilder":
        """Set the GPU CAGRA options from a dict of keyword arguments.

        The keys ``ivf_pq_build_params``, ``ivf_pq_search_params`` and
        ``graph_build_algo`` are converted into their config/enum objects;
        all remaining keys are passed straight to ``GPUIndexCagraConfig``.
        The caller's dict is not modified.

        Args:
            params: GPU configuration values. An empty or ``None`` value
                selects the default configuration.

        Returns:
            This builder, to allow chaining.
        """
        if not params:
            self._gpu_config = GPUIndexCagraConfig()
            return self

        # Pop from a shallow copy: popping from `params` itself would strip
        # the nested keys from the caller's dict, so a second build from the
        # same request parameters would silently fall back to defaults.
        remaining = dict(params)

        ivf_pq_build_params = remaining.pop("ivf_pq_build_params", None)
        ivf_pq_build_config = (
            IVFPQBuildCagraConfig(**ivf_pq_build_params)
            if ivf_pq_build_params
            else IVFPQBuildCagraConfig()
        )

        ivf_pq_search_params = remaining.pop("ivf_pq_search_params", None)
        ivf_pq_search_config = (
            IVFPQSearchCagraConfig(**ivf_pq_search_params)
            if ivf_pq_search_params
            else IVFPQSearchCagraConfig()
        )

        graph_build_algo_param = remaining.pop("graph_build_algo", None)
        graph_build_algo = (
            GraphBuildAlgo(graph_build_algo_param)
            if graph_build_algo_param
            else GraphBuildAlgo.IVF_PQ
        )

        self._gpu_config = GPUIndexCagraConfig(
            **remaining,
            graph_build_algo=graph_build_algo,
            ivf_pq_build_config=ivf_pq_build_config,
            ivf_pq_search_config=ivf_pq_search_config,
        )
        return self

    def set_metric(self, metric: str) -> "IndexConfigBuilder":
        """Set the distance metric from its string value.

        Args:
            metric: Value passed to the ``SpaceType`` constructor.

        Returns:
            This builder, to allow chaining.
        """
        self._metric = SpaceType(metric)
        return self

    def build(self) -> GPUIndexBuildConfig:
        """Return the assembled config, defaulting any part never set."""
        if self._hnsw_config is None:
            self._hnsw_config = IndexHNSWCagraConfig()
        if self._gpu_config is None:
            self._gpu_config = GPUIndexCagraConfig()

        return GPUIndexBuildConfig(
            index_hnsw_cagra_config=self._hnsw_config,
            gpu_index_cagra_config=self._gpu_config,
            metric=self._metric,
        )
22 changes: 22 additions & 0 deletions
22
remote_vector_index_builder/core/index_builder/index_config_director.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from typing import Dict, Any | ||
from remote_vector_index_builder.core.common.models.index_builder.gpu_index_build_config import ( | ||
GPUIndexBuildConfig, | ||
) | ||
from remote_vector_index_builder.core.index_builder.index_config_builder import ( | ||
IndexConfigBuilder, | ||
) | ||
|
||
|
||
class IndexConfigDirector:
    """Drives an ``IndexConfigBuilder`` to produce a complete index config."""

    def __init__(self, builder: IndexConfigBuilder):
        # The builder is supplied by the caller and reused on every call.
        self._builder = builder

    def construct_config(self, config_params: Dict[str, Any]) -> GPUIndexBuildConfig:
        """Build a ``GPUIndexBuildConfig`` from a parameter dict.

        Args:
            config_params: May contain ``"hnsw_config"`` and ``"gpu_config"``
                (each defaulting to an empty dict) and ``"metric"``
                (defaulting to ``"l2"``).

        Returns:
            The config produced by the builder's ``build`` method.
        """
        # Each step reads its own key and then calls the builder, in the
        # order HNSW, GPU, metric.
        step = self._builder.set_hnsw_config(config_params.get("hnsw_config", {}))
        step = step.set_gpu_config(config_params.get("gpu_config", {}))
        step = step.set_metric(config_params.get("metric", "l2"))
        return step.build()