Skip to content

Commit

Permalink
Add GPU Index Config Files
Browse files Browse the repository at this point in the history
Signed-off-by: Rajvaibhav Rahane <[email protected]>
  • Loading branch information
Rajrahane committed Mar 5, 2025
1 parent 1f19156 commit 4a6705b
Show file tree
Hide file tree
Showing 11 changed files with 309 additions and 0 deletions.
21 changes: 21 additions & 0 deletions remote_vector_index_builder/core/common/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,24 @@
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from .index_build_parameters import SpaceType
from .index_builder.gpu_index_cagra_config import (
GPUIndexCagraConfig,
IVFPQSearchCagraConfig,
IVFPQBuildCagraConfig,
)
from .index_builder.index_hnsw_cagra_config import IndexHNSWCagraConfig
from .index_builder.gpu_index_build_config import GPUIndexBuildConfig

from .index_builder.graph_build_algo import GraphBuildAlgo

# Public API of the models package: the names re-exported from the
# submodules imported above.
__all__ = [
"SpaceType",
"GPUIndexCagraConfig",
"IVFPQSearchCagraConfig",
"IVFPQBuildCagraConfig",
"IndexHNSWCagraConfig",
"GPUIndexBuildConfig",
"GraphBuildAlgo",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from .gpu_index_cagra_config import GPUIndexCagraConfig
from .ivf_pq_search_cagra_config import IVFPQSearchCagraConfig
from .ivf_pq_build_cagra_config import IVFPQBuildCagraConfig
from .index_hnsw_cagra_config import IndexHNSWCagraConfig
from .gpu_index_build_config import GPUIndexBuildConfig
from .graph_build_algo import GraphBuildAlgo

# Public API of the index_builder models sub-package: the config classes and
# the enum imported above from their individual modules.
__all__ = [
"GPUIndexCagraConfig",
"IVFPQSearchCagraConfig",
"IVFPQBuildCagraConfig",
"IndexHNSWCagraConfig",
"GPUIndexBuildConfig",
"GraphBuildAlgo",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from dataclasses import dataclass, field

from ..index_build_parameters import SpaceType

from .gpu_index_cagra_config import GPUIndexCagraConfig
from .index_hnsw_cagra_config import IndexHNSWCagraConfig


@dataclass
class GPUIndexBuildConfig:
    """Top-level bundle of the settings used to build a GPU index.

    Attributes:
        index_hnsw_cagra_config: HNSW-Cagra index settings. When omitted, a
            fresh ``IndexHNSWCagraConfig`` with its own defaults is created
            for each instance.
        gpu_index_cagra_config: GPU Cagra index settings. When omitted, a
            fresh ``GPUIndexCagraConfig`` with its own defaults is created
            for each instance.
        metric: Type of metric the GPU index is created with. Defaults to
            ``SpaceType.L2``.
    """

    # default_factory gives every instance its own nested config object
    # instead of sharing one across instances.
    index_hnsw_cagra_config: IndexHNSWCagraConfig = field(
        default_factory=IndexHNSWCagraConfig,
    )
    gpu_index_cagra_config: GPUIndexCagraConfig = field(
        default_factory=GPUIndexCagraConfig,
    )
    metric: SpaceType = SpaceType.L2
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from dataclasses import dataclass, field

from .graph_build_algo import GraphBuildAlgo
from .ivf_pq_build_cagra_config import IVFPQBuildCagraConfig
from .ivf_pq_search_cagra_config import IVFPQSearchCagraConfig


@dataclass
class GPUIndexCagraConfig:
    """Settings for the GPU Cagra index.

    Attributes:
        intermediate_graph_degree: Degree of the input graph used for pruning.
        graph_degree: Degree of the output graph.
        graph_build_algo: ANN algorithm used to build the knn graph.
        store_dataset: Defaults to ``False``.
            NOTE(review): presumably controls whether the dataset is kept
            with the index - confirm against the underlying library.
        device: GPU device on which the index is resident.
        ivf_pq_build_config: IVF-PQ build settings; a fresh
            ``IVFPQBuildCagraConfig`` is created per instance when omitted.
        ivf_pq_search_config: IVF-PQ search settings; a fresh
            ``IVFPQSearchCagraConfig`` is created per instance when omitted.
    """

    intermediate_graph_degree: int = 64
    graph_degree: int = 32
    graph_build_algo: GraphBuildAlgo = GraphBuildAlgo.IVF_PQ
    store_dataset: bool = False
    device: int = 0

    # default_factory gives every instance its own nested config object.
    ivf_pq_build_config: IVFPQBuildCagraConfig = field(
        default_factory=IVFPQBuildCagraConfig,
    )
    ivf_pq_search_config: IVFPQSearchCagraConfig = field(
        default_factory=IVFPQSearchCagraConfig,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from enum import Enum


class GraphBuildAlgo(Enum):
    """Algorithms that can be selected for building the knn graph.

    Each member's value is the same string as its name, so a member can be
    looked up from that string, e.g. ``GraphBuildAlgo("IVF_PQ")``.
    """

    IVF_PQ = "IVF_PQ"
    NN_DESCENT = "NN_DESCENT"
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from dataclasses import dataclass


@dataclass
class IndexHNSWCagraConfig:
    """Settings for the HNSW-Cagra index."""

    # Search-time expansion factor.
    ef_search: int = 256

    # Construction-time expansion factor.
    ef_construction: int = 40

    # True makes the index immutable: the knn graph from GpuIndexCagra is
    # copied into the base level of IndexHNSWCagra and no upper levels are
    # added. The HNSW index can then be searched, but vectors can no longer
    # be added to it.
    base_level_only: bool = True

    # True means the internal storage (the Index variable) is deleted when
    # the destructor is called.
    own_fields: bool = True
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from dataclasses import dataclass


@dataclass
class IVFPQBuildCagraConfig:
    """IVF-PQ parameters used while building the index."""

    # Number of inverted lists (clusters).
    # Hint: aim for roughly 1,000 to 10,000 vectors per cluster
    # (`n_rows / n_lists`).
    n_lists: int = 1000

    # Number of iterations spent searching for kmeans centers during index
    # building.
    kmeans_n_iters: int = 10

    # Fraction of the data used during iterative kmeans building.
    kmeans_trainset_fraction: float = 0.1

    # Bit length of a vector element after PQ compression.
    # Possible values: [4, 5, 6, 7, 8].
    # Hint: a smaller 'pq_bits' gives a smaller index and better search
    # performance, at the cost of lower recall.
    pq_bits: int = 8

    # Dimensionality of the vector after PQ compression. When zero, an
    # optimal value is selected using a heuristic.
    # `pq_dim * pq_bits` must be a multiple of 8.
    # Hint: a smaller 'pq_dim' gives a smaller index and better search
    # performance, at the cost of lower recall. With 'pq_bits' == 8 any
    # 'pq_dim' is allowed, though multiples of 8 are desirable for good
    # performance; otherwise 'pq_dim' should be a multiple of 8. A multiple
    # of 32 is desirable for good performance, and ideally 'pq_dim' also
    # divides the dataset dim.
    pq_dim: int = 16

    # Memory allocation strategy for individual clusters.
    # When False, the algorithm allocates more space than necessary for each
    # cluster, which amortizes the cost of memory allocation and reduces the
    # number of data copies during repeated calls to `extend` (extending the
    # database).
    # When True (the default of this config), the algorithm always allocates
    # the minimum amount of memory required to store the given number of
    # records; choose this to use as little GPU memory for the database as
    # possible.
    conservative_memory_allocation: bool = True
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.

from dataclasses import dataclass


@dataclass
class IVFPQSearchCagraConfig:
    """IVF-PQ parameters used at search time."""

    # How many clusters are searched.
    n_probes: int = 30
6 changes: 6 additions & 0 deletions remote_vector_index_builder/core/index_builder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from typing import Any, Dict, Optional
from remote_vector_index_builder.core.common.models import (
IndexHNSWCagraConfig,
GPUIndexCagraConfig,
SpaceType,
IVFPQBuildCagraConfig,
IVFPQSearchCagraConfig,
GraphBuildAlgo,
GPUIndexBuildConfig,
)


class IndexConfigBuilder:
    """Fluent builder that assembles a ``GPUIndexBuildConfig`` from plain dicts.

    Every ``set_*`` method returns the builder itself so calls can be chained;
    ``build()`` produces the final configuration. Any part that was never set
    falls back to the defaults of the corresponding config class.
    """

    def __init__(self):
        self._hnsw_config: Optional[IndexHNSWCagraConfig] = None
        self._gpu_config: Optional[GPUIndexCagraConfig] = None
        self._metric: SpaceType = SpaceType("l2")  # default metric

    def set_hnsw_config(self, params: Dict[str, Any]) -> "IndexConfigBuilder":
        """Set the HNSW-Cagra configuration.

        Args:
            params: Keyword arguments for ``IndexHNSWCagraConfig``. An empty
                or otherwise falsy value selects the class defaults.

        Returns:
            This builder, for chaining.
        """
        self._hnsw_config = (
            IndexHNSWCagraConfig(**params) if params else IndexHNSWCagraConfig()
        )
        return self

    def set_gpu_config(self, params: Dict[str, Any]) -> "IndexConfigBuilder":
        """Set the GPU Cagra configuration.

        Three keys of ``params`` are handled specially and converted before
        the rest is forwarded to ``GPUIndexCagraConfig``:

        * ``"ivf_pq_build_params"`` becomes an ``IVFPQBuildCagraConfig``
        * ``"ivf_pq_search_params"`` becomes an ``IVFPQSearchCagraConfig``
        * ``"graph_build_algo"`` becomes a ``GraphBuildAlgo`` member

        A missing or falsy value for any of them selects the default.

        Args:
            params: GPU configuration parameters. The dict is not modified.
                An empty or otherwise falsy value selects the class defaults.

        Returns:
            This builder, for chaining.
        """
        if not params:
            self._gpu_config = GPUIndexCagraConfig()
            return self

        # Work on a shallow copy: the special keys are popped below, and the
        # caller's dict must stay intact so it can be reused for another build.
        remaining = dict(params)

        build_params = remaining.pop("ivf_pq_build_params", None)
        search_params = remaining.pop("ivf_pq_search_params", None)
        algo_param = remaining.pop("graph_build_algo", None)

        # `or {}` maps a missing/falsy section to the class defaults.
        ivf_pq_build_config = IVFPQBuildCagraConfig(**(build_params or {}))
        ivf_pq_search_config = IVFPQSearchCagraConfig(**(search_params or {}))
        graph_build_algo = (
            GraphBuildAlgo(algo_param) if algo_param else GraphBuildAlgo.IVF_PQ
        )

        self._gpu_config = GPUIndexCagraConfig(
            **remaining,
            graph_build_algo=graph_build_algo,
            ivf_pq_build_config=ivf_pq_build_config,
            ivf_pq_search_config=ivf_pq_search_config,
        )
        return self

    def set_metric(self, metric: str) -> "IndexConfigBuilder":
        """Set the metric by converting ``metric`` with ``SpaceType(metric)``.

        Returns:
            This builder, for chaining.
        """
        self._metric = SpaceType(metric)
        return self

    def build(self) -> GPUIndexBuildConfig:
        """Return the assembled ``GPUIndexBuildConfig``.

        Configurations that were never set are filled in with class defaults
        (and remembered on the builder) before the result is created.
        """
        if self._hnsw_config is None:
            self._hnsw_config = IndexHNSWCagraConfig()
        if self._gpu_config is None:
            self._gpu_config = GPUIndexCagraConfig()

        return GPUIndexBuildConfig(
            index_hnsw_cagra_config=self._hnsw_config,
            gpu_index_cagra_config=self._gpu_config,
            metric=self._metric,
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Dict, Any
from remote_vector_index_builder.core.common.models.index_builder.gpu_index_build_config import (
GPUIndexBuildConfig,
)
from remote_vector_index_builder.core.index_builder.index_config_builder import (
IndexConfigBuilder,
)


class IndexConfigDirector:
    """Drives an ``IndexConfigBuilder`` to produce an index configuration."""

    def __init__(self, builder: IndexConfigBuilder):
        self._builder = builder

    def construct_config(self, config_params: Dict[str, Any]) -> GPUIndexBuildConfig:
        """Build a configuration from ``config_params``.

        Reads the ``"hnsw_config"``, ``"gpu_config"`` and ``"metric"`` keys
        (defaulting to ``{}``, ``{}`` and ``"l2"`` respectively), feeds them
        to the builder in that order and returns the result of ``build()``.
        """
        staged = self._builder.set_hnsw_config(config_params.get("hnsw_config", {}))
        staged = staged.set_gpu_config(config_params.get("gpu_config", {}))
        staged = staged.set_metric(config_params.get("metric", "l2"))
        return staged.build()

0 comments on commit 4a6705b

Please sign in to comment.