Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search iterator support range-search parameter(#1793) #1795

Merged
merged 1 commit into from
Nov 22, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions pymilvus/orm/iterator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from copy import deepcopy
from typing import Any, Dict, List, Optional, TypeVar

Expand Down Expand Up @@ -35,6 +36,8 @@
from .schema import CollectionSchema
from .types import DataType

LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.ERROR)
QueryIterator = TypeVar("QueryIterator")
SearchIterator = TypeVar("SearchIterator")

Expand Down Expand Up @@ -313,12 +316,18 @@ def __init__(
def __init_search_iterator(self):
init_page = self.__execute_next_search(self._param, self._expr)
if len(init_page) == 0:
raise MilvusException(
message="Cannot init search iterator because there's no matched vectors returned"
message = (
"Cannot init search iterator because init page contains no matched rows, "
"please check the radius and range_filter set up by searchParams"
)
LOGGER.error(message)
self._cache_id = NO_CACHE_ID
self._init_success = False
return
self._cache_id = iterator_cache.cache(init_page, NO_CACHE_ID)
self.__set_up_range_parameters(init_page)
self.__update_filtered_ids(init_page)
self._init_success = True

def __set_up_range_parameters(self, page: SearchPage):
first_hit, last_hit = page[0], page[-1]
Expand Down Expand Up @@ -371,10 +380,23 @@ def __check_metrics(self):
so range search parameters are disabled to clients"""

def __check_rm_range_search_parameters(self):
if PARAMS in self._param and RADIUS in self._param[PARAMS]:
del self._param[PARAMS][RADIUS]
if PARAMS in self._param and RANGE_FILTER in self._param[PARAMS]:
del self._param[PARAMS][RANGE_FILTER]
if (
(PARAMS in self._param)
and (RADIUS in self._param[PARAMS])
and (RANGE_FILTER in self._param[PARAMS])
):
radius = self._param[PARAMS][RADIUS]
range_filter = self._param[PARAMS][RANGE_FILTER]
if metrics_positive_related(self._param[METRIC_TYPE]) and radius <= range_filter:
raise MilvusException(
message=f"for metrics:{self._param[METRIC_TYPE]}, radius must be "
f"larger than range_filter, please adjust your parameter"
)
if not metrics_positive_related(self._param[METRIC_TYPE]) and radius >= range_filter:
raise MilvusException(
message=f"for metrics:{self._param[METRIC_TYPE]}, radius must be "
f"smalled than range_filter, please adjust your parameter"
)

def __check_offset(self):
if self._kwargs.get(OFFSET, 0) != 0:
Expand Down Expand Up @@ -432,7 +454,7 @@ def __push_new_page_to_cache(self, page: SearchPage) -> int:

def next(self):
# 0. check reached limit
if self.__check_reached_limit():
if not self._init_success or self.__check_reached_limit():
return SearchPage(None)
ret_len = self._iterator_params[BATCH_SIZE]
if self._limit is not UNLIMITED:
Expand All @@ -446,7 +468,7 @@ def next(self):
return ret_page

# 2. if cached page not enough, try to fill the result by probing with constant width
# until finish filling or exceed max trial time: 10
# until finish filling or exceeding max trial time: 10
new_page = self.__try_search_fill()
cached_page_len = self.__push_new_page_to_cache(new_page)
ret_len = min(cached_page_len, ret_len)
Expand Down Expand Up @@ -522,9 +544,17 @@ def __next_params(self, coefficient: int):
coefficient = max(1, coefficient)
next_params = self._param.copy()
if metrics_positive_related(self._param[METRIC_TYPE]):
next_params[PARAMS][RADIUS] = self._tail_band + self._width * coefficient
next_radius = self._tail_band + self._width * coefficient
if RADIUS in self._param[PARAMS] and next_radius > self._param[PARAMS][RADIUS]:
next_params[PARAMS][RADIUS] = self._param[PARAMS][RADIUS]
else:
next_params[PARAMS][RADIUS] = next_radius
else:
next_params[PARAMS][RADIUS] = self._tail_band - self._width * coefficient
next_radius = self._tail_band - self._width * coefficient
if RADIUS in self._param[PARAMS] and next_radius < self._param[PARAMS][RADIUS]:
next_params[PARAMS][RADIUS] = self._param[PARAMS][RADIUS]
else:
next_params[PARAMS][RADIUS] = next_radius
next_params[PARAMS][RANGE_FILTER] = self._tail_band
return next_params

Expand Down
Loading