From 5f02f0975c417458625517c4bedf5103c70a2b89 Mon Sep 17 00:00:00 2001
From: congqixia
Date: Thu, 22 Sep 2022 21:14:52 +0800
Subject: [PATCH] Fix L2 string case logic (#19369)

Signed-off-by: Congqi Xia
Signed-off-by: Congqi Xia
---
 tests/python_client/testcases/test_search.py | 113 ++++++++++---------
 1 file changed, 57 insertions(+), 56 deletions(-)

diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index fb12d14546a93..b34ddebf3cf83 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -2562,7 +2562,7 @@ def test_search_with_expression_large(self, dim):
         """
         target: test search with large expression
         method: test search with large expression
-        expected: searched successfully 
+        expected: searched successfully
         """
         # 1. initialize with data
         nb = 10000
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
                                                                        nb, dim=dim,
                                                                        is_index=True)[0:4]
-        
+
         # 2. create index
         index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}}
         collection_w.create_index("float_vector", index_param)
@@ -2596,21 +2596,21 @@ def test_search_with_expression_large_two(self, dim):
         """
         target: test search with large expression
         method: test one of the collection ids to another collection search for it, with the large expression
-        expected: searched successfully 
+        expected: searched successfully
         """
         # 1. initialize with data
         nb = 10000
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
                                                                        nb, dim=dim,
-                                                                       is_index=True)[0:4] 
+                                                                       is_index=True)[0:4]
+
-        
         # 2. create index
         index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}}
         collection_w.create_index("float_vector", index_param)
         collection_w.load()
-        
+
         nums = 5000
         vectors = [[random.random() for _ in range(dim)] for _ in range(nums)]
         vectors_id = [random.randint(0,nums)for _ in range(nums)]
@@ -2631,13 +2631,13 @@ def test_search_with_consistency_bounded(self, nq, dim, auto_id, _async):
         method: 1. create a collection
                 2. insert data
                 3. search with consistency_level is "bounded"
-        expected: searched successfully 
+        expected: searched successfully
         """
         limit = 1000
         nb_old = 500
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                        auto_id=auto_id,
-                                                                       dim=dim)[0:4] 
+                                                                       dim=dim)[0:4]
         # 2. search for original data after load
         vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         collection_w.search(vectors[:nq], default_search_field,
@@ -2649,7 +2649,7 @@ def test_search_with_consistency_bounded(self, nq, dim, auto_id, _async):
                                          "limit": nb_old,
                                          "_async": _async,
                                          })
-        
+
         kwargs = {}
         consistency_level = kwargs.get("consistency_level", CONSISTENCY_BOUNDED)
         kwargs.update({"consistency_level": consistency_level})
@@ -2664,7 +2664,7 @@ def test_search_with_consistency_bounded(self, nq, dim, auto_id, _async):
                             default_search_params, limit,
                             default_search_exp,
                             _async=_async,
                             **kwargs,
-                            ) 
+                            )
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_consistency_strong(self, nq, dim, auto_id, _async):
         """
         target: test search with consistency_level is "strong"
         method: 1. create a collection
                 2. insert data
                 3. search with consistency_level is "Strong"
-        expected: searched successfully 
+        expected: searched successfully
         """
         limit = 1000
         nb_old = 500
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                        auto_id=auto_id,
                                                                        dim=dim)[0:4]
         # 2. search for original data after load
         vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         collection_w.search(vectors[:nq], default_search_field,
@@ -2690,7 +2690,7 @@ def test_search_with_consistency_strong(self, nq, dim, auto_id, _async):
                                          "ids": insert_ids,
                                          "limit": nb_old,
                                          "_async": _async})
-        
+
         nb_new = 400
         _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                     auto_id=auto_id, dim=dim,
@@ -2708,7 +2708,7 @@ def test_search_with_consistency_strong(self, nq, dim, auto_id, _async):
                             check_items={"nq": nq,
                                          "ids": insert_ids,
                                          "limit": nb_old + nb_new,
-                                         "_async": _async}) 
+                                         "_async": _async})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_consistency_eventually(self, nq, dim, auto_id, _async):
@@ -2717,7 +2717,7 @@ def test_search_with_consistency_eventually(self, nq, dim, auto_id, _async):
         """
         target: test search with consistency_level is "eventually"
         method: 1. create a collection
                 2. insert data
                 3. search with consistency_level is "eventually"
-        expected: searched successfully 
+        expected: searched successfully
         """
         limit = 1000
         nb_old = 500
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                        auto_id=auto_id,
@@ -2746,7 +2746,7 @@ def test_search_with_consistency_eventually(self, nq, dim, auto_id, _async):
                             default_search_params, limit,
                             default_search_exp,
                             _async=_async,
                             **kwargs
-                            ) 
+                            )
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_consistency_session(self, nq, dim, auto_id, _async):
@@ -2755,13 +2755,13 @@ def test_search_with_consistency_session(self, nq, dim, auto_id, _async):
         """
         target: test search with consistency_level is "session"
         method: 1. create a collection
                 2. insert data
                 3. search with consistency_level is "session"
-        expected: searched successfully 
+        expected: searched successfully
         """
         limit = 1000
         nb_old = 500
         collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                        auto_id=auto_id,
-                                                                       dim=dim)[0:4] 
+                                                                       dim=dim)[0:4]
         # 2. search for original data after load
         vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         collection_w.search(vectors[:nq], default_search_field,
@@ -2772,7 +2772,7 @@ def test_search_with_consistency_session(self, nq, dim, auto_id, _async):
                                          "ids": insert_ids,
                                          "limit": nb_old,
                                          "_async": _async})
-        
+
         kwargs = {}
         consistency_level = kwargs.get("consistency_level", CONSISTENCY_SESSION)
         kwargs.update({"consistency_level": consistency_level})
@@ -2790,7 +2790,7 @@ def test_search_with_consistency_session(self, nq, dim, auto_id, _async):
                             check_items={"nq": nq,
                                          "ids": insert_ids,
                                          "limit": nb_old + nb_new,
-                                         "_async": _async}) 
+                                         "_async": _async})
 
 
 class TestSearchBase(TestcaseBase):
@@ -3193,7 +3193,7 @@ def test_query_vector_only(self):
 class TestsearchString(TestcaseBase):
     """
     ******************************************************************
-      The following cases are used to test search about string 
+      The following cases are used to test search about string
     ******************************************************************
     """
 
@@ -3222,7 +3222,7 @@ def _async(self, request):
     def test_search_string_field_not_primary(self, auto_id, _async):
         """
         target: test search with string expr and string field is not primary
-        method: create collection and insert data 
+        method: create collection and insert data
                 create index and collection load
                 collection search uses string expr in string field, string field is not primary
         expected: Search successfully
@@ -3235,7 +3235,7 @@ def test_search_string_field_not_primary(self, auto_id, _async):
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         output_fields = [default_string_field_name, default_float_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit, 
+                            default_search_params, default_limit,
                             default_search_string_exp,
                             output_fields=output_fields,
                             _async=_async,
@@ -3246,12 +3246,12 @@ def test_search_string_field_not_primary(self, auto_id, _async):
                                          "ids": insert_ids,
                                          "limit": default_limit,
                                          "_async": _async})
-        
+
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_is_primary_true(self, dim, _async):
         """
         target: test search with string expr and string field is primary
-        method: create collection and insert data 
+        method: create collection and insert data
                 create index and collection load
                 collection search uses string expr in string field ,string field is primary
         expected: Search successfully
@@ -3265,7 +3265,7 @@ def test_search_string_field_is_primary_true(self, dim, _async):
         output_fields = [default_string_field_name, default_float_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
-                            default_search_string_exp, 
+                            default_search_string_exp,
                             output_fields=output_fields,
                             _async=_async,
                             travel_timestamp=0,
@@ -3274,7 +3274,7 @@ def test_search_string_field_is_primary_true(self, dim, _async):
                                          "ids": insert_ids,
                                          "limit": default_limit,
                                          "_async": _async})
-        
+
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_mix_expr(self, dim, auto_id, _async):
@@ -3282,7 +3282,7 @@ def test_search_string_mix_expr(self, dim, auto_id, _async):
         """
         target: test search with mix string and int expr
         method: create collection and insert data
                 create index and collection load
-                collection search uses mix expr 
+                collection search uses mix expr
         expected: Search successfully
         """
         # 1. initialize with data
@@ -3310,7 +3310,7 @@ def test_search_string_with_invalid_expr(self, auto_id):
         target: test search data
         method: create collection and insert data
                 create index and collection load
-                collection search uses invalid string expr 
+                collection search uses invalid string expr
         expected: Raise exception
         """
 
@@ -3322,13 +3322,13 @@ def test_search_string_with_invalid_expr(self, auto_id):
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
-                            default_invaild_string_exp, 
+                            default_invaild_string_exp,
                             check_task=CheckTasks.err_res,
                             check_items={"err_code": 1,
                                          "err_msg": "failed to create query plan: type mismatch"}
                             )
-        
+
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name))
     def test_search_with_different_string_expr(self, dim, expression, _async):
@@ -3383,12 +3383,12 @@ def test_search_with_different_string_expr(self, dim, expression, _async):
     def test_search_string_field_is_primary_binary(self, dim, _async):
         """
         target: test search with string expr and string field is primary
-        method: create collection and insert data 
+        method: create collection and insert data
                 create index and collection load
                 collection search uses string expr in string field ,string field is primary
         expected: Search successfully
         """
-        
+
         # 1. initialize with binary data
         collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2,
                                                                                        is_binary=True,
                                                                                        dim=dim,
                                                                                        is_index=True)[0:4]
@@ -3411,19 +3411,19 @@ def test_search_string_field_is_primary_binary(self, dim, _async):
                                          "ids": insert_ids,
                                          "limit": 2,
                                          "_async": _async})
-        
+
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_binary(self, auto_id, dim, _async):
         """
         target: test search with string expr and string field is not primary
-        method: create an binary collection and insert data 
+        method: create an binary collection and insert data
                 create index and collection load
-                collection search uses string expr in string field, string field is not primary 
+                collection search uses string expr in string field, string field is not primary
         expected: Search successfully
         """
         # 1. initialize with binary data
-        
+
         collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2,
                                                                                        is_binary=True,
                                                                                        auto_id=auto_id,
@@ -3431,8 +3431,8 @@ def test_search_string_field_binary(self, auto_id, dim, _async):
                                                                                        dim=dim,
                                                                                        is_index=True)[0:4]
         # 2. create index
         default_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"}
-        collection_w.create_index("binary_vector", default_index) 
-        collection_w.load() 
+        collection_w.create_index("binary_vector", default_index)
+        collection_w.load()
         # 2. search with exception
         binary_vectors = cf.gen_binary_vectors(3000, dim)[1]
         search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
@@ -3452,7 +3452,7 @@ def test_search_mix_expr_with_binary(self, dim, auto_id, _async):
         target: test search with mix string and int expr
         method: create an binary collection and insert data
                 create index and collection load
-                collection search uses mix expr 
+                collection search uses mix expr
         expected: Search successfully
         """
         # 1. initialize with data
@@ -3480,10 +3480,10 @@ def test_search_mix_expr_with_binary(self, dim, auto_id, _async):
                                          "_async": _async})
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_string_field_not_primary_perfix(self, auto_id, _async):
+    def test_search_string_field_not_primary_prefix(self, auto_id, _async):
         """
         target: test search with string expr and string field is not primary
-        method: create collection and insert data 
+        method: create collection and insert data
                 create index and collection load
                 collection search uses string expr in string field, string field is not primary
         expected: Search successfully
@@ -3501,7 +3501,8 @@ def test_search_string_field_not_primary_perfix(self, auto_id, _async):
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         output_fields = [default_float_field_name, default_string_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
+                            # search all buckets
+                            {"metric_type": "L2", "params": {"nprobe": 100}}, default_limit,
                             perfix_expr,
                             output_fields=output_fields,
                             _async=_async,
@@ -3518,7 +3519,7 @@ def test_search_all_index_with_compare_expr(self, _async):
         """
         target: test delete after creating index
         method: 1.create collection , insert data, primary_field is string field
-                2.create string and float index ,delete entities, query 
+                2.create string and float index ,delete entities, query
                 3.search
         expected: assert index and deleted id not in search result
         """
@@ -3540,7 +3541,7 @@ def test_search_all_index_with_compare_expr(self, _async):
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit, 
+                            default_search_params, default_limit,
                             expr,
                             output_fields=output_fields,
                             _async=_async,
@@ -3558,7 +3559,7 @@ def test_search_string_field_is_primary_insert_empty(self, _async):
         target: test search with string expr and string field is primary
         method: create collection ,string field is primary
                 collection load and insert data
-                collection search uses string expr in string field 
+                collection search uses string expr in string field
         expected: Search successfully
         """
         # 1. initialize with data
@@ -3567,12 +3568,12 @@ def test_search_string_field_is_primary_insert_empty(self, _async):
 
         nb = 3000
         data = cf.gen_default_list_data(nb)
-        data[2] = [""for _ in range(nb)] 
+        data[2] = [""for _ in range(nb)]
         collection_w.insert(data=data)
-        
+
         collection_w.load()
-        
-        
+
+
         search_string_exp = "varchar >= \"\""
         limit =1
 
@@ -3582,7 +3583,7 @@ def test_search_string_field_is_primary_insert_empty(self, _async):
         output_fields = [default_string_field_name, default_float_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, limit,
-                            search_string_exp, 
+                            search_string_exp,
                             output_fields=output_fields,
                             _async=_async,
                             travel_timestamp=0,
@@ -3596,18 +3597,19 @@ def test_search_string_field_not_primary_is_empty(self, _async):
         """
         target: test search with string expr and string field is not primary
-        method: create collection and insert data 
+        method: create collection and insert data
                 create index and collection load
                 collection search uses string expr in string field, string field is not primary
         expected: Search successfully
         """
         # 1. initialize with data
-        collection_w, _, _, insert_ids = \
+        collection_w, _, _, _= \
             self.init_collection_general(prefix, False, primary_field=ct.default_int64_field_name)[0:4]
 
         nb = 3000
         data = cf.gen_default_list_data(nb)
-        data[2] = [""for _ in range(nb)] 
+        insert_ids = data[0]
+        data[2] = [""for _ in range(nb)]
         collection_w.insert(data)
 
         assert collection_w.num_entities == nb
@@ -3619,13 +3621,13 @@ def test_search_string_field_not_primary_is_empty(self, _async):
 
         search_string_exp = "varchar >= \"\""
 
-        
+
         # 3. search
         log.info("test_search_string_field_not_primary: searching collection %s" % collection_w.name)
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         output_fields = [default_string_field_name, default_float_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit, 
+                            default_search_params, default_limit,
                             search_string_exp,
                             output_fields=output_fields,
                             _async=_async,
@@ -3635,4 +3637,3 @@ def test_search_string_field_not_primary_is_empty(self, _async):
                                          "ids": insert_ids,
                                          "limit": default_limit,
                                          "_async": _async})
-
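
The change at @@ -3501 above is the actual L2 string-case fix: the prefix test now searches with an explicit {"metric_type": "L2", "params": {"nprobe": 100}} instead of default_search_params, so the IVF_FLAT index (built with nlist=100) probes every bucket and rows that match the varchar prefix filter cannot be dropped by coarse quantization. A minimal standalone sketch of that behaviour, assuming plain pymilvus against a running Milvus instance; the collection name "string_demo", the 128-d "float_vector" field and the "varchar" field are illustrative assumptions, not objects created by this test suite:

    # Hypothetical standalone example; not part of the patched test file.
    import random
    from pymilvus import connections, Collection

    connections.connect(host="localhost", port="19530")   # assumes a running Milvus
    collection = Collection("string_demo")                 # hypothetical collection

    # Same index shape the tests build: IVF_FLAT with 100 coarse buckets (nlist).
    index_param = {"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 100}}
    collection.create_index("float_vector", index_param)
    collection.load()

    vectors = [[random.random() for _ in range(128)] for _ in range(2)]
    res = collection.search(vectors, "float_vector",
                            # nprobe == nlist -> every bucket is scanned, so the
                            # prefix-filtered matches are returned deterministically
                            {"metric_type": "L2", "params": {"nprobe": 100}},
                            limit=10,
                            expr='varchar like "0%"',
                            output_fields=["varchar"])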
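The consistency tests touched above (bounded/strong/eventually/session) all build a kwargs dict and hand consistency_level straight through to search(). A hedged continuation of the previous sketch, assuming the suite's CONSISTENCY_* constants correspond to the level names pymilvus accepts ("Bounded", "Strong", "Eventually", "Session"):

    # Reuses `collection` and `vectors` from the sketch above.
    kwargs = {}
    consistency_level = kwargs.get("consistency_level", "Bounded")
    kwargs.update({"consistency_level": consistency_level})

    res = collection.search(vectors, "float_vector",
                            {"metric_type": "L2", "params": {"nprobe": 10}},
                            limit=10,
                            expr="int64 >= 0",   # hypothetical scalar field
                            **kwargs)            # forwards consistency_level=...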
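The binary cases (test_search_string_field_is_primary_binary, test_search_string_field_binary, test_search_mix_expr_with_binary) apply the same string filters on top of a BIN_IVF_FLAT index with the JACCARD metric. A final hedged continuation of the sketch, assuming a separate hypothetical binary collection, since a collection of this Milvus generation holds a single vector field:

    # Hypothetical collection with a "binary_vector" field plus a varchar field.
    binary_collection = Collection("string_demo_binary")

    binary_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"}
    binary_collection.create_index("binary_vector", binary_index)
    binary_collection.load()

    # Binary vectors are passed as bytes; the dimension must be a multiple of 8.
    dim = 128
    binary_vectors = [bytes([random.randint(0, 255) for _ in range(dim // 8)])]

    search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
    res = binary_collection.search(binary_vectors, "binary_vector", search_params,
                                   limit=2,
                                   expr='varchar >= "0"',   # same varchar filter style as the tests
                                   output_fields=["varchar"])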