Skip to content

Commit

Permalink
enhance: refine group_by_field and provide group_by example (#1809)
Browse files Browse the repository at this point in the history
Signed-off-by: MrPresent-Han <[email protected]>
  • Loading branch information
MrPresent-Han committed Jan 5, 2024
1 parent f393302 commit 30ee1ab
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 3 deletions.
63 changes: 63 additions & 0 deletions examples/group_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from pymilvus import CollectionSchema, FieldSchema, Collection, connections, DataType, Partition, utility
import random
import numpy as np
import secrets


def generate_random_hex_string(length):
    """Return a random hexadecimal string of exactly ``length`` characters.

    Args:
        length: Number of hex characters wanted (non-negative integer).

    Returns:
        A string of ``length`` lowercase hex digits.

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError("negative argument not allowed")
    # token_hex(n) produces 2 * n characters, so round the byte count up and
    # trim; plain floor division would drop one character for odd lengths.
    return secrets.token_hex((length + 1) // 2)[:length]


# Example configuration: server address, vector dimensionality and toggles.
IP = "localhost"
dim = 128
clean_exist = False
prepare_data = True

connections.connect("default", host=IP, port="19530")

# Schema: an INT64 primary key, three scalar columns, one float vector
# column and one VARCHAR column.
fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="int64", dtype=DataType.INT64),
    FieldSchema(name="float", dtype=DataType.FLOAT),
    FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim),
    FieldSchema(name="bool", dtype=DataType.BOOL),
    FieldSchema(name="string", dtype=DataType.VARCHAR, max_length=512),
]
schema = CollectionSchema(fields=fields)

# A random suffix gives every run its own collection name.
collection_name = f"test_group_by_{generate_random_hex_string(24)}"
if clean_exist:
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

collection = Collection(collection_name, schema=schema)
nb = 1500
batch_num = 3
# One set of random vectors, reused for every inserted batch and later as
# the source of the query vectors.
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
# Insert ``batch_num`` batches of ``nb`` rows each, then flush and index.
if prepare_data:
    for batch in range(batch_num):
        # Primary keys are contiguous across batches: batch b covers
        # [nb * b, nb * (b + 1)).
        pk_range = range(nb * batch, nb * (batch + 1))
        data = [
            list(pk_range),
            # "int64" cycles through 33 distinct values.
            [row % 33 for row in range(nb)],
            [np.float32(row) for row in range(nb)],
            vectors,
            [bool(random.randrange(2)) for _ in range(nb)],
            # "string" is derived from the primary key, 44 distinct values.
            [str(pk % 44) for pk in pk_range],
        ]
        collection.insert(data)
        print("insert data done")
    collection.flush()
    collection.create_index("float_vector", {"metric_type": "COSINE"})

# Load the collection into memory so it can be searched.
collection.load()
batch_size = 100
search_params = {"metric_type": "COSINE"}
# Grouping is requested through the ``group_by_field`` keyword argument of
# search(); here hits are grouped by the scalar "int64" column, which is also
# returned in the output so the grouping is visible in the printed hits.
result = collection.search(
    vectors[:3],
    "float_vector",
    search_params,
    limit=batch_size,
    timeout=600,
    output_fields=["int64"],
    group_by_field="int64",
)

# Print every hit of every query, framed by separator lines.
for i, hits in enumerate(result):
    print(f"---result{i}_size:{len(hits)}-------------------------")
    for hit in hits:
        print(hit)
    print("----------------------------")
7 changes: 4 additions & 3 deletions pymilvus/client/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,12 +622,13 @@ def search_requests_with_expr(
raise ParamError(message=f"wrong type for offset, expect int, got {type(offset)}")
search_params["offset"] = offset

group_by_field = kwargs.get(GROUP_BY_FIELD)
if group_by_field is not None:
search_params[GROUP_BY_FIELD] = group_by_field

if param.get("metric_type", None) is not None:
search_params["metric_type"] = param["metric_type"]

if param.get(GROUP_BY_FIELD, None) is not None:
search_params[GROUP_BY_FIELD] = param[GROUP_BY_FIELD]

if anns_field:
search_params["anns_field"] = anns_field

Expand Down

0 comments on commit 30ee1ab

Please sign in to comment.