diff --git a/pymilvus/bulk_writer/buffer.py b/pymilvus/bulk_writer/buffer.py index 47dd5d6c6..c81e61538 100644 --- a/pymilvus/bulk_writer/buffer.py +++ b/pymilvus/bulk_writer/buffer.py @@ -267,10 +267,8 @@ def _persist_parquet(self, local_path: str, **kwargs): buffer_row_count = kwargs.get("buffer_row_count", 1) size_per_row = int(buffer_size / buffer_row_count) + 1 row_group_size = int(row_group_bytes / size_per_row) - if row_group_size < row_group_size_min: - row_group_size = row_group_size_min - if row_group_size > row_group_size_max: - row_group_size = row_group_size_max + row_group_size = max(row_group_size, row_group_size_min) + row_group_size = min(row_group_size, row_group_size_max) # write to Parquet file data_frame = pd.DataFrame(data=data) diff --git a/pymilvus/client/abstract.py b/pymilvus/client/abstract.py index d5d752cda..eddf2a38c 100644 --- a/pymilvus/client/abstract.py +++ b/pymilvus/client/abstract.py @@ -52,6 +52,11 @@ def __pack(self, raw: Any): self.params[type_param.key] = json.loads(type_param.value) else: + if type_param.key in ["mmap.enabled"]: + self.params["mmap_enabled"] = ( + bool(type_param.value) if type_param.value.lower() != "false" else False + ) + continue self.params[type_param.key] = type_param.value if type_param.key in ["dim"]: self.params[type_param.key] = int(type_param.value) diff --git a/pymilvus/client/prepare.py b/pymilvus/client/prepare.py index d4ba91a14..5ffafe1ef 100644 --- a/pymilvus/client/prepare.py +++ b/pymilvus/client/prepare.py @@ -130,7 +130,9 @@ def get_schema_from_collection_schema( is_clustering_key=f.is_clustering_key, ) for k, v in f.params.items(): - kv_pair = common_types.KeyValuePair(key=str(k), value=str(v)) + kv_pair = common_types.KeyValuePair( + key=str(k) if k != "mmap_enabled" else "mmap.enabled", value=str(v) + ) field_schema.type_params.append(kv_pair) schema.fields.append(field_schema) @@ -187,7 +189,12 @@ def get_field_schema( type_params = field.get("params", {}) if not isinstance(type_params, dict): raise ParamError(message="params should be dictionary type") - kvs = [common_types.KeyValuePair(key=str(k), value=str(v)) for k, v in type_params.items()] + kvs = [ + common_types.KeyValuePair( + key=str(k) if k != "mmap_enabled" else "mmap.enabled", value=str(v) + ) + for k, v in type_params.items() + ] field_schema.type_params.extend(kvs) return field_schema, primary_field, auto_id_field diff --git a/pymilvus/orm/iterator.py b/pymilvus/orm/iterator.py index 8ddaa03c5..702d744a1 100644 --- a/pymilvus/orm/iterator.py +++ b/pymilvus/orm/iterator.py @@ -51,8 +51,7 @@ def extend_batch_size(batch_size: int, next_param: dict, to_extend_batch_size: b extend_rate = DEFAULT_SEARCH_EXTENSION_RATE if EF in next_param[PARAMS]: real_batch = min(MAX_BATCH_SIZE, batch_size * extend_rate, next_param[PARAMS][EF]) - if next_param[PARAMS][EF] > real_batch: - next_param[PARAMS][EF] = real_batch + next_param[PARAMS][EF] = min(real_batch, next_param[PARAMS][EF]) return real_batch return min(MAX_BATCH_SIZE, batch_size * extend_rate) diff --git a/pymilvus/orm/schema.py b/pymilvus/orm/schema.py index 35d0d693f..c224e45cc 100644 --- a/pymilvus/orm/schema.py +++ b/pymilvus/orm/schema.py @@ -318,6 +318,8 @@ def __init__(self, name: str, dtype: DataType, description: str = "", **kwargs) self.is_partition_key = kwargs.get("is_partition_key", False) self.is_clustering_key = kwargs.get("is_clustering_key", False) self.element_type = kwargs.get("element_type", None) + if "mmap_enabled" in kwargs: + self._type_params["mmap_enabled"] = kwargs["mmap_enabled"] self._parse_type_params() def __repr__(self) -> str: