diff --git a/.gitignore b/.gitignore
index 2f34f10ee..004524444 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ __pycache__
 __MACOSX
 .DS_Store
 build/
+venv/
+.idea/
diff --git a/tests/test_models.py b/tests/test_models.py
index 555b0dbab..c61774a22 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -65,6 +65,6 @@ def test_test_result_merge(self):
 
     def test_test_result_display(self):
         result_dir = config.RESULTS_LOCAL_DIR
-        for json_file in result_dir.glob("*.json"):
+        for json_file in result_dir.glob("result*.json"):
             res = TestResult.read_file(json_file)
             res.display()
diff --git a/vectordb_bench/__init__.py b/vectordb_bench/__init__.py
index 7a5df57bd..a9421a048 100644
--- a/vectordb_bench/__init__.py
+++ b/vectordb_bench/__init__.py
@@ -18,12 +18,23 @@ class config:
     USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
 
     RESULTS_LOCAL_DIR = pathlib.Path(__file__).parent.joinpath("results")
-    CASE_TIMEOUT_IN_SECOND = 24 * 60 * 60
+
+    CAPACITY_TIMEOUT_IN_SECONDS =  24 * 3600 # 24h
+    LOAD_TIMEOUT_1M             = 1.5 * 3600 # 1.5h
+    LOAD_TIMEOUT_10M            =  15 * 3600 # 15h
+    LOAD_TIMEOUT_100M           = 150 * 3600 # 6.25d
+
+    OPTIMIZE_TIMEOUT_1M         =  15 * 60   # 15min
+    OPTIMIZE_TIMEOUT_10M        = 2.5 * 3600 # 2.5h
+    OPTIMIZE_TIMEOUT_100M       =  25 * 3600 # 1.04d
 
 
     def display(self) -> str:
-        tmp = [i for i in inspect.getmembers(self)
-            if not inspect.ismethod(i[1]) and not i[0].startswith('_') \
+        tmp = [
+            i for i in inspect.getmembers(self)
+            if not inspect.ismethod(i[1])
+            and not i[0].startswith('_')
+            and "TIMEOUT" not in i[0]
         ]
         return tmp
 
diff --git a/vectordb_bench/backend/cases.py b/vectordb_bench/backend/cases.py
index 639ab2984..56b38e3f4 100644
--- a/vectordb_bench/backend/cases.py
+++ b/vectordb_bench/backend/cases.py
@@ -2,8 +2,10 @@
 import logging
 from enum import Enum, auto
 
+from vectordb_bench import config
+from vectordb_bench.base import BaseModel
+
 from .dataset import Dataset, DatasetManager
-from ..base import BaseModel
 
 
 log = logging.getLogger(__name__)
@@ -75,6 +77,9 @@ class Case(BaseModel):
     description: str
     dataset: DatasetManager
 
+    load_timeout: float | int
+    optimize_timeout: float | int | None
+
     filter_rate: float | None
 
     @property
@@ -92,6 +97,8 @@ def filters(self) -> dict | None:
 class CapacityCase(Case, BaseModel):
     label: CaseLabel = CaseLabel.Load
     filter_rate: float | None = None
+    load_timeout: float | int = config.CAPACITY_TIMEOUT_IN_SECONDS
+    optimize_timeout: float | int | None = None
 
 
 class PerformanceCase(Case, BaseModel):
@@ -121,6 +128,8 @@ class Performance10M(PerformanceCase):
     name: str = "Search Performance Test (10M Dataset, 768 Dim)"
     description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_10M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
 
 
 class Performance1M(PerformanceCase):
@@ -129,6 +138,8 @@ class Performance1M(PerformanceCase):
     name: str = "Search Performance Test (1M Dataset, 768 Dim)"
     description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_1M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
 
 
 class Performance10M1P(PerformanceCase):
@@ -138,6 +149,8 @@ class Performance10M1P(PerformanceCase):
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 1%)"
     description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_10M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
 
 
 class Performance1M1P(PerformanceCase):
@@ -147,6 +160,8 @@ class Performance1M1P(PerformanceCase):
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 1%)"
     description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_1M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
 
 
 class Performance10M99P(PerformanceCase):
@@ -156,6 +171,8 @@ class Performance10M99P(PerformanceCase):
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 99%)"
     description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_10M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
 
 
 class Performance1M99P(PerformanceCase):
@@ -165,6 +182,8 @@ class Performance1M99P(PerformanceCase):
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 99%)"
     description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_1M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
 
 
 
@@ -175,6 +194,8 @@ class Performance100M(PerformanceCase):
     name: str = "Search Performance Test (100M Dataset, 768 Dim)"
     description: str = """This case tests the search performance of a vector database with a large 100M dataset (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels.
 Results will show index building time, recall, and maximum QPS."""
+    load_timeout: float | int = config.LOAD_TIMEOUT_100M
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_100M
 
 
 type2case = {
diff --git a/vectordb_bench/backend/clients/api.py b/vectordb_bench/backend/clients/api.py
index 49ec18df1..74bafbece 100644
--- a/vectordb_bench/backend/clients/api.py
+++ b/vectordb_bench/backend/clients/api.py
@@ -73,7 +73,7 @@ class VectorDB(ABC):
 
     In each process, the benchmark cases ensure VectorDB.init() calls before any other methods operations
 
-    insert_embeddings, search_embedding, and, ready_to_search will be timed for each call.
+    insert_embeddings, search_embedding, and optimize will be timed for each call.
 
     Examples:
         >>> milvus = Milvus()
@@ -166,13 +166,14 @@ def search_embedding(
 
     # TODO: remove
     @abstractmethod
-    def ready_to_search(self):
-        """ready_to_search will be called between insertion and search in performance cases.
+    def optimize(self):
+        """optimize will be called between insertion and search in performance cases.
 
         Should be blocked until the vectorDB is ready to be tested on
         heavy performance cases.
 
-        Time(insert the dataset) + Time(ready_to_search) will be recorded as "load_duration" metric
+        Time(insert the dataset) + Time(optimize) will be recorded as "load_duration" metric
+        Optimize's execution time is limited; the limit depends on the case.
         """
         raise NotImplementedError
 
diff --git a/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py b/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py
index 1f88233e8..8cd928399 100644
--- a/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py
+++ b/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py
@@ -143,8 +143,8 @@ def search_embedding(
             log.warning(f"Failed to search: {self.indice} error: {str(e)}")
             raise e from None
 
-    def ready_to_search(self):
-        """ready_to_search will be called between insertion and search in performance cases."""
+    def optimize(self):
+        """optimize will be called between insertion and search in performance cases."""
         pass
 
     def ready_to_load(self):
diff --git a/vectordb_bench/backend/clients/milvus/milvus.py b/vectordb_bench/backend/clients/milvus/milvus.py
index fcb5b324e..d6afa38dc 100644
--- a/vectordb_bench/backend/clients/milvus/milvus.py
+++ b/vectordb_bench/backend/clients/milvus/milvus.py
@@ -53,7 +53,7 @@ def __init__(
             log.info(f"{self.name} create collection: {self.collection_name}")
 
             # Create the collection
-            coll = Collection(
+            Collection(
                 name=self.collection_name,
                 schema=CollectionSchema(fields),
                 consistency_level="Session",
@@ -107,6 +107,14 @@ def _pre_load(self, coll: Collection):
 
     def _optimize(self):
         log.info(f"{self.name} optimizing before search")
+        try:
+            self.col.load()
+        except Exception as e:
+            log.warning(f"{self.name} optimize error: {e}")
+            raise e from None
+
+    def _post_insert(self):
+        log.info(f"{self.name} post insert before optimize")
         try:
             self.col.flush()
             self.col.compact()
@@ -119,10 +127,6 @@ def _optimize(self):
                 index_name=self._index_name,
             )
             utility.wait_for_index_building_complete(self.collection_name)
-            self.col.load()
-            #  self.col.load(_refresh=True)
-            #  utility.wait_for_loading_complete(self.collection_name)
-            #  import time; time.sleep(10)
         except Exception as e:
             log.warning(f"{self.name} optimize error: {e}")
             raise e from None
@@ -132,7 +136,7 @@ def ready_to_load(self):
         self._pre_load(self.col)
         pass
 
-    def ready_to_search(self):
+    def optimize(self):
         assert self.col, "Please call self.init() before"
         self._optimize()
 
@@ -157,6 +161,8 @@ def insert_embeddings(
                 ]
                 res = self.col.insert(insert_data, **kwargs)
                 insert_count += len(res.primary_keys)
+            if kwargs.get("last_batch"):
+                self._post_insert()
         except MilvusException as e:
             log.warning("Failed to insert data")
             return (insert_count, e)
diff --git a/vectordb_bench/backend/clients/pinecone/pinecone.py b/vectordb_bench/backend/clients/pinecone/pinecone.py
index 9cafd0466..1f2b4edab 100644
--- a/vectordb_bench/backend/clients/pinecone/pinecone.py
+++ b/vectordb_bench/backend/clients/pinecone/pinecone.py
@@ -69,7 +69,7 @@ def init(self) -> None:
     def ready_to_load(self):
         pass
 
-    def ready_to_search(self):
+    def optimize(self):
         pass
 
     def insert_embeddings(
diff --git a/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py b/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py
index 6dcbc8f03..330bc396f 100644
--- a/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py
+++ b/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py
@@ -74,7 +74,7 @@ def ready_to_load(self):
         pass
 
 
-    def ready_to_search(self):
+    def optimize(self):
         assert self.qdrant_client, "Please call self.init() before"
         # wait for vectors to be fully indexed
         SECONDS_WAITING_FOR_INDEXING_API_CALL = 5
diff --git a/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py b/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py
index 5852dfeb6..38f87c7e8 100644
--- a/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py
+++ b/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py
@@ -70,7 +70,7 @@ def ready_to_load(self):
         """Should call insert first, do nothing"""
         pass
 
-    def ready_to_search(self):
+    def optimize(self):
         assert self.client.schema.exists(self.collection_name)
         self.client.schema.update_config(self.collection_name, {"vectorIndexConfig": self.case_config.search_param() } )
 
diff --git a/vectordb_bench/backend/runner/serial_runner.py b/vectordb_bench/backend/runner/serial_runner.py
index 03fb180f0..ba0d33823 100644
--- a/vectordb_bench/backend/runner/serial_runner.py
+++ b/vectordb_bench/backend/runner/serial_runner.py
@@ -4,77 +4,90 @@
 import concurrent
 import multiprocessing as mp
 import math
+import psutil
+
 import numpy as np
 import pandas as pd
 
 from ..clients import api
 from ...metric import calc_recall
-from ...models import LoadTimeoutError
+from ...models import LoadTimeoutError, PerformanceTimeoutError
 from .. import utils
 from ... import config
+from vectordb_bench.backend.dataset import DatasetManager
 
 NUM_PER_BATCH = config.NUM_PER_BATCH
-LOAD_TIMEOUT = 24 * 60 * 60
 LOAD_MAX_TRY_COUNT = 10
 WAITTING_TIME = 60
 
 log = logging.getLogger(__name__)
 
-
 class SerialInsertRunner:
-    def __init__(self, db: api.VectorDB, train_emb: list[list[float]], train_id: list[int]):
-        log.debug(f"Dataset shape: {len(train_emb)}")
+    def __init__(self, db: api.VectorDB, dataset: DatasetManager, normalize: bool, timeout: float | None = None):
+        self.timeout = timeout if isinstance(timeout, (int, float)) else None
+        self.dataset = dataset
         self.db = db
-        self.shared_emb = train_emb
-        self.train_id = train_id
+        self.normalize = normalize
 
-        self.seq_batches = math.ceil(len(train_emb)/NUM_PER_BATCH)
+    def task(self):  # insert every dataset file in turn; returns the total inserted count
+        count = 0
+        for data_df in self.dataset:
+            all_metadata = data_df['id'].tolist()
 
-    def insert_data(self, left_id: int = 0) -> int:
-        with self.db.init():
-            all_embeddings = self.shared_emb
+            emb_np = np.stack(data_df['emb'])
+            if self.normalize:
+                log.debug("normalize the 100k train data")
+                all_embeddings = (emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis]).tolist()  # tolist() on the quotient, not on the norms
+            else:
+                all_embeddings = emb_np.tolist()
+            del(emb_np)
+            log.debug(f"batch dataset size: {len(all_embeddings)}, {len(all_metadata)}")
+            last = self.dataset.data.size - count == len(all_metadata)
+            count += self._insert_data(self, all_embeddings, all_metadata, last)
+        return count  # _insert_all_batches reads this value from future.result()
 
-            # unique id for endlessness insertion
-            all_metadata = [i+left_id for i in self.train_id]
 
-            num_conc_batches = math.ceil(len(all_embeddings)/NUM_PER_BATCH)
+    def _insert_data(self, db: api.VectorDB, all_embeddings: list[list[float]], all_metadata: list[int], last: bool) -> int:
+        NUM_BATCHES = math.ceil(len(all_embeddings)/NUM_PER_BATCH)
+
+        with self.db.init():
             log.info(f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} embeddings in batch {NUM_PER_BATCH}")
             count = 0
-            for batch_id in range(self.seq_batches):
+            for batch_id in range(NUM_BATCHES):
                 metadata = all_metadata[batch_id*NUM_PER_BATCH: (batch_id+1)*NUM_PER_BATCH]
                 embeddings = all_embeddings[batch_id*NUM_PER_BATCH: (batch_id+1)*NUM_PER_BATCH]
 
-                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_conc_batches}], Start inserting {len(metadata)} embeddings")
+                last_batch = last and (batch_id == NUM_BATCHES - 1)
+                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{NUM_BATCHES}], Start inserting {len(metadata)} embeddings")
                 insert_count, error = self.db.insert_embeddings(
                     embeddings=embeddings,
                     metadata=metadata,
+                    last_batch=last_batch,
                 )
                 if error is not None:
                     raise error
-                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_conc_batches}], Finish inserting {len(metadata)} embeddings")
 
+                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{NUM_BATCHES}], Finish inserting {len(metadata)} embeddings")
                 assert insert_count == len(metadata)
                 count += insert_count
             log.info(f"({mp.current_process().name:16}) Finish inserting {len(all_embeddings)} embeddings in batch {NUM_PER_BATCH}")
         return count
 
-    def endless_insert_data(self, left_id: int = 0) -> int:
+    def endless_insert_data(self, all_embeddings, all_metadata, left_id: int = 0) -> int:
         with self.db.init():
-            all_embeddings = self.shared_emb
-
             # unique id for endlessness insertion
-            all_metadata = [i+left_id for i in self.train_id]
+            all_metadata = [i+left_id for i in all_metadata]
 
-            num_conc_batches = math.ceil(len(all_embeddings)/NUM_PER_BATCH)
+            NUM_BATCHES = math.ceil(len(all_embeddings)/NUM_PER_BATCH)
             log.info(f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} embeddings in batch {NUM_PER_BATCH}")
             count = 0
-            for batch_id in range(self.seq_batches):
+            for batch_id in range(NUM_BATCHES):
                 retry_count = 0
                 already_insert_count = 0
                 metadata = all_metadata[batch_id*NUM_PER_BATCH : (batch_id+1)*NUM_PER_BATCH]
                 embeddings = all_embeddings[batch_id*NUM_PER_BATCH : (batch_id+1)*NUM_PER_BATCH]
 
-                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_conc_batches}], Start inserting {len(metadata)} embeddings")
+                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{NUM_BATCHES}], Start inserting {len(metadata)} embeddings")
                 while retry_count < LOAD_MAX_TRY_COUNT:
                     insert_count, error = self.db.insert_embeddings(
                         embeddings=embeddings[already_insert_count :],
@@ -90,7 +103,7 @@ def endless_insert_data(self, left_id: int = 0) -> int:
                             raise error
                     else:
                         break
-                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_conc_batches}], Finish inserting {len(metadata)} embeddings")
+                log.debug(f"({mp.current_process().name:16}) batch [{batch_id:3}/{NUM_BATCHES}], Finish inserting {len(metadata)} embeddings")
 
                 assert already_insert_count == len(metadata)
                 count += already_insert_count
@@ -101,30 +114,46 @@ def endless_insert_data(self, left_id: int = 0) -> int:
     def _insert_all_batches(self) -> int:
         """Performance case only"""
         with concurrent.futures.ProcessPoolExecutor(mp_context=mp.get_context('spawn'), max_workers=1) as executor:
-            future = executor.submit(self.insert_data)
-            count = future.result()
-            return count
+            future = executor.submit(self.task)
+            try:
+                count = future.result(timeout=self.timeout)
+            except concurrent.futures.TimeoutError as e:  # only an alias of builtin TimeoutError from Python 3.11
+                msg = f"VectorDB load dataset timeout in {self.timeout}"
+                log.warning(msg)
+                for pid, _ in executor._processes.items():
+                    psutil.Process(pid).kill()  # stop the worker so leaving the with-block does not wait on it
+                raise PerformanceTimeoutError(msg) from e
+            except Exception as e:
+                log.warning(f"VectorDB load dataset error: {e}")
+                raise e from None
+            else:
+                return count
 
     def run_endlessness(self) -> int:
         """run forever util DB raises exception or crash"""
+        # datasets for load tests are quite small, can fit into memory
+        # only 1 file
+        data_df = [data_df for data_df in self.dataset][0]
+        all_embeddings, all_metadata = np.stack(data_df["emb"]).tolist(), data_df['id'].tolist()
+
         start_time = time.perf_counter()
         max_load_count, times = 0, 0
         try:
             with self.db.init():
                 self.db.ready_to_load()
-            while time.perf_counter() - start_time < config.CASE_TIMEOUT_IN_SECOND:
-                count = self.endless_insert_data(left_id=max_load_count)
+            while self.timeout is None or time.perf_counter() - start_time < self.timeout:  # None means no time limit
+                count = self.endless_insert_data(all_embeddings, all_metadata, left_id=max_load_count)
                 max_load_count += count
                 times += 1
                 log.info(f"Loaded {times} entire dataset, current max load counts={utils.numerize(max_load_count)}, {max_load_count}")
-            raise LoadTimeoutError("capacity case load timeout and stop")
-        except LoadTimeoutError as e:
-            log.info("load timetout, stop the load case")
-            raise e from None
         except Exception as e:
             log.info(f"Capacity case load reach limit, insertion counts={utils.numerize(max_load_count)}, {max_load_count}, err={e}")
             traceback.print_exc()
             return max_load_count
+        else:
+            msg = f"capacity case load timeout in {self.timeout}s"
+            log.info(msg)
+            raise LoadTimeoutError(msg)
 
     def run(self) -> int:
         count, dur = self._insert_all_batches()
diff --git a/vectordb_bench/backend/task_runner.py b/vectordb_bench/backend/task_runner.py
index f6ebbeed9..034770f30 100644
--- a/vectordb_bench/backend/task_runner.py
+++ b/vectordb_bench/backend/task_runner.py
@@ -1,4 +1,5 @@
 import logging
+import psutil
 import traceback
 import concurrent
 import numpy as np
@@ -7,7 +8,7 @@
 from . import utils
 from .cases import Case, CaseLabel
 from ..base import BaseModel
-from ..models import TaskConfig
+from ..models import TaskConfig, PerformanceTimeoutError
 
 from .clients import (
     api,
@@ -92,38 +93,37 @@ def run(self, drop_old: bool = True) -> Metric:
         self._pre_run(drop_old)
 
         if self.ca.label == CaseLabel.Load:
-            return self._run_load_case()
+            return self._run_capacity_case()
         elif self.ca.label == CaseLabel.Performance:
             return self._run_perf_case(drop_old)
         else:
-            log.warning(f"unknown case type: {self.ca.label}")
-            raise ValueError(f"Unknown case type: {self.ca.label}")
+            msg = f"unknown case type: {self.ca.label}"
+            log.warning(msg)
+            raise ValueError(msg)
 
-
-    def _run_load_case(self) -> Metric:
-        """ run load cases
+    def _run_capacity_case(self) -> Metric:
+        """ run capacity cases
 
         Returns:
             Metric: the max load count
         """
         log.info("Start capacity case")
-        # datasets for load tests are quite small, can fit into memory
-        # only 1 file
-        data_df = [data_df for data_df in self.ca.dataset][0]
-
-        all_embeddings, all_metadata = np.stack(data_df["emb"]).tolist(), data_df['id'].tolist()
-        runner = SerialInsertRunner(self.db, all_embeddings, all_metadata)
         try:
+            runner = SerialInsertRunner(self.db, self.ca.dataset, self.normalize, self.ca.load_timeout)
             count = runner.run_endlessness()
-            log.info(f"load reach limit: insertion counts={count}")
-            return Metric(max_load_count=count)
         except Exception as e:
-            log.warning(f"run capacity case error: {e}")
+            log.warning(f"Failed to run capacity case, reason = {e}")
             raise e from None
-        log.info("End capacity case")
-
+        else:
+            log.info(f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}")
+            return Metric(max_load_count=count)
 
     def _run_perf_case(self, drop_old: bool = True) -> Metric:
+        """ run performance cases
+
+        Returns:
+            Metric: load_duration, recall, serial_latency_p99, and, qps
+        """
         try:
             m = Metric()
             if drop_old:
@@ -134,38 +134,24 @@ def _run_perf_case(self, drop_old: bool = True) -> Metric:
             self._init_search_runner()
             m.recall, m.serial_latency_p99 = self._serial_search()
             m.qps = self._conc_search()
-
-            log.info(f"got results: {m}")
-            return m
         except Exception as e:
-            log.warning(f"performance case run error: {e}")
+            log.warning(f"Failed to run performance case, reason = {e}")
             traceback.print_exc()
-            raise e
+            raise e from None
+        else:
+            log.info(f"Performance case got result: {m}")
+            return m
 
     @utils.time_it
     def _load_train_data(self):
         """Insert train data and get the insert_duration"""
-        for data_df in self.ca.dataset:
-            try:
-                all_metadata = data_df['id'].tolist()
-
-                emb_np = np.stack(data_df['emb'])
-                if self.normalize:
-                    log.debug("normalize the 100k train data")
-                    all_embeddings = emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis].tolist()
-                else:
-                    all_embeddings = emb_np.tolist()
-
-                del(emb_np)
-                log.debug(f"normalized size: {len(all_embeddings)}, {len(all_metadata)}")
-
-                runner = SerialInsertRunner(self.db, all_embeddings, all_metadata)
-                runner.run()
-            except Exception as e:
-                raise e from None
-            finally:
-                runner = None
-
+        try:
+            runner = SerialInsertRunner(self.db, self.ca.dataset, self.normalize, self.ca.load_timeout)
+            runner.run()
+        except Exception as e:
+            raise e from None
+        finally:
+            runner = None
 
     def _serial_search(self) -> tuple[float, float]:
         """Performance serial tests, search the entire test data once,
@@ -200,15 +186,20 @@ def _conc_search(self):
     def _task(self) -> None:
         """"""
         with self.db.init():
-            self.db.ready_to_search()
+            self.db.optimize()
 
     def _optimize(self) -> float:
         with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
             future = executor.submit(self._task)
             try:
-                return future.result()[1]
+                return future.result(timeout=self.ca.optimize_timeout)[1]
+            except concurrent.futures.TimeoutError as e:  # only an alias of builtin TimeoutError from Python 3.11
+                log.warning(f"VectorDB optimize timeout in {self.ca.optimize_timeout}")
+                for pid, _ in executor._processes.items():
+                    psutil.Process(pid).kill()  # stop the worker so leaving the with-block does not wait on it
+                raise PerformanceTimeoutError("Performance case optimize timeout") from e
             except Exception as e:
-                log.warning(f"VectorDB ready_to_search error: {e}")
+                log.warning(f"VectorDB optimize error: {e}")
                 raise e from None
 
     def _init_search_runner(self):
diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py
index 821d2ff6d..bbd2f5010 100644
--- a/vectordb_bench/models.py
+++ b/vectordb_bench/models.py
@@ -167,20 +167,21 @@ def append_return(x, y):
         max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
         max_recall = max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
 
-        max_db_labels = 8 if max_db_labels == 0 else max_db_labels
-        max_load_dur = 11 if max_load_dur == 0 else max_load_dur + 3
-        max_qps = 10 if max_qps == 0 else max_load_dur + 3
-        max_recall = 13 if max_recall == 0 else max_recall + 3
+        max_db_labels = 8 if max_db_labels < 8 else max_db_labels
+        max_load_dur = 11 if max_load_dur < 11 else max_load_dur
+        max_qps = 10 if max_qps < 10 else max_qps
+        max_recall = 13 if max_recall < 13 else max_recall
 
-        LENGTH = (max_db, max_db_labels, max_case, len(self.task_label), max_load_dur, max_qps, 15, max_recall, 14)
+        LENGTH = (max_db, max_db_labels, max_case, len(self.task_label), max_load_dur, max_qps, 15, max_recall, 14, 5)
 
         DATA_FORMAT = (
-            f"%-{max_db}s | %-{max_db_labels}s %-{max_case}s %-{len(self.task_label)}s "
-            f"| %-{max_load_dur}s %-{max_qps}s %-15s %-{max_recall}s %-14s"
+            f"%-{max_db}s | %-{max_db_labels}s %-{max_case}s %-{len(self.task_label)}s"
+            f" | %-{max_load_dur}s %-{max_qps}s %-15s %-{max_recall}s %-14s"
+            f" | %-5s"
         )
 
         TITLE = DATA_FORMAT % (
-            "DB", "db_label", "case", "label", "load_dur", "qps", "latency(p99)", "recall", "max_load_count")
+            "DB", "db_label", "case", "label", "load_dur", "qps", "latency(p99)", "recall", "max_load_count", "label")
         SPLIT = DATA_FORMAT%tuple(map(lambda x:"-"*x, LENGTH))
         SUMMERY_FORMAT = ("Task summery: run_id=%s, task_label=%s") % (self.run_id[:5], self.task_label)
         fmt = [SUMMERY_FORMAT, TITLE, SPLIT]
@@ -197,6 +198,7 @@ def append_return(x, y):
                 f.metrics.serial_latency_p99,
                 f.metrics.recall,
                 f.metrics.max_load_count,
+                f.label.value,
             ))
 
         tmp_logger = logging.getLogger("no_color")