From 31b56e862f9d889e4ce7fb3bd524a4c2c3c3f976 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Wed, 29 Jan 2025 18:37:09 +0000 Subject: [PATCH] make a singleton thread pool executor for tinygrad since we always want it to run on the same thread --- exo/inference/tinygrad/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/tinygrad/inference.py b/exo/inference/tinygrad/inference.py index 49b521053..8e336dce1 100644 --- a/exo/inference/tinygrad/inference.py +++ b/exo/inference/tinygrad/inference.py @@ -61,12 +61,13 @@ def build_transformer(model_path: Path, shard: Shard, model_size="8B", device=No return model +_executor = ThreadPoolExecutor(max_workers=1) # singleton so tinygrad always runs on the same thread class TinygradDynamicShardInferenceEngine(InferenceEngine): def __init__(self, shard_downloader: ShardDownloader): self.shard = None self.shard_downloader = shard_downloader - self.executor = ThreadPoolExecutor(max_workers=1) self.states = OrderedDict() + self.executor = _executor def poll_state(self, x, request_id: str, max_states=2): if request_id not in self.states: