Skip to content

Commit

Permalink
make a singleton thread pool executor for tinygrad since we always want it to run on the same thread
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexCheema committed Jan 29, 2025
1 parent 9f6c688 commit 31b56e8
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion exo/inference/tinygrad/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ def build_transformer(model_path: Path, shard: Shard, model_size="8B", device=No

return model

# Module-level singleton executor with exactly one worker, so tinygrad work
# submitted through it always runs on the same thread. Engine instances
# assign this object to `self.executor` instead of creating their own pool.
_executor = ThreadPoolExecutor(max_workers=1)
class TinygradDynamicShardInferenceEngine(InferenceEngine):
def __init__(self, shard_downloader: ShardDownloader):
    """Initialise the engine with its shard unset and an empty state cache.

    Args:
        shard_downloader: Stored on the instance as-is; it is not called
            during construction.
    """
    self.shard = None
    self.shard_downloader = shard_downloader
    # Insertion-ordered mapping; poll_state looks entries up by request_id.
    self.states = OrderedDict()
    # Reuse the module-level single-worker executor rather than building a
    # new pool per instance, so every engine submits to the same worker.
    self.executor = _executor

def poll_state(self, x, request_id: str, max_states=2):
if request_id not in self.states:
Expand Down

0 comments on commit 31b56e8

Please sign in to comment.