Skip to content

Commit

Permalink
Clean up and fix torch.multiprocessing Queue exception handling
Browse files: browse the repository at this point in the history
  • Loading branch information
mmanzoorTT committed Dec 12, 2024
1 parent 83cf430 commit 0cdca23
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions tt_torch/dynamo/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def compile_op(self, node, *inputs, **kwargs):
op.add_stable_hlo_graph(module.operation.get_asm())

# mp.set_start_method('spawn')
# mp.set_sharing_strategy('file_system')
mp.set_sharing_strategy("file_system")
torch.set_num_threads(1)
sender = mp.Queue()
receiver = mp.Queue()
Expand Down Expand Up @@ -287,7 +287,7 @@ def compile_op(self, node, *inputs, **kwargs):
ttnn_event.set()
op.compilation_status = OpCompilationStatus.CONVERTED_TO_TTNN
break
except mp.queues.Empty:
except Exception as e:
pass
if time.time() - start > self.compiler_config.single_op_timeout:
process.terminate()
Expand All @@ -312,7 +312,6 @@ def pre_process_inputs(self, *inputs):
return processed_inputs

def run_op(self, binary, *inputs):
print("run_op", file=sys.stderr)
inputs = self.pre_process_inputs(*inputs)
# mp.set_start_method('spawn')
# mp.set_sharing_strategy('file_system')
Expand All @@ -334,11 +333,11 @@ def run_op(self, binary, *inputs):
break
try:
# result = receiver.get_nowait()
result = receiver.get(timeout=0.01)
result = receiver.get(timeout=0.05)
outputs = result["outputs"]
exec_event.set()
break
except mp.queues.Empty:
except Exception as e:
pass
if time.time() - start > self.compiler_config.single_op_timeout:
process.terminate()
Expand Down

0 comments on commit 0cdca23

Please sign in to comment.