# Patch summary (reviewer commentary placed ahead of the first "diff --git" header).
#
# run/o2-sim-client.py
#   * hunk -69,6 +69,11: after service_pid = pids[0] and before controladdress is
#     built, print a message and exit (1) when
#     psutil.pid_exists(int(service_pid)) is false.
#   * hunk -104,6 +109,10: after the existing 'O2SIM.*DONE' check, a notification
#     matching 'O2SIM.*FAILURE' prints a message, sets batchdone = True and
#     calls exit (1).
#   * hunks -122,6 +131,7 and -131,7 +141,13: introduce failure = False before the
#     "while not (serverok and workerok and mergerok)" loop; a notification
#     matching '.*O2SIM.*FAILURE.*' sets failure = True and breaks; an added
#     "if failure: exit (1)" precedes the existing exit (0).
#
# run/o2sim_parallel.cxx
#   * hunk -712,6 +712,7: after the "SHUTTING DOWN DUE TO SIGNALED EXIT IN
#     COMPONENT" error log and before errored = true, publish
#     simStatusString("O2SIM", "STATE", "FAILURE") on externalpublishchannel.
#
# NOTE(review): batchdone = True immediately before exit (1) looks like a dead
#   assignment, since exit (1) ends the script at that point -- confirm nothing
#   catches SystemExit there.
# NOTE(review): the two FAILURE patterns differ ('O2SIM.*FAILURE' vs
#   '.*O2SIM.*FAILURE.*'). re.match only matches at the start of the string, so
#   the first requires the notification to begin with O2SIM -- verify against
#   the string simStatusString actually produces.
# NOTE(review): no hunk here imports psutil; presumably the file already
#   imports it -- TODO confirm.
# NOTE(review): this text has lost its original line structure; the line break
#   inside the "Simservice reported failure ..." string literal is presumably an
#   extraction artifact, not part of the real patch -- TODO confirm against the
#   upstream commit.
diff --git a/run/o2-sim-client.py b/run/o2-sim-client.py index 9e48d5f7c1b68..4a4a0618cb597 100755 --- a/run/o2-sim-client.py +++ b/run/o2-sim-client.py @@ -69,6 +69,11 @@ def getpids(name): service_pid = pids[0] +# check that sim process is actually alive +if not psutil.pid_exists(int(service_pid)): + print ("Could not find simulation service with PID " + str(service_pid) + " .. exiting") + exit (1) + controladdress="ipc:///tmp/o2sim-control-" + str(service_pid) message = args.command context = zmq.Context() @@ -104,6 +109,10 @@ def getSubscriptionAddresses(basepid): if re.match('O2SIM.*DONE', notification) != None: print ("Received DONE notification from server ... quitting", notification) batchdone = True + if re.match('O2SIM.*FAILURE', notification) != None: + print ("Service reported a failure ... unblocking this call") + batchdone = True + exit (1) exit (0) @@ -122,6 +131,7 @@ def getSubscriptionAddresses(basepid): serverok = False workerok = False mergerok = False + failure = False while not (serverok and workerok and mergerok): notification = incomingsocket.recv_string() print ("Received notification ", notification) @@ -131,7 +141,13 @@ def getSubscriptionAddresses(basepid): mergerok = True if re.match('PRIMSERVER.*AWAITING\sINPUT', notification) != None: serverok = True + if re.match('.*O2SIM.*FAILURE.*', notification) != None: + print ("Simservice reported failure ... 
exiting client") + failure = True + break + if failure: + exit (1) exit (0) exit (0) diff --git a/run/o2sim_parallel.cxx b/run/o2sim_parallel.cxx index cd5ac62cbff37..5520eaa446736 100644 --- a/run/o2sim_parallel.cxx +++ b/run/o2sim_parallel.cxx @@ -712,6 +712,7 @@ int main(int argc, char* argv[]) killpg(p, SIGTERM); // <--- makes sure to shutdown "unknown" child pids via the group property } LOG(error) << "SHUTTING DOWN DUE TO SIGNALED EXIT IN COMPONENT " << cpid; + o2::simpubsub::publishMessage(externalpublishchannel, o2::simpubsub::simStatusString("O2SIM", "STATE", "FAILURE")); errored = true; } }