Skip to content

Commit

Permalink
Improve failure propagation/handling between sim service and control …
Browse files Browse the repository at this point in the history
  • Loading branch information
sawenzel committed Jun 22, 2023
1 parent 05aaaa9 commit 939bc04
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
16 changes: 16 additions & 0 deletions run/o2-sim-client.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ def getpids(name):
service_pid = pids[0]


# Verify that a process with the service PID exists; otherwise report it
# and leave with exit status 1.
service_pid_text = str(service_pid)
if not psutil.pid_exists(int(service_pid)):
    print("Could not find simulation service with PID " + service_pid_text + " .. exiting")
    exit(1)

# Build the ipc control address from the service PID, pick up the command
# given on the command line and create the ZeroMQ context.
controladdress = "ipc:///tmp/o2sim-control-" + service_pid_text
message = args.command
context = zmq.Context()
Expand Down Expand Up @@ -104,6 +109,10 @@ def getSubscriptionAddresses(basepid):
# A DONE notification marks the batch as finished.
if re.match('O2SIM.*DONE', notification) is not None:
    print("Received DONE notification from server ... quitting", notification)
    batchdone = True
# A FAILURE notification terminates the client immediately with a non-zero
# exit status; no flag needs to be set because exit() does not return.
if re.match('O2SIM.*FAILURE', notification) is not None:
    print("Service reported a failure ... unblocking this call")
    exit(1)

exit (0)

Expand All @@ -122,6 +131,7 @@ def getSubscriptionAddresses(basepid):
serverok = False
workerok = False
mergerok = False
failure = False
while not (serverok and workerok and mergerok):
notification = incomingsocket.recv_string()
print ("Received notification ", notification)
Expand All @@ -131,7 +141,13 @@ def getSubscriptionAddresses(basepid):
mergerok = True
if re.match('PRIMSERVER.*AWAITING\sINPUT', notification) != None:
serverok = True
if re.match('.*O2SIM.*FAILURE.*', notification) != None:
print ("Simservice reported failure ... exiting client")
failure = True
break

if failure:
exit (1)
exit (0)

exit (0)
1 change: 1 addition & 0 deletions run/o2sim_parallel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ int main(int argc, char* argv[])
killpg(p, SIGTERM); // <--- makes sure to shutdown "unknown" child pids via the group property
}
LOG(error) << "SHUTTING DOWN DUE TO SIGNALED EXIT IN COMPONENT " << cpid;
o2::simpubsub::publishMessage(externalpublishchannel, o2::simpubsub::simStatusString("O2SIM", "STATE", "FAILURE"));
errored = true;
}
}
Expand Down

0 comments on commit 939bc04

Please sign in to comment.