diff --git a/docs/resources/log.config b/docs/resources/log.config index f36fcb1914..4fd98dcb48 100644 --- a/docs/resources/log.config +++ b/docs/resources/log.config @@ -21,7 +21,7 @@ args=(sys.stdout,) class=FileHandler level=ERROR formatter=fullFormatter -args=('error.log', 'a') +args=('error_log.txt', 'a') [formatter_fullFormatter] format=%(asctime)s - %(name)s - %(levelname)s - %(message)s \ No newline at end of file diff --git a/nvflare/apis/fl_constant.py b/nvflare/apis/fl_constant.py index 3b609c9b0d..5d12a0e522 100644 --- a/nvflare/apis/fl_constant.py +++ b/nvflare/apis/fl_constant.py @@ -376,6 +376,7 @@ class WorkspaceConstants: DEFAULT_LOGGING_CONFIG = LOGGING_CONFIG + ".default" AUDIT_LOG = "audit.log" LOG_FILE_NAME = "log.txt" + ERROR_LOG_FILE_NAME = "error_log.txt" STATS_POOL_SUMMARY_FILE_NAME = "stats_pool_summary.json" STATS_POOL_RECORDS_FILE_NAME = "stats_pool_records.csv" diff --git a/nvflare/apis/workspace.py b/nvflare/apis/workspace.py index 9ec4b47d47..9cc4aba198 100644 --- a/nvflare/apis/workspace.py +++ b/nvflare/apis/workspace.py @@ -153,6 +153,9 @@ def get_app_dir(self, job_id: str) -> str: def get_app_log_file_path(self, job_id: str) -> str: return os.path.join(self.get_run_dir(job_id), WorkspaceConstants.LOG_FILE_NAME) + def get_app_error_log_file_path(self, job_id: str) -> str: + return os.path.join(self.get_run_dir(job_id), WorkspaceConstants.ERROR_LOG_FILE_NAME) + def get_app_config_dir(self, job_id: str) -> str: return os.path.join(self.get_app_dir(job_id), self.config_folder) diff --git a/nvflare/private/fed/app/simulator/log.config b/nvflare/private/fed/app/simulator/log.config index 3b85259cfd..19a5f1ab13 100644 --- a/nvflare/private/fed/app/simulator/log.config +++ b/nvflare/private/fed/app/simulator/log.config @@ -21,7 +21,7 @@ args=(sys.stdout,) class=FileHandler level=ERROR formatter=fullFormatter -args=('error.log', 'a') +args=('error_log.txt', 'a') [formatter_fullFormatter] format=%(asctime)s - %(name)s - %(levelname)s - %(message)s diff --git a/nvflare/private/fed/client/client_executor.py b/nvflare/private/fed/client/client_executor.py index 898dd2e93c..ac67006ddb 100644 --- a/nvflare/private/fed/client/client_executor.py +++ b/nvflare/private/fed/client/client_executor.py @@ -179,7 +179,7 @@ def start_app( thread = threading.Thread( target=self._wait_child_process_finish, - args=(client, job_id, allocated_resource, token, resource_manager, args.workspace), + args=(client, job_id, allocated_resource, token, resource_manager, args.workspace, fl_ctx), ) thread.start() @@ -384,7 +384,9 @@ def abort_task(self, job_id): ) self.logger.debug("abort_task sent") - def _wait_child_process_finish(self, client, job_id, allocated_resource, token, resource_manager, workspace): + def _wait_child_process_finish( + self, client, job_id, allocated_resource, token, resource_manager, workspace, fl_ctx + ): self.logger.info(f"run ({job_id}): waiting for child worker process to finish.") job_handle = self.run_processes.get(job_id, {}).get(RunProcessKey.JOB_HANDLE) if job_handle: @@ -393,6 +395,7 @@ def _wait_child_process_finish(self, client, job_id, allocated_resource, token, return_code = get_return_code(job_handle, job_id, workspace, self.logger) self.logger.info(f"run ({job_id}): child worker process finished with RC {return_code}") + if return_code in [ProcessExitCode.UNSAFE_COMPONENT, ProcessExitCode.CONFIG_ERROR]: request = new_cell_message( headers={}, @@ -419,6 +422,11 @@ def _wait_child_process_finish(self, client, job_id, allocated_resource, token, self.run_processes.pop(job_id, None) self.logger.debug(f"run ({job_id}): child worker resources freed.") + engine = fl_ctx.get_engine() + fl_ctx.set_prop(FLContextKey.CURRENT_JOB_ID, job_id, private=True, sticky=False) + fl_ctx.set_prop(FLContextKey.CLIENT_NAME, client.client_name, private=True, sticky=False) + engine.fire_event(EventType.JOB_COMPLETED, fl_ctx) + def get_status(self, job_id): process_status = self.run_processes.get(job_id, {}).get(RunProcessKey.STATUS, ClientStatus.STOPPED) return process_status diff --git a/nvflare/private/fed/utils/fed_utils.py b/nvflare/private/fed/utils/fed_utils.py index 6ffba97a4d..e62a092046 100644 --- a/nvflare/private/fed/utils/fed_utils.py +++ b/nvflare/private/fed/utils/fed_utils.py @@ -65,7 +65,7 @@ def add_logfile_handler(log_file: str): The purpose for this is to handle dynamic log file locations. If a handler named errorFileHandler is found, it will be used as a template to - create a new handler for writing to the error.log file at the same directory as log_file. + create a new handler for writing to the error log file at the same directory as log_file. The original errorFileHandler will be removed and replaced by the new handler. Each log file will be rotated when it reaches 20MB. @@ -90,7 +90,7 @@ def add_logfile_handler(log_file: str): if not configured_error_handler: return - error_log_file = os.path.join(os.path.dirname(log_file), "error.log") + error_log_file = os.path.join(os.path.dirname(log_file), WorkspaceConstants.ERROR_LOG_FILE_NAME) error_file_handler = RotatingFileHandler(error_log_file, maxBytes=20 * 1024 * 1024, backupCount=10) error_file_handler.setLevel(configured_error_handler.level) error_file_handler.setFormatter(configured_error_handler.formatter)