From bf647a87b05077151a6f8fa030f8eea7ee606f3f Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Wed, 15 Feb 2023 11:27:11 -0500 Subject: [PATCH 01/70] added the track_operations.py file --- flow/hooks/track_operations.py | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 flow/hooks/track_operations.py diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py new file mode 100644 index 000000000..28cdd601f --- /dev/null +++ b/flow/hooks/track_operations.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +"""TO DO.""" +from signac import Collection + +try: + import git +except ImportError: + from .util import collect_metadata + + GIT = False +else: + from .git_util import collect_metadata_with_git as collect_metadata + + GIT = True + + +FN_LOGFILE = ".operations_log.txt" + + +class TrackOperations: + """TO DO.""" + + def __init__(self, strict_git=True): + if strict_git and not GIT: + raise RuntimeError( + "Unable to collect git metadata from the repository, " + "because the GitPython package is not installed.\n\n" + "You can use '{}(strict_git=False)' to ignore this " + "error.".format(type(self).__name__) + ) + self.strict_git = strict_git + + def log_operation(self, stage): + """TO DO.""" + + def _log_operation(operation, error=None): + if self.strict_git: + if git.Repo(operation.job._project.root_directory()).is_dirty(): + raise RuntimeError( + "Unable to reliably log operation, because the git repository in " + "the project root directory is dirty.\n\nMake sure to commit all " + "changes or ignore this warning by setting '{}(strict_git=False)'.".format( + type(self).__name__ + ) + ) + metadata = collect_metadata(operation) + + # Add additional execution-related information to metadata. + metadata["stage"] = stage + metadata["error"] = None if error is None else str(error) + + # Write metadata to collection inside job workspace. + with Collection.open(operation.job.fn(self._fn_logfile)) as logfile: + logfile.insert_one(metadata) + + return _log_operation + + def install_hooks(self, project): + """TO DO.""" + project.hooks.on_start.append(self.log_operation(stage="prior")) + return project + + __call__ = install_hooks From d330db1cf6f8cbcb6f56d857c23bc1d9ec903ac9 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Wed, 15 Feb 2023 11:36:44 -0500 Subject: [PATCH 02/70] added util.py file --- flow/hooks/util.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 flow/hooks/util.py diff --git a/flow/hooks/util.py b/flow/hooks/util.py new file mode 100644 index 000000000..7333708d0 --- /dev/null +++ b/flow/hooks/util.py @@ -0,0 +1,25 @@ +# Copyright (c) 2018 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +"""TO DO.""" +from datetime import datetime, timezone + + +def collect_metadata(operation): + """TO DO.""" + return { + # the metadata schema version: + "_schema_version": "1", + "time": datetime.now(timezone.utc).isoformat(), + "project": { + "path": operation.job._project.root_directory(), + # the project schema version: + "schema_version": operation.job._project.config.get("schema_version"), + }, + "job-operation": { + "name": operation.name, + "cmd": operation.cmd, + "directives": operation.directives, + "job_id": operation.job.get_id(), + }, + } From e15f0be68c01c89d68d1851644b36ca2088feed8 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Wed, 15 Feb 2023 12:51:09 -0500 Subject: [PATCH 03/70] modified track_operations.py --- flow/hooks/track_operations.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 28cdd601f..92b50cc76 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -52,14 +52,28 @@ def _log_operation(operation, error=None): metadata["error"] = None if error is None else str(error) # Write metadata to collection inside job workspace. - with Collection.open(operation.job.fn(self._fn_logfile)) as logfile: + with Collection.open(operation.job.fn(FN_LOGFILE)) as logfile: logfile.insert_one(metadata) return _log_operation - def install_hooks(self, project): + def on_start(self, operation, job): """TO DO.""" - project.hooks.on_start.append(self.log_operation(stage="prior")) + self.log_operation(stage="prior")(operation) + + def on_success(self, operation, job): + """TO DO.""" + self.log_operation(stage="after")(operation) + + def on_exception(self, operation, error, job): + """TO DO.""" + self.log_operation(stage="after")(operation, error) + + def install_project_hooks(self, project): + """TO DO.""" + project.project_hooks.on_start.append(self.on_start) + project.project_hooks.on_success.append(self.on_success) + project.project_hooks.on_exception.append(self.on_exception) return project - __call__ = install_hooks + __call__ = install_project_hooks From f18c07ef314b11d798dd28395c3792bc89692cb1 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 1 Mar 2023 12:14:20 -0500 Subject: [PATCH 04/70] Updated code for modern signac-flow api. --- flow/hooks/__init__.py | 3 ++- flow/hooks/track_operations.py | 14 +++++++------- flow/hooks/util.py | 26 ++++++++++++++++++-------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/flow/hooks/__init__.py b/flow/hooks/__init__.py index bba7812f1..99fafac5a 100644 --- a/flow/hooks/__init__.py +++ b/flow/hooks/__init__.py @@ -3,5 +3,6 @@ # This software is licensed under the BSD 3-Clause License. """Operation hooks.""" from .hooks import _Hooks +from .track_operations import TrackOperations -__all__ = ["_Hooks"] +__all__ = ["_Hooks", "TrackOperations"] diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 92b50cc76..c5623877c 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -35,9 +35,9 @@ def __init__(self, strict_git=True): def log_operation(self, stage): """TO DO.""" - def _log_operation(operation, error=None): + def _log_operation(operation, job, error=None): if self.strict_git: - if git.Repo(operation.job._project.root_directory()).is_dirty(): + if git.Repo(job._project.root_directory()).is_dirty(): raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " @@ -45,29 +45,29 @@ def _log_operation(operation, error=None): type(self).__name__ ) ) - metadata = collect_metadata(operation) + metadata = collect_metadata(operation, job) # Add additional execution-related information to metadata. metadata["stage"] = stage metadata["error"] = None if error is None else str(error) # Write metadata to collection inside job workspace. - with Collection.open(operation.job.fn(FN_LOGFILE)) as logfile: + with Collection.open(job.fn(FN_LOGFILE)) as logfile: logfile.insert_one(metadata) return _log_operation def on_start(self, operation, job): """TO DO.""" - self.log_operation(stage="prior")(operation) + self.log_operation(stage="prior")(operation, job) def on_success(self, operation, job): """TO DO.""" - self.log_operation(stage="after")(operation) + self.log_operation(stage="after")(operation, job) def on_exception(self, operation, error, job): """TO DO.""" - self.log_operation(stage="after")(operation, error) + self.log_operation(stage="after")(operation, job, error) def install_project_hooks(self, project): """TO DO.""" diff --git a/flow/hooks/util.py b/flow/hooks/util.py index 7333708d0..8eb3dcfad 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -5,21 +5,31 @@ from datetime import datetime, timezone -def collect_metadata(operation): - """TO DO.""" +def collect_metadata(operation, job): + """TODO. + + We can no longer track the following + because we take in the operation name as a string + rather than as an object, but I think this is + still super useful information. + + Should we just drop it or see if there's still some + way to access this info? + + "cmd": operation.cmd, + "directives": operation.directives, + """ return { # the metadata schema version: "_schema_version": "1", "time": datetime.now(timezone.utc).isoformat(), "project": { - "path": operation.job._project.root_directory(), + "path": job._project.root_directory(), # the project schema version: - "schema_version": operation.job._project.config.get("schema_version"), + "schema_version": job._project.config.get("schema_version"), }, "job-operation": { - "name": operation.name, - "cmd": operation.cmd, - "directives": operation.directives, - "job_id": operation.job.get_id(), + "name": operation, + "job_id": job.id, }, } From ef2751e3fc68386d429b1a3357c14631049fa6a1 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Thu, 9 Mar 2023 16:57:02 -0500 Subject: [PATCH 05/70] modified __init__.py, util.py, track_operations.py files --- flow/hooks/__init__.py | 3 ++- flow/hooks/track_operations.py | 32 +++++++++++++++++++++++++------- flow/hooks/util.py | 12 +++++------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/flow/hooks/__init__.py b/flow/hooks/__init__.py index bba7812f1..99fafac5a 100644 --- a/flow/hooks/__init__.py +++ b/flow/hooks/__init__.py @@ -3,5 +3,6 @@ # This software is licensed under the BSD 3-Clause License. """Operation hooks.""" from .hooks import _Hooks +from .track_operations import TrackOperations -__all__ = ["_Hooks"] +__all__ = ["_Hooks", "TrackOperations"] diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 92b50cc76..491eaad14 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -35,9 +35,9 @@ def __init__(self, strict_git=True): def log_operation(self, stage): """TO DO.""" - def _log_operation(operation, error=None): + def _log_operation(operation, job, error=None): if self.strict_git: - if git.Repo(operation.job._project.root_directory()).is_dirty(): + if git.Repo(job._project.root_directory()).is_dirty(): raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " @@ -45,29 +45,47 @@ def _log_operation(operation, error=None): type(self).__name__ ) ) - metadata = collect_metadata(operation) + metadata = collect_metadata(operation, job) # Add additional execution-related information to metadata. metadata["stage"] = stage metadata["error"] = None if error is None else str(error) # Write metadata to collection inside job workspace. - with Collection.open(operation.job.fn(FN_LOGFILE)) as logfile: + with Collection.open(job.fn(FN_LOGFILE)) as logfile: logfile.insert_one(metadata) return _log_operation def on_start(self, operation, job): """TO DO.""" - self.log_operation(stage="prior")(operation) + self.log_operation(stage="prior")(operation, job) def on_success(self, operation, job): """TO DO.""" - self.log_operation(stage="after")(operation) + self.log_operation(stage="after")(operation, job) def on_exception(self, operation, error, job): """TO DO.""" - self.log_operation(stage="after")(operation, error) + self.log_operation(stage="after")(operation, job, error) + + def install_operation_hooks(self, op, project_cls=None): + """TO DO. + + Parameters + ---------- + op : function or type + An operation function to log or a subclass of `flow.FlowProject` if ``project_cls`` is + ``None``. + project_cls : type + A subclass of `flow.FlowProject`. + """ + if project_cls is None: + return lambda func: self.install_operation_hooks(func, op) + project_cls.operation_hooks.on_start(self.on_start)(op) + project_cls.operation_hooks.on_success(self.on_success)(op) + project_cls.operation_hooks.on_exception(self.on_exception)(op) + return op def install_project_hooks(self, project): """TO DO.""" diff --git a/flow/hooks/util.py b/flow/hooks/util.py index 7333708d0..ffa0cf0e3 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -5,21 +5,19 @@ from datetime import datetime, timezone -def collect_metadata(operation): +def collect_metadata(operation, job): """TO DO.""" return { # the metadata schema version: "_schema_version": "1", "time": datetime.now(timezone.utc).isoformat(), "project": { - "path": operation.job._project.root_directory(), + "path": job._project.path, # the project schema version: - "schema_version": operation.job._project.config.get("schema_version"), + "schema_version": job._project.config.get("schema_version"), }, "job-operation": { - "name": operation.name, - "cmd": operation.cmd, - "directives": operation.directives, - "job_id": operation.job.get_id(), + "name": operation, + "job_id": job.id, }, } From 9c648214d641229f7e4c5be38d6d15c8a19b967f Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 29 Mar 2023 11:14:10 -0400 Subject: [PATCH 06/70] Flake8 changes. --- tests/test_project.py | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_project.py b/tests/test_project.py index 0a2c184cb..ceebd190f 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2,6 +2,7 @@ # All rights reserved. # This software is licensed under the BSD 3-Clause License. import datetime +import json import logging import os import subprocess @@ -18,6 +19,7 @@ from define_aggregate_test_project import _AggregateTestProject from define_dag_test_project import DagTestProject from define_directives_test_project import _DirectivesTestProject +from define_hooks_track_operations_project import _HooksTrackOperations from define_test_project import _DynamicTestProject, _TestProject from deprecation import fail_if_not_removed @@ -2435,6 +2437,65 @@ def test_raise_exception_in_hook_cmd(self): assert "RuntimeError" in error_output +class TestHooksTrackOperations(TestHooksSetUp): + project_class = _HooksTrackOperations + entrypoint = dict( + path=os.path.realpath( + os.path.join( + os.path.dirname(__file__), "define_hooks_track_operations_project.py" + ) + ) + ) + log_fname = ".operations_log.txt" + + @pytest.fixture( + params=[ + "strict_git_false", + # "strict_git_false_cmd" + ] + ) + def operation_name(self, request): + return request.param + + def mock_project(self): + project = self.project_class.get_project(root=self._tmp_dir.name) + project.open_job(dict(raise_exception=False)).init() + project.open_job(dict(raise_exception=True)).init() + project = project.get_project(root=self._tmp_dir.name) + return project + + def split_log(self, job): + with open(job.fn(self.log_fname)) as f: + values = f.read().split("\n") + if values[-1] == "": + values = values[:-1] + return values + + def test_on_start(self, project, job, operation_name): + assert not job.isfile(self.log_fname) + + if job.sp.raise_exception: + with pytest.raises(subprocess.CalledProcessError): + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + else: + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + + assert job.isfile(self.log_fname) + values = self.split_log(job) + + # There will always be 4 entries in this file because + # for every operation, we install the following hooks + # at the operation and project level: + # (1) on_start and (2) on_exception / on_success + assert len(values) == 4 + for value in values[:2]: + metadata = json.loads(value) + assert metadata["stage"] == "prior" + job_op_metadata = metadata["job-operation"] + assert job_op_metadata["job_id"] == job.id + assert job_op_metadata["name"] == operation_name + + class TestIgnoreConditions: def test_str(self): expected_results = { From c151c00545a24df4f23d243e9f3beb6a426c7d01 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 29 Mar 2023 11:58:22 -0400 Subject: [PATCH 07/70] Added more tests to make sure metadata for on start matches with expectationl . --- tests/test_project.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_project.py b/tests/test_project.py index ceebd190f..2d7a9d00b 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2474,6 +2474,7 @@ def split_log(self, job): def test_on_start(self, project, job, operation_name): assert not job.isfile(self.log_fname) + time = datetime.datetime.now(datetime.timezone.utc) if job.sp.raise_exception: with pytest.raises(subprocess.CalledProcessError): self.call_subcmd(f"run -o {operation_name} -j {job.id}") @@ -2491,6 +2492,19 @@ def test_on_start(self, project, job, operation_name): for value in values[:2]: metadata = json.loads(value) assert metadata["stage"] == "prior" + # I think job._project.path gives us a relative path in the test, while + # job._project.path in hooks.utils.collect_metadata gives us the absolute path + # I don't know why this is happening. + assert job._project.path in metadata["project"]["path"] + assert metadata["project"]["schema_version"] == job._project.config.get( + "schema_version" + ) + # Just assumed that the operation should start within 5 minutes + # from where we recorded the time line 2477. This seems generous and + # we can adjust it if needed. + start_time = datetime.datetime.fromisoformat(metadata["time"]) + difference = start_time - time + assert difference.seconds < 5 * 60 job_op_metadata = metadata["job-operation"] assert job_op_metadata["job_id"] == job.id assert job_op_metadata["name"] == operation_name From a2f7e7fcf291eb69d7877c1bbc3501e718338cf1 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 29 Mar 2023 12:12:47 -0400 Subject: [PATCH 08/70] Added test for success and exception hooks. --- .../define_hooks_track_operations_project.py | 62 +++++++++++++++++++ tests/test_project.py | 28 ++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 tests/define_hooks_track_operations_project.py diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py new file mode 100644 index 000000000..0f6fbfd20 --- /dev/null +++ b/tests/define_hooks_track_operations_project.py @@ -0,0 +1,62 @@ +from define_hooks_test_project import HOOKS_ERROR_MESSAGE + +from flow import FlowProject +from flow.hooks import TrackOperations + + +class _HooksTrackOperations(FlowProject): + pass + + +@_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=False).on_start +) +@_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=False).on_exception +) +@_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=False).on_success +) +@_HooksTrackOperations.operation +def strict_git_false(job): + if job.sp.raise_exception: + raise RuntimeError(HOOKS_ERROR_MESSAGE) + + +@_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=False).on_start +) +@_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=False).on_exception +) +@_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=False).on_success +) +@_HooksTrackOperations.operation(cmd=True, with_job=True) +def strict_git_false_cmd(job): + if job.sp.raise_exception: + return "exit 42" + else: + return "touch base_cmd.txt" + + +@TrackOperations(strict_git=False).install_operation_hooks(_HooksTrackOperations) +@_HooksTrackOperations.operation +def strict_git_false_install(job): + if job.sp.raise_exception: + raise RuntimeError(HOOKS_ERROR_MESSAGE) + + +@TrackOperations(strict_git=False).install_operation_hooks(_HooksTrackOperations) +@_HooksTrackOperations.operation(cmd=True, with_job=True) +def strict_git_false_install_cmd(job): + if job.sp.raise_exception: + return "exit 42" + else: + return "touch base_cmd.txt" + + +if __name__ == "__main__": + TrackOperations(strict_git=False).install_project_hooks( + _HooksTrackOperations() + ).main() diff --git a/tests/test_project.py b/tests/test_project.py index 2d7a9d00b..aa377eda5 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2471,7 +2471,7 @@ def split_log(self, job): values = values[:-1] return values - def test_on_start(self, project, job, operation_name): + def test_metadata(self, project, job, operation_name): assert not job.isfile(self.log_fname) time = datetime.datetime.now(datetime.timezone.utc) @@ -2509,6 +2509,32 @@ def test_on_start(self, project, job, operation_name): assert job_op_metadata["job_id"] == job.id assert job_op_metadata["name"] == operation_name + for value in values[2:]: + metadata = json.loads(value) + assert metadata["stage"] == "after" + # I think job._project.path gives us a relative path in the test, while + # job._project.path in hooks.utils.collect_metadata gives us the absolute path + # I don't know why this is happening. + assert job._project.path in metadata["project"]["path"] + assert metadata["project"]["schema_version"] == job._project.config.get( + "schema_version" + ) + # Just assumed that the operation should start within 5 minutes + # from where we recorded the time line 2477. This seems generous and + # we can adjust it if needed. + start_time = datetime.datetime.fromisoformat(metadata["time"]) + difference = start_time - time + assert difference.seconds < 5 * 60 + + if job.sp.raise_exception: + assert metadata["error"] == self.error_message + else: + assert metadata["error"] is None + + job_op_metadata = metadata["job-operation"] + assert job_op_metadata["job_id"] == job.id + assert job_op_metadata["name"] == operation_name + class TestIgnoreConditions: def test_str(self): From bb2b8ad3d11afec8b64bdf8e6418fd48513891a4 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Apr 2023 13:21:54 -0400 Subject: [PATCH 09/70] Added git tracking files from execution-hooks branch. --- flow/hooks/git_util.py | 18 +++++ flow/hooks/git_workspace_tracking.py | 105 +++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 flow/hooks/git_util.py create mode 100644 flow/hooks/git_workspace_tracking.py diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py new file mode 100644 index 000000000..c79098192 --- /dev/null +++ b/flow/hooks/git_util.py @@ -0,0 +1,18 @@ +# Copyright (c) 2018 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +"""TO DO.""" +import git + +from .util import collect_metadata + + +def collect_metadata_with_git(operation): + """TODO.""" + repo = git.Repo(operation.job._project.root_directory()) + metadata = collect_metadata(operation) + metadata["project"]["git"] = { + "commit_id": str(repo.commit()), + "dirty": repo.is_dirty(), + } + return metadata diff --git a/flow/hooks/git_workspace_tracking.py b/flow/hooks/git_workspace_tracking.py new file mode 100644 index 000000000..4fc90e7ca --- /dev/null +++ b/flow/hooks/git_workspace_tracking.py @@ -0,0 +1,105 @@ +# Copyright (c) 2018 The Regents of the University of Michigan +# All rights reserved. +# This software is licensed under the BSD 3-Clause License. +"""TO DO.""" +import json +import logging +import os +from collections import defaultdict + +import git + +from .git_util import collect_metadata_with_git as collect_metadata + +logger = logging.getLogger("git_tracking") + +GIT_COMMIT_MSG = """{title} + +*This commit was auto-generated.* +""" + + +_WARNINGS = defaultdict(set) + + +def _git_ignore(root, entries): + """Do not commit hidden files to the git repository.""" + fn_ignore = os.path.join(root, ".gitignore") + with open(fn_ignore, mode="a+") as file: + lines = file.readlines() + for entry in entries: + if entry not in lines: + logger.info(f"Adding '{entry}' to '{fn_ignore}'.") + file.write(entry + "\n") + + +def _git_ignored(root): + fn_ignore = os.path.join(root, ".gitignore") + with open(fn_ignore) as file: + return file.readlines() + + +def _commit(repo, title): + try: + repo.git.commit("-m", GIT_COMMIT_MSG.format(title=title)) + except git.exc.GitCommandError as error: + if "nothing to commit, working tree clean" in str(error): + pass + else: + raise + + +class GitWorkspace: + """Track the workspace state with git.""" + + def __init__(self, jointly=True, ignore=None): + self._jointly = jointly + self._ignore = ignore + + def _get_repo(self, operation): + if self._jointly: + root = operation.job._project.workspace() + else: + root = operation.job.workspace() + + try: + return git.Repo(root) + except git.exc.InvalidGitRepositoryError: + logger.info(f"Initializing git repo for '{root}'") + if self._ignore: + _git_ignore(root, self._ignore) + return git.Repo.init(root) + + def commit_before(self, operation): + """TODO.""" + metadata = collect_metadata(operation) + metadata["stage"] = "prior" + repo = self._get_repo(operation) + repo.git.add(A=True) + _commit(repo, f"Before executing operation {operation}.") + + def commit_after(self, operation, error=None): + """TODO.""" + metadata = collect_metadata(operation) + metadata["stage"] = "post" + metadata["error"] = None if error is None else str(error) + repo = self._get_repo(operation) + repo.git.add(A=True) + if error: + _commit( + repo, + "Executed operation {}.\n\nThe execution failed " + "with error '{}'.".format(operation, error), + ) + else: + _commit(repo, f"Executed operation {operation}.") + repo.git.notes("append", repo.commit(), "-m", "signac:" + json.dumps(metadata)) + + def install_hooks(self, project): + """TODO.""" + project.hooks.on_start.append(self.commit_before) + project.hooks.on_success.append(self.commit_after) + project.hooks.on_fail.append(self.commit_after) + return project + + __call__ = install_hooks From b36ca2732ca1156e9c673949a1a499870a159963 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Apr 2023 13:27:22 -0400 Subject: [PATCH 10/70] Fixed error and started work on git tests. --- flow/hooks/track_operations.py | 7 +-- .../define_hooks_track_operations_project.py | 44 +++++++++++++++++++ tests/test_project.py | 42 ++++++++++++------ 3 files changed, 76 insertions(+), 17 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 491eaad14..a0f60b2e3 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -2,7 +2,7 @@ # All rights reserved. # This software is licensed under the BSD 3-Clause License. """TO DO.""" -from signac import Collection +import json try: import git @@ -52,8 +52,9 @@ def _log_operation(operation, job, error=None): metadata["error"] = None if error is None else str(error) # Write metadata to collection inside job workspace. - with Collection.open(job.fn(FN_LOGFILE)) as logfile: - logfile.insert_one(metadata) + with open(job.fn(FN_LOGFILE), "a") as logfile: + # HELP: Is there any reason we used Collections instead of a regular dictionary? + logfile.write(json.dumps(metadata) + "\n") return _log_operation diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 0f6fbfd20..7112386c0 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -8,6 +8,11 @@ class _HooksTrackOperations(FlowProject): pass +""" +Strict Git False +""" + + @_HooksTrackOperations.operation_hooks.on_start( TrackOperations(strict_git=False).on_start ) @@ -56,6 +61,45 @@ def strict_git_false_install_cmd(job): return "touch base_cmd.txt" +""" +Strict Git True +""" + + +""" +@_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=True).on_start +) +@_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=True).on_exception +) +@_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=True).on_success +) +@_HooksTrackOperations.operation +def strict_git_true(job): + if job.sp.raise_exception: + raise RuntimeError(HOOKS_ERROR_MESSAGE) + + +@_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=True).on_start +) +@_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=True).on_exception +) +@_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=True).on_success +) +@_HooksTrackOperations.operation(cmd=True, with_job=True) +def strict_git_true_cmd(job): + if job.sp.raise_exception: + return "exit 42" + else: + return "touch base_cmd.txt" +""" + + if __name__ == "__main__": TrackOperations(strict_git=False).install_project_hooks( _HooksTrackOperations() diff --git a/tests/test_project.py b/tests/test_project.py index 1f4c14ec7..aaf7ea6e6 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -13,13 +13,13 @@ from itertools import groupby import define_hooks_test_project +import define_hooks_track_operations_project import pytest import signac from conftest import MockScheduler, TestProjectBase from define_aggregate_test_project import _AggregateTestProject from define_dag_test_project import DagTestProject from define_directives_test_project import _DirectivesTestProject -from define_hooks_track_operations_project import _HooksTrackOperations from define_test_project import _DynamicTestProject, _TestProject from deprecation import fail_if_not_removed @@ -40,6 +40,18 @@ _switch_to_directory, ) +""" +try: + import git + + skip_git = False +except ImportError: + skip_git = True + + +git_mark_skipif = pytest.mark.skipif(skip_git, reason="git could not be imported") +""" + @contextmanager def suspend_logging(): @@ -2437,8 +2449,9 @@ def test_raise_exception_in_hook_cmd(self): assert "RuntimeError" in error_output +# @git_mark_skipif class TestHooksTrackOperations(TestHooksSetUp): - project_class = _HooksTrackOperations + project_class = define_hooks_track_operations_project._HooksTrackOperations entrypoint = dict( path=os.path.realpath( os.path.join( @@ -2450,20 +2463,16 @@ class TestHooksTrackOperations(TestHooksSetUp): @pytest.fixture( params=[ - "strict_git_false", - # "strict_git_false_cmd" + ( + "strict_git_false", + define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE, + ), + ("strict_git_false_cmd", "non-zero exit status 42"), ] ) - def operation_name(self, request): + def operation_info(self, request): return request.param - def mock_project(self): - project = self.project_class.get_project(root=self._tmp_dir.name) - project.open_job(dict(raise_exception=False)).init() - project.open_job(dict(raise_exception=True)).init() - project = project.get_project(root=self._tmp_dir.name) - return project - def split_log(self, job): with open(job.fn(self.log_fname)) as f: values = f.read().split("\n") @@ -2471,7 +2480,8 @@ def split_log(self, job): values = values[:-1] return values - def test_metadata(self, project, job, operation_name): + def test_metadata(self, project, job, operation_info): + operation_name, error_message = operation_info assert not job.isfile(self.log_fname) time = datetime.datetime.now(datetime.timezone.utc) @@ -2527,7 +2537,8 @@ def test_metadata(self, project, job, operation_name): assert difference.seconds < 5 * 60 if job.sp.raise_exception: - assert metadata["error"] == self.error_message + assert error_message in metadata["error"] + else: assert metadata["error"] is None @@ -2535,6 +2546,9 @@ def test_metadata(self, project, job, operation_name): assert job_op_metadata["job_id"] == job.id assert job_op_metadata["name"] == operation_name + def test_strict_git(): + pass + class TestIgnoreConditions: def test_str(self): From d649690f0dc89b817be8e95d16f9409f57faa5df Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Thu, 13 Apr 2023 11:35:09 -0400 Subject: [PATCH 11/70] Fixed git util file. --- flow/hooks/git_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index c79098192..a9c9fe414 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -7,10 +7,10 @@ from .util import collect_metadata -def collect_metadata_with_git(operation): +def collect_metadata_with_git(operation, job): """TODO.""" - repo = git.Repo(operation.job._project.root_directory()) - metadata = collect_metadata(operation) + repo = git.Repo(job._project.root_directory()) + metadata = collect_metadata(operation, job) metadata["project"]["git"] = { "commit_id": str(repo.commit()), "dirty": repo.is_dirty(), From 90dcaee181047e6edd026441c595de7ead86e6cd Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Thu, 13 Apr 2023 11:59:37 -0400 Subject: [PATCH 12/70] Ignore tests that require git if git is not installed. --- .../define_hooks_track_operations_project.py | 65 ++++++++++--------- tests/test_project.py | 9 +-- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 7112386c0..a52e77fa7 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -66,38 +66,39 @@ def strict_git_false_install_cmd(job): """ -""" -@_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=True).on_start -) -@_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=True).on_exception -) -@_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=True).on_success -) -@_HooksTrackOperations.operation -def strict_git_true(job): - if job.sp.raise_exception: - raise RuntimeError(HOOKS_ERROR_MESSAGE) - - -@_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=True).on_start -) -@_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=True).on_exception -) -@_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=True).on_success -) -@_HooksTrackOperations.operation(cmd=True, with_job=True) -def strict_git_true_cmd(job): - if job.sp.raise_exception: - return "exit 42" - else: - return "touch base_cmd.txt" -""" +try: + @_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=True).on_start + ) + @_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=True).on_exception + ) + @_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=True).on_success + ) + @_HooksTrackOperations.operation + def strict_git_true(job): + if job.sp.raise_exception: + raise RuntimeError(HOOKS_ERROR_MESSAGE) + + + @_HooksTrackOperations.operation_hooks.on_start( + TrackOperations(strict_git=True).on_start + ) + @_HooksTrackOperations.operation_hooks.on_exception( + TrackOperations(strict_git=True).on_exception + ) + @_HooksTrackOperations.operation_hooks.on_success( + TrackOperations(strict_git=True).on_success + ) + @_HooksTrackOperations.operation(cmd=True, with_job=True) + def strict_git_true_cmd(job): + if job.sp.raise_exception: + return "exit 42" + else: + return "touch base_cmd.txt" +except RuntimeError: + pass if __name__ == "__main__": diff --git a/tests/test_project.py b/tests/test_project.py index aaf7ea6e6..acd27fca2 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -40,17 +40,14 @@ _switch_to_directory, ) -""" try: import git - skip_git = False except ImportError: skip_git = True git_mark_skipif = pytest.mark.skipif(skip_git, reason="git could not be imported") -""" @contextmanager @@ -2449,7 +2446,6 @@ def test_raise_exception_in_hook_cmd(self): assert "RuntimeError" in error_output -# @git_mark_skipif class TestHooksTrackOperations(TestHooksSetUp): project_class = define_hooks_track_operations_project._HooksTrackOperations entrypoint = dict( @@ -2546,8 +2542,9 @@ def test_metadata(self, project, job, operation_info): assert job_op_metadata["job_id"] == job.id assert job_op_metadata["name"] == operation_name - def test_strict_git(): - pass + @git_mark_skipif + def test_strict_git(self, project): + pass class TestIgnoreConditions: From edd8fdedd130b5f960e4f32003bca08878a8f7b6 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Thu, 13 Apr 2023 12:07:28 -0400 Subject: [PATCH 13/70] Changed operation name in pytest for strict git false. --- tests/test_project.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index acd27fca2..188c997e3 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2466,7 +2466,7 @@ class TestHooksTrackOperations(TestHooksSetUp): ("strict_git_false_cmd", "non-zero exit status 42"), ] ) - def operation_info(self, request): + def strict_git_false_operation_info(self, request): return request.param def split_log(self, job): @@ -2476,8 +2476,8 @@ def split_log(self, job): values = values[:-1] return values - def test_metadata(self, project, job, operation_info): - operation_name, error_message = operation_info + def test_metadata(self, project, job, strict_git_false_operation_info): + operation_name, error_message = strict_git_false_operation_info assert not job.isfile(self.log_fname) time = datetime.datetime.now(datetime.timezone.utc) @@ -2543,7 +2543,7 @@ def test_metadata(self, project, job, operation_info): assert job_op_metadata["name"] == operation_name @git_mark_skipif - def test_strict_git(self, project): + def test_strict_git(self, project, job, operation_info): pass From e7fd3f88274fe269c75f6ba09fc26ebe691846f6 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Thu, 13 Apr 2023 12:36:56 -0400 Subject: [PATCH 14/70] Started strict git tests. --- tests/test_project.py | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 188c997e3..33a12779c 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2469,6 +2469,18 @@ class TestHooksTrackOperations(TestHooksSetUp): def strict_git_false_operation_info(self, request): return request.param + @pytest.fixture( + params=[ + ( + "strict_git_true", + define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE, + ), + ("strict_git_true_cmd", "non-zero exit status 42"), + ] + ) + def strict_git_true_operation_info(self, request): + return request.param + def split_log(self, job): with open(job.fn(self.log_fname)) as f: values = f.read().split("\n") @@ -2542,9 +2554,38 @@ def test_metadata(self, project, job, strict_git_false_operation_info): assert job_op_metadata["job_id"] == job.id assert job_op_metadata["name"] == operation_name + def git_repo(self, project, make_dirty=False): + repo = git.Repo.init(project.path) + if make_dirty: + with open(project.fn("test.txt"), "w") as f: + pass + + @git_mark_skipif - def test_strict_git(self, project, job, operation_info): - pass + def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): + operation_name, error_message = strict_git_true_operation_info + assert not job.isfile(self.log_fname) + + if job.sp.raise_exception: + with pytest.raises(subprocess.CalledProcessError): + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + else: + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + + @git_mark_skipif + def test_strict_git_is_dirty(self, project, job, strict_git_true_operation_info): + operation_name, error_message = strict_git_true_operation_info + + breakpoint() + + # assert not job.isfile(self.log_fname) + + # with pytest.raises(RuntimeError): + # if job.sp.raise_exception: + # with pytest.raises(subprocess.CalledProcessError): + # self.call_subcmd(f"run -o {operation_name} -j {job.id}") + # else: + # self.call_subcmd(f"run -o {operation_name} -j {job.id}") class TestIgnoreConditions: From 1328b5c103e30cf5b9d72cd37d89a4662f45bb1b Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 19 Apr 2023 11:55:20 -0400 Subject: [PATCH 15/70] Updated to project.path. --- flow/hooks/git_util.py | 2 +- flow/hooks/track_operations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index a9c9fe414..350657c3e 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -9,7 +9,7 @@ def collect_metadata_with_git(operation, job): """TODO.""" - repo = git.Repo(job._project.root_directory()) + repo = git.Repo(job._project.path) metadata = collect_metadata(operation, job) metadata["project"]["git"] = { "commit_id": str(repo.commit()), diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index a0f60b2e3..ec51b6b2f 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -37,7 +37,7 @@ def log_operation(self, stage): def _log_operation(operation, job, error=None): if self.strict_git: - if git.Repo(job._project.root_directory()).is_dirty(): + if git.Repo(job._project.path).is_dirty(): raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " From 16cdcbd24b6d6e4054e9f3a75494f459da702470 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 19 Apr 2023 11:55:55 -0400 Subject: [PATCH 16/70] Implemented git dirty tests. --- tests/test_project.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 3d8576f7f..9a66bb5af 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2568,14 +2568,21 @@ def test_metadata(self, project, job, strict_git_false_operation_info): def git_repo(self, project, make_dirty=False): repo = git.Repo.init(project.path) + with open(project.fn("test.txt"), "w") as f: + pass + repo.index.add(["test.txt"]) + repo.index.commit("Initial commit") if make_dirty: - with open(project.fn("test.txt"), "w") as f: + with open(project.fn("dirty.txt"), "w") as f: pass + return repo @git_mark_skipif def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info + repo = self.git_repo(project, False) + assert not job.isfile(self.log_fname) if job.sp.raise_exception: @@ -2587,17 +2594,13 @@ def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info @git_mark_skipif def test_strict_git_is_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info + assert not job.isfile(self.log_fname) - breakpoint() - - # assert not job.isfile(self.log_fname) - - # with pytest.raises(RuntimeError): - # if job.sp.raise_exception: - # with pytest.raises(subprocess.CalledProcessError): - # self.call_subcmd(f"run -o {operation_name} -j {job.id}") - # else: - # self.call_subcmd(f"run -o {operation_name} -j {job.id}") + with pytest.raises(subprocess.CalledProcessError): + if job.sp.raise_exception: + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + else: + self.call_subcmd(f"run -o {operation_name} -j {job.id}") class TestIgnoreConditions: From 36dc2285f566d32c7211b5f160c1ea20f87b274d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 16:23:36 +0000 Subject: [PATCH 17/70] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/define_hooks_track_operations_project.py | 3 ++- tests/test_project.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index a52e77fa7..8538cb52b 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -67,6 +67,7 @@ def strict_git_false_install_cmd(job): try: + @_HooksTrackOperations.operation_hooks.on_start( TrackOperations(strict_git=True).on_start ) @@ -81,7 +82,6 @@ def strict_git_true(job): if job.sp.raise_exception: raise RuntimeError(HOOKS_ERROR_MESSAGE) - @_HooksTrackOperations.operation_hooks.on_start( TrackOperations(strict_git=True).on_start ) @@ -97,6 +97,7 @@ def strict_git_true_cmd(job): return "exit 42" else: return "touch base_cmd.txt" + except RuntimeError: pass diff --git a/tests/test_project.py b/tests/test_project.py index 9a66bb5af..7a61dceef 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -42,6 +42,7 @@ try: import git + skip_git = False except ImportError: skip_git = True @@ -2577,7 +2578,6 @@ def git_repo(self, project, make_dirty=False): pass return repo - @git_mark_skipif def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info From ec374dec3e9daf0f16c535b1f24174510e2ca56e Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 19 Apr 2023 12:29:51 -0400 Subject: [PATCH 18/70] Updated changelog. --- changelog.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.txt b/changelog.txt index 1657aefd3..8d93a056d 100644 --- a/changelog.txt +++ b/changelog.txt @@ -39,6 +39,7 @@ Added +++++ - Added the OLCF Crusher environment (#708). +- Added `TrackOperations` as a builtin hook (#739). Changed +++++++ From 362f1d8e8fcade1f7eb2bca3afde5cc264455a3c Mon Sep 17 00:00:00 2001 From: Melody Zhang <114299909+melodyyzh@users.noreply.github.com> Date: Fri, 2 Jun 2023 09:47:00 -0400 Subject: [PATCH 19/70] Update changelog.txt Co-authored-by: Bradley Dice --- changelog.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 8d93a056d..c9448a351 100644 --- a/changelog.txt +++ b/changelog.txt @@ -39,7 +39,7 @@ Added +++++ - Added the OLCF Crusher environment (#708). -- Added `TrackOperations` as a builtin hook (#739). +- Added `TrackOperations` as a built in hook (#739). Changed +++++++ From c6026059e29141dcba48e8b9d44886443f0d6838 Mon Sep 17 00:00:00 2001 From: Melody Zhang <114299909+melodyyzh@users.noreply.github.com> Date: Fri, 2 Jun 2023 09:47:14 -0400 Subject: [PATCH 20/70] Update flow/hooks/git_util.py Co-authored-by: Bradley Dice --- flow/hooks/git_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index 350657c3e..833841c23 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. """TO DO.""" From e65215d1e3678d84451f340bc49f40052c9dc932 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Sun, 18 Jun 2023 22:09:46 -0400 Subject: [PATCH 21/70] added doc strings and removed unused variables in test_project.py --- flow/hooks/git_util.py | 8 ++- flow/hooks/git_workspace_tracking.py | 8 +-- flow/hooks/track_operations.py | 86 +++++++++++++++++++++++++--- flow/hooks/util.py | 6 +- tests/test_project.py | 6 +- 5 files changed, 95 insertions(+), 19 deletions(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index 833841c23..bed620436 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -1,14 +1,18 @@ # Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. -"""TO DO.""" +"""Define a function to collect metadata with git.""" import git from .util import collect_metadata def collect_metadata_with_git(operation, job): - """TODO.""" + """Collect metadata for a given operation on a job, including git-related information. + + The git-related information includes the commit ID and a flag indicating if the + repository is dirty (has uncommitted changes). + """ repo = git.Repo(job._project.path) metadata = collect_metadata(operation, job) metadata["project"]["git"] = { diff --git a/flow/hooks/git_workspace_tracking.py b/flow/hooks/git_workspace_tracking.py index 4fc90e7ca..2ecc62376 100644 --- a/flow/hooks/git_workspace_tracking.py +++ b/flow/hooks/git_workspace_tracking.py @@ -1,7 +1,7 @@ # Copyright (c) 2018 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. -"""TO DO.""" +"""Define functions to track the workspace state using git.""" import json import logging import os @@ -71,7 +71,7 @@ def _get_repo(self, operation): return git.Repo.init(root) def commit_before(self, operation): - """TODO.""" + """Commit changes before executing the operation.""" metadata = collect_metadata(operation) metadata["stage"] = "prior" repo = self._get_repo(operation) @@ -79,7 +79,7 @@ def commit_before(self, operation): _commit(repo, f"Before executing operation {operation}.") def commit_after(self, operation, error=None): - """TODO.""" + """Commit changes after executing the operation.""" metadata = collect_metadata(operation) metadata["stage"] = "post" metadata["error"] = None if error is None else str(error) @@ -96,7 +96,7 @@ def commit_after(self, operation, error=None): repo.git.notes("append", repo.commit(), "-m", "signac:" + json.dumps(metadata)) def install_hooks(self, project): - """TODO.""" + """Install hooks before and after operations.""" project.hooks.on_start.append(self.commit_before) project.hooks.on_success.append(self.commit_after) project.hooks.on_fail.append(self.commit_after) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index ec51b6b2f..ab0de7552 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -1,7 +1,7 @@ # Copyright (c) 2018 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. -"""TO DO.""" +"""Built in execution hook for basic tracking.""" import json try: @@ -20,7 +20,71 @@ class TrackOperations: - """TO DO.""" + """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. + + This hooks provides information, optionally, on the start, successful completion, and/or + erroring of one or more operations in a `flow.FlowProject` instance. The logs are stored in a + file given by the parameter ``fn_logfile``. This file will be appended to if it already exists. + The default formating for the log provides the [time, job id, log level, and log message]. + .. note:: + All tracking is performed at the INFO level. To ensure outputs are captured in log files, + use the `--debug` flag when running or submitting jobs, or specify + `submit_options=--debug` in your directives (example shown below). + + Examples + -------- + The following example will install :class:`~.TrackOperations` at the operation level. + Where the log will be stored in a file name `foo.log` in the job workspace. + + .. code-block:: python + from flow import FlowProject + from flow.hooks import TrackOperations + + + class Project(FlowProject): + pass + + + def install_operation_track_hook(operation_name, project_cls): + track = TrackOperation(f"{operation_name}.log") + return lambda op: track.install_operation_hooks(op, project_cls) + + + @install_operation_log_hook("foo", Project) + @Project.operation(directives={ + "submit_options": "--debug" # Always submit operation foo with the --debug flag + }) + def foo(job): + pass + + + The code block below provides an example of how install :class:`~.TrackOperations` to a + instance of :class:`~.FlowProject` + + .. code-block:: python + from flow import FlowProject + from flow.hooks import TrackOperations # Import build + + + class Project(FlowProject): + pass + + + # Do something + + + if __name__ == "__main__": + project = Project() + project = TrackOperations().install_project_hooks(project) + project.main() + + + + Parameters + ---------- + fn_logfile: log filename + The name of the log file in the job workspace. Default is "execution-record.log". + """ def __init__(self, strict_git=True): if strict_git and not GIT: @@ -33,7 +97,7 @@ def __init__(self, strict_git=True): self.strict_git = strict_git def log_operation(self, stage): - """TO DO.""" + """Define log_operation to collect metadata of job workspace and write to logfiles.""" def _log_operation(operation, job, error=None): if self.strict_git: @@ -59,19 +123,19 @@ def _log_operation(operation, job, error=None): return _log_operation def on_start(self, operation, job): - """TO DO.""" + """Track the start of execution of a given job(s) operation pair.""" self.log_operation(stage="prior")(operation, job) def on_success(self, operation, job): - """TO DO.""" + """Track the successful completion of a given job(s) operation pair.""" self.log_operation(stage="after")(operation, job) def on_exception(self, operation, error, job): - """TO DO.""" + """Log errors raised during the execution of a specific job(s) operation pair.""" self.log_operation(stage="after")(operation, job, error) def install_operation_hooks(self, op, project_cls=None): - """TO DO. + """Decorate operation to install track operation to one operation in a Signac-flow project. Parameters ---------- @@ -89,7 +153,13 @@ def install_operation_hooks(self, op, project_cls=None): return op def install_project_hooks(self, project): - """TO DO.""" + """Install track operation to all operations in a Signac-flow project. + + Parameters + ---------- + project : flow.FlowProject + THe project to install project wide hooks. + """ project.project_hooks.on_start.append(self.on_start) project.project_hooks.on_success.append(self.on_success) project.project_hooks.on_exception.append(self.on_exception) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index 3c0291bb4..5ddc492db 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -1,12 +1,14 @@ # Copyright (c) 2018 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. -"""TO DO.""" +"""Define a function to collect metadata on the operation and job.""" from datetime import datetime, timezone def collect_metadata(operation, job): - """TODO. + """Collect metadata related to the operation and job. + + Returns a directory including schema version, time, project, and job-operation. We can no longer track the following because we take in the operation name as a string diff --git a/tests/test_project.py b/tests/test_project.py index 7a61dceef..1e8bde758 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2569,19 +2569,19 @@ def test_metadata(self, project, job, strict_git_false_operation_info): def git_repo(self, project, make_dirty=False): repo = git.Repo.init(project.path) - with open(project.fn("test.txt"), "w") as f: + with open(project.fn("test.txt"), "w"): pass repo.index.add(["test.txt"]) repo.index.commit("Initial commit") if make_dirty: - with open(project.fn("dirty.txt"), "w") as f: + with open(project.fn("dirty.txt"), "w"): pass return repo @git_mark_skipif def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info - repo = self.git_repo(project, False) + # repo = self.git_repo(project, False) assert not job.isfile(self.log_fname) From fc617e02fef1a5253c1e0451c3bbbfb2eaf76659 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Thu, 22 Jun 2023 14:41:13 -0400 Subject: [PATCH 22/70] update tests to avoid issue with python 3.8 --- tests/define_hooks_track_operations_project.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 8538cb52b..338326fae 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -8,6 +8,9 @@ class _HooksTrackOperations(FlowProject): pass +track_operations = TrackOperations(strict_git=False) + + """ Strict Git False """ @@ -45,14 +48,14 @@ def strict_git_false_cmd(job): return "touch base_cmd.txt" -@TrackOperations(strict_git=False).install_operation_hooks(_HooksTrackOperations) +@track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation def strict_git_false_install(job): if job.sp.raise_exception: raise RuntimeError(HOOKS_ERROR_MESSAGE) -@TrackOperations(strict_git=False).install_operation_hooks(_HooksTrackOperations) +@track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation(cmd=True, with_job=True) def strict_git_false_install_cmd(job): if job.sp.raise_exception: From 8f87ffb10eed69ce1f7b31a4f2f91eb00f134f54 Mon Sep 17 00:00:00 2001 From: Melody Zhang Date: Sun, 25 Jun 2023 13:25:04 -0400 Subject: [PATCH 23/70] made additional edits on the docstrings --- flow/hooks/__init__.py | 2 +- flow/hooks/git_workspace_tracking.py | 2 +- flow/hooks/track_operations.py | 22 +++++++++------------- flow/hooks/util.py | 2 +- tests/test_project.py | 2 +- 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/flow/hooks/__init__.py b/flow/hooks/__init__.py index 99fafac5a..9458d33ce 100644 --- a/flow/hooks/__init__.py +++ b/flow/hooks/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. """Operation hooks.""" diff --git a/flow/hooks/git_workspace_tracking.py b/flow/hooks/git_workspace_tracking.py index 2ecc62376..7d1d48c71 100644 --- a/flow/hooks/git_workspace_tracking.py +++ b/flow/hooks/git_workspace_tracking.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. """Define functions to track the workspace state using git.""" diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index ab0de7552..eba7f1c20 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. """Built in execution hook for basic tracking.""" @@ -22,7 +22,7 @@ class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. - This hooks provides information, optionally, on the start, successful completion, and/or + This hooks can provides information on the start, successful completion, and/or erroring of one or more operations in a `flow.FlowProject` instance. The logs are stored in a file given by the parameter ``fn_logfile``. This file will be appended to if it already exists. The default formating for the log provides the [time, job id, log level, and log message]. @@ -58,7 +58,7 @@ def foo(job): pass - The code block below provides an example of how install :class:`~.TrackOperations` to a + The code block below provides an example of how install :class:`~.TrackOperations` to a instance of :class:`~.FlowProject` .. code-block:: python @@ -70,19 +70,15 @@ class Project(FlowProject): pass - # Do something - - if __name__ == "__main__": project = Project() project = TrackOperations().install_project_hooks(project) project.main() - Parameters ---------- - fn_logfile: log filename + fn_logfile: str The name of the log file in the job workspace. Default is "execution-record.log". """ @@ -123,19 +119,19 @@ def _log_operation(operation, job, error=None): return _log_operation def on_start(self, operation, job): - """Track the start of execution of a given job(s) operation pair.""" + """Track the start of execution of an operation on a job.""" self.log_operation(stage="prior")(operation, job) def on_success(self, operation, job): - """Track the successful completion of a given job(s) operation pair.""" + """Track the successful completion of an operation on a job.""" self.log_operation(stage="after")(operation, job) def on_exception(self, operation, error, job): - """Log errors raised during the execution of a specific job(s) operation pair.""" + """Log errors raised during the execution of an operation on a job.""" self.log_operation(stage="after")(operation, job, error) def install_operation_hooks(self, op, project_cls=None): - """Decorate operation to install track operation to one operation in a Signac-flow project. + """Decorate operation to track execution. Parameters ---------- @@ -158,7 +154,7 @@ def install_project_hooks(self, project): Parameters ---------- project : flow.FlowProject - THe project to install project wide hooks. + The project to install hooks on. """ project.project_hooks.on_start.append(self.on_start) project.project_hooks.on_success.append(self.on_success) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index 5ddc492db..aafd510f7 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. """Define a function to collect metadata on the operation and job.""" diff --git a/tests/test_project.py b/tests/test_project.py index 10a9e3466..9fded8c5f 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 The Regents of the University of Michigan +# Copyright (c) 2023 The Regents of the University of Michigan # All rights reserved. # This software is licensed under the BSD 3-Clause License. import datetime From fc8d02488188d8ab270bcf2fe478669abc0dcf3b Mon Sep 17 00:00:00 2001 From: Melody Zhang <114299909+melodyyzh@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:48:34 -0700 Subject: [PATCH 24/70] Update flow/hooks/git_util.py Co-authored-by: Bradley Dice --- flow/hooks/git_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index bed620436..c9df6c8b3 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -13,7 +13,7 @@ def collect_metadata_with_git(operation, job): The git-related information includes the commit ID and a flag indicating if the repository is dirty (has uncommitted changes). """ - repo = git.Repo(job._project.path) + repo = git.Repo(job.project.path) metadata = collect_metadata(operation, job) metadata["project"]["git"] = { "commit_id": str(repo.commit()), From acde5327da54c8b9b09862ca968f86fbc38fda5d Mon Sep 17 00:00:00 2001 From: Melody Zhang <114299909+melodyyzh@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:54:38 -0700 Subject: [PATCH 25/70] Update flow/hooks/track_operations.py Co-authored-by: Bradley Dice --- flow/hooks/track_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index eba7f1c20..792723ea7 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -149,7 +149,7 @@ def install_operation_hooks(self, op, project_cls=None): return op def install_project_hooks(self, project): - """Install track operation to all operations in a Signac-flow project. + """Install track operation to all operations in a signac-flow project. Parameters ---------- From 58cc823d751905ce0bedd9690f91b5e67f0a5959 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:04:21 -0400 Subject: [PATCH 26/70] Fixed import issue. --- flow/hooks/track_operations.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 792723ea7..d5f622e3c 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -4,14 +4,14 @@ """Built in execution hook for basic tracking.""" import json +from .util import collect_metadata + try: import git except ImportError: - from .util import collect_metadata - GIT = False else: - from .git_util import collect_metadata_with_git as collect_metadata + from .git_util import collect_metadata_with_git GIT = True @@ -105,7 +105,10 @@ def _log_operation(operation, job, error=None): type(self).__name__ ) ) - metadata = collect_metadata(operation, job) + if self.strict_git: + metadata = collect_metadata_with_git(operation, job) + else: + metadata = collect_metadata(operation, job) # Add additional execution-related information to metadata. metadata["stage"] = stage From 057e0e8292886d794579a4f7ebd38a7be5096e90 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:12:24 -0400 Subject: [PATCH 27/70] Initialize repository for git test. --- tests/test_project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_project.py b/tests/test_project.py index 9fded8c5f..6c708b604 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2645,7 +2645,7 @@ def git_repo(self, project, make_dirty=False): @git_mark_skipif def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info - # repo = self.git_repo(project, False) + self.git_repo(project, False) assert not job.isfile(self.log_fname) From 4883020f88981c4d51eff26ff4c12a6a24580586 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:19:54 -0400 Subject: [PATCH 28/70] Test if git repository is dirty. --- tests/test_project.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_project.py b/tests/test_project.py index 6c708b604..5d892e12d 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2640,6 +2640,7 @@ def git_repo(self, project, make_dirty=False): if make_dirty: with open(project.fn("dirty.txt"), "w"): pass + repo.index.add(["dirty.txt"]) return repo @git_mark_skipif @@ -2658,6 +2659,8 @@ def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info @git_mark_skipif def test_strict_git_is_dirty(self, project, job, strict_git_true_operation_info): operation_name, error_message = strict_git_true_operation_info + self.git_repo(project, True) + assert not job.isfile(self.log_fname) with pytest.raises(subprocess.CalledProcessError): From 252553cfc02e19c141d3df7246cb40bf7570fe3c Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:26:14 -0400 Subject: [PATCH 29/70] Removed old unused file. --- flow/hooks/git_workspace_tracking.py | 105 --------------------------- 1 file changed, 105 deletions(-) delete mode 100644 flow/hooks/git_workspace_tracking.py diff --git a/flow/hooks/git_workspace_tracking.py b/flow/hooks/git_workspace_tracking.py deleted file mode 100644 index 7d1d48c71..000000000 --- a/flow/hooks/git_workspace_tracking.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of Michigan -# All rights reserved. -# This software is licensed under the BSD 3-Clause License. -"""Define functions to track the workspace state using git.""" -import json -import logging -import os -from collections import defaultdict - -import git - -from .git_util import collect_metadata_with_git as collect_metadata - -logger = logging.getLogger("git_tracking") - -GIT_COMMIT_MSG = """{title} - -*This commit was auto-generated.* -""" - - -_WARNINGS = defaultdict(set) - - -def _git_ignore(root, entries): - """Do not commit hidden files to the git repository.""" - fn_ignore = os.path.join(root, ".gitignore") - with open(fn_ignore, mode="a+") as file: - lines = file.readlines() - for entry in entries: - if entry not in lines: - logger.info(f"Adding '{entry}' to '{fn_ignore}'.") - file.write(entry + "\n") - - -def _git_ignored(root): - fn_ignore = os.path.join(root, ".gitignore") - with open(fn_ignore) as file: - return file.readlines() - - -def _commit(repo, title): - try: - repo.git.commit("-m", GIT_COMMIT_MSG.format(title=title)) - except git.exc.GitCommandError as error: - if "nothing to commit, working tree clean" in str(error): - pass - else: - raise - - -class GitWorkspace: - """Track the workspace state with git.""" - - def __init__(self, jointly=True, ignore=None): - self._jointly = jointly - self._ignore = ignore - - def _get_repo(self, operation): - if self._jointly: - root = operation.job._project.workspace() - else: - root = operation.job.workspace() - - try: - return git.Repo(root) - except git.exc.InvalidGitRepositoryError: - logger.info(f"Initializing git repo for '{root}'") - if self._ignore: - _git_ignore(root, self._ignore) - return git.Repo.init(root) - - def commit_before(self, operation): - """Commit changes before executing the operation.""" - metadata = collect_metadata(operation) - metadata["stage"] = "prior" - repo = self._get_repo(operation) - repo.git.add(A=True) - _commit(repo, f"Before executing operation {operation}.") - - def commit_after(self, operation, error=None): - """Commit changes after executing the operation.""" - metadata = collect_metadata(operation) - metadata["stage"] = "post" - metadata["error"] = None if error is None else str(error) - repo = self._get_repo(operation) - repo.git.add(A=True) - if error: - _commit( - repo, - "Executed operation {}.\n\nThe execution failed " - "with error '{}'.".format(operation, error), - ) - else: - _commit(repo, f"Executed operation {operation}.") - repo.git.notes("append", repo.commit(), "-m", "signac:" + json.dumps(metadata)) - - def install_hooks(self, project): - """Install hooks before and after operations.""" - project.hooks.on_start.append(self.commit_before) - project.hooks.on_success.append(self.commit_after) - project.hooks.on_fail.append(self.commit_after) - return project - - __call__ = install_hooks From 29e1dd7908dca0d25f21aac152b57c893eec1d09 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:33:45 -0400 Subject: [PATCH 30/70] Use project instead of _project and removed unneeded old string. --- flow/hooks/track_operations.py | 3 +-- flow/hooks/util.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index d5f622e3c..0a8810811 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -97,7 +97,7 @@ def log_operation(self, stage): def _log_operation(operation, job, error=None): if self.strict_git: - if git.Repo(job._project.path).is_dirty(): + if git.Repo(job.project.path).is_dirty(): raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " @@ -116,7 +116,6 @@ def _log_operation(operation, job, error=None): # Write metadata to collection inside job workspace. with open(job.fn(FN_LOGFILE), "a") as logfile: - # HELP: Is there any reason we used Collections instead of a regular dictionary? logfile.write(json.dumps(metadata) + "\n") return _log_operation diff --git a/flow/hooks/util.py b/flow/hooks/util.py index aafd510f7..ad900fa8b 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -26,9 +26,9 @@ def collect_metadata(operation, job): "_schema_version": "1", "time": datetime.now(timezone.utc).isoformat(), "project": { - "path": job._project.path, + "path": job.project.path, # the project schema version: - "schema_version": job._project.config.get("schema_version"), + "schema_version": job.project.config.get("schema_version"), }, "job-operation": { "name": operation, From ac439fc666c48e0e6406059d210f7d54a19adf63 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:35:04 -0400 Subject: [PATCH 31/70] Removed __call__ --- flow/hooks/track_operations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 0a8810811..6d29e1cdf 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -162,5 +162,3 @@ def install_project_hooks(self, project): project.project_hooks.on_success.append(self.on_success) project.project_hooks.on_exception.append(self.on_exception) return project - - __call__ = install_project_hooks From 7658eed2b53ea5a71fbb74d74b2d0e06c7958326 Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Wed, 5 Jul 2023 15:38:59 -0400 Subject: [PATCH 32/70] Updated docstring and removed more instances of _project. --- tests/test_project.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 5d892e12d..71d665b7b 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2587,15 +2587,15 @@ def test_metadata(self, project, job, strict_git_false_operation_info): for value in values[:2]: metadata = json.loads(value) assert metadata["stage"] == "prior" - # I think job._project.path gives us a relative path in the test, while - # job._project.path in hooks.utils.collect_metadata gives us the absolute path + # I think job.project.path gives us a relative path in the test, while + # job.project.path in hooks.utils.collect_metadata gives us the absolute path # I don't know why this is happening. - assert job._project.path in metadata["project"]["path"] - assert metadata["project"]["schema_version"] == job._project.config.get( + assert job.project.path in metadata["project"]["path"] + assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version" ) # Just assumed that the operation should start within 5 minutes - # from where we recorded the time line 2477. This seems generous and + # from where we start recorded the time. This seems generous and # we can adjust it if needed. start_time = datetime.datetime.fromisoformat(metadata["time"]) difference = start_time - time @@ -2607,15 +2607,15 @@ def test_metadata(self, project, job, strict_git_false_operation_info): for value in values[2:]: metadata = json.loads(value) assert metadata["stage"] == "after" - # I think job._project.path gives us a relative path in the test, while - # job._project.path in hooks.utils.collect_metadata gives us the absolute path + # I think job.project.path gives us a relative path in the test, while + # job.project.path in hooks.utils.collect_metadata gives us the absolute path # I don't know why this is happening. - assert job._project.path in metadata["project"]["path"] - assert metadata["project"]["schema_version"] == job._project.config.get( + assert job.project.path in metadata["project"]["path"] + assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version" ) # Just assumed that the operation should start within 5 minutes - # from where we recorded the time line 2477. This seems generous and + # from where we start recorded the time. This seems generous and # we can adjust it if needed. start_time = datetime.datetime.fromisoformat(metadata["time"]) difference = start_time - time From f26410f8a277136558ba10618c7b57f39046880f Mon Sep 17 00:00:00 2001 From: Kelly Wang Date: Fri, 7 Jul 2023 13:03:54 -0400 Subject: [PATCH 33/70] Updated changelog. --- changelog.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.txt b/changelog.txt index a3be4c7d3..d5269637a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -17,6 +17,7 @@ Added - ``test-workflow`` CLI option for testing template environments/submission scripts (#747). - Frontier environment and template (#743). - Added ``-o`` / ``--operation`` flag to report project status information for specific operations (#725). +- Added builtin `TrackOperations` execution hooks (#739). Changed +++++++ From 63a08c142a4c845572b2f578145ceb2549f60caa Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 13 Oct 2023 14:32:33 -0400 Subject: [PATCH 34/70] test: Add GitPython to test dependencies --- requirements/requirements-test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index eff8de46d..bacc11684 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,5 +1,6 @@ click==8.1.7 coverage==7.3.1 +GitPython==3.1.37 pytest-cov==4.1.0 pytest==7.4.2 ruamel.yaml==0.17.33 From b68866cd7a1203dad5b87f0696ccac12d0d2389a Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Wed, 18 Oct 2023 15:02:35 -0400 Subject: [PATCH 35/70] doc: apply suggestions/corrections to documentation Co-authored-by: Bradley Dice --- flow/hooks/track_operations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 6d29e1cdf..8e39503ba 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -28,8 +28,8 @@ class TrackOperations: The default formating for the log provides the [time, job id, log level, and log message]. .. note:: All tracking is performed at the INFO level. To ensure outputs are captured in log files, - use the `--debug` flag when running or submitting jobs, or specify - `submit_options=--debug` in your directives (example shown below). + use the ``--debug`` flag when running or submitting jobs, or specify + ``submit_options=--debug`` in your directives (example shown below). Examples -------- @@ -93,7 +93,7 @@ def __init__(self, strict_git=True): self.strict_git = strict_git def log_operation(self, stage): - """Define log_operation to collect metadata of job workspace and write to logfiles.""" + """Define log_operation to collect metadata of job workspace and write to log files.""" def _log_operation(operation, job, error=None): if self.strict_git: @@ -151,7 +151,7 @@ def install_operation_hooks(self, op, project_cls=None): return op def install_project_hooks(self, project): - """Install track operation to all operations in a signac-flow project. + """Install hooks to track all operations in a `flow.FlowProject`. Parameters ---------- From 957b295bc85df4d03cb5f8bbe0f7f18d76caff21 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Wed, 18 Oct 2023 15:03:08 -0400 Subject: [PATCH 36/70] doc: Add TrackOperations --- doc/api.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 139e406a7..1c5c21295 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -160,6 +160,12 @@ Aggregation .. autofunction:: flow.get_aggregate_id +Hooks +----- + +.. autoclass:: flow.hooks.TrackOperations + :members: + Compute Environments -------------------- From af11f3fca22ac125ff83263eb22a410d4378a924 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Wed, 18 Oct 2023 15:05:45 -0400 Subject: [PATCH 37/70] feat: Add support for storing metadata in job document. --- flow/hooks/git_util.py | 20 ++++----- flow/hooks/track_operations.py | 79 +++++++++++++++++----------------- 2 files changed, 49 insertions(+), 50 deletions(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index c9df6c8b3..ad4ecff1c 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -4,19 +4,19 @@ """Define a function to collect metadata with git.""" import git -from .util import collect_metadata +def collect_git_metadata(job): + """Collect git metadata for a given workspace. -def collect_metadata_with_git(operation, job): - """Collect metadata for a given operation on a job, including git-related information. - - The git-related information includes the commit ID and a flag indicating if the + The information includes the commit ID and a flag indicating if the repository is dirty (has uncommitted changes). """ repo = git.Repo(job.project.path) - metadata = collect_metadata(operation, job) - metadata["project"]["git"] = { - "commit_id": str(repo.commit()), - "dirty": repo.is_dirty(), + return { + "project": { + "git": { + "commit_id": str(repo.commit()), + "dirty": repo.is_dirty(), + } + } } - return metadata diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 8e39503ba..045ac43fc 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -11,14 +11,11 @@ except ImportError: GIT = False else: - from .git_util import collect_metadata_with_git + from .git_util import collect_git_metadata GIT = True -FN_LOGFILE = ".operations_log.txt" - - class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. @@ -26,10 +23,12 @@ class TrackOperations: erroring of one or more operations in a `flow.FlowProject` instance. The logs are stored in a file given by the parameter ``fn_logfile``. This file will be appended to if it already exists. The default formating for the log provides the [time, job id, log level, and log message]. - .. note:: - All tracking is performed at the INFO level. To ensure outputs are captured in log files, - use the ``--debug`` flag when running or submitting jobs, or specify - ``submit_options=--debug`` in your directives (example shown below). + + Note + ---- + All tracking is performed at the INFO level. To ensure outputs are captured in log files, + use the `--debug` flag when running or submitting jobs, or specify + `submit_options=--debug` in your directives (example shown below). Examples -------- @@ -78,11 +77,15 @@ class Project(FlowProject): Parameters ---------- - fn_logfile: str - The name of the log file in the job workspace. Default is "execution-record.log". + log_filename: str, optional + The name of the log file in the job workspace. If ``None`` store in a list in the job + document in a key labeled ``{operation_name}_history``. Defaults to ``None``. + strict_git: bool, optional + Whether to fail if ``GitPython`` cannot be imported. Defaults to ``True``. """ - def __init__(self, strict_git=True): + def __init__(self, log_filename=None, strict_git=True): + self.log_filename = log_filename if strict_git and not GIT: raise RuntimeError( "Unable to collect git metadata from the repository, " @@ -92,45 +95,41 @@ def __init__(self, strict_git=True): ) self.strict_git = strict_git - def log_operation(self, stage): - """Define log_operation to collect metadata of job workspace and write to log files.""" - - def _log_operation(operation, job, error=None): - if self.strict_git: - if git.Repo(job.project.path).is_dirty(): - raise RuntimeError( - "Unable to reliably log operation, because the git repository in " - "the project root directory is dirty.\n\nMake sure to commit all " - "changes or ignore this warning by setting '{}(strict_git=False)'.".format( - type(self).__name__ - ) + def _write_metadata(self, job, operation_name, data): + if self.log_filename is None: + job.doc.setdefault(f"{operation_name}_history", []).append(data) + return + with open(job.fn(self.log_filename), "a") as logfile: + logfile.write(json.dumps(data) + "\n") + + def _get_metadata(self, operation, job, stage, error=None): + """Define log_operation to collect metadata of job workspace and write to logfiles.""" + # Add execution-related information to metadata. + metadata = {"stage": stage, "error": None if error is None else str(error)} + if self.strict_git: + if git.Repo(job.project.path).is_dirty(): + raise RuntimeError( + "Unable to reliably log operation, because the git repository in " + "the project root directory is dirty.\n\nMake sure to commit all " + "changes or ignore this warning by setting '{}(strict_git=False)'.".format( + type(self).__name__ ) - if self.strict_git: - metadata = collect_metadata_with_git(operation, job) - else: - metadata = collect_metadata(operation, job) - - # Add additional execution-related information to metadata. - metadata["stage"] = stage - metadata["error"] = None if error is None else str(error) - - # Write metadata to collection inside job workspace. - with open(job.fn(FN_LOGFILE), "a") as logfile: - logfile.write(json.dumps(metadata) + "\n") - - return _log_operation + ) + metadata.update(collect_git_metadata(job)) + metadata.update(collect_metadata(operation, job)) + return metadata def on_start(self, operation, job): """Track the start of execution of an operation on a job.""" - self.log_operation(stage="prior")(operation, job) + self._write_metadata(self._get_metadata(operation, job, stage="prior")) def on_success(self, operation, job): """Track the successful completion of an operation on a job.""" - self.log_operation(stage="after")(operation, job) + self._write_metadata(self._get_metadata(operation, job, stage="after")) def on_exception(self, operation, error, job): """Log errors raised during the execution of an operation on a job.""" - self.log_operation(stage="after")(operation, job, error) + self._write_metadata(self._get_metadata(operation, job, error, stage="after")) def install_operation_hooks(self, op, project_cls=None): """Decorate operation to track execution. From b459955ee730f99d617c3f4bd4c81db9062be2ea Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Wed, 18 Oct 2023 15:06:43 -0400 Subject: [PATCH 38/70] doc: Refactor documentation style for collect_metadata --- flow/hooks/util.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index ad900fa8b..fdb00067c 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -10,13 +10,11 @@ def collect_metadata(operation, job): Returns a directory including schema version, time, project, and job-operation. - We can no longer track the following - because we take in the operation name as a string - rather than as an object, but I think this is - still super useful information. + We can no longer track the following because we take in the operation name as a + xstring rather than as an object, but I think this is still super useful + information. - Should we just drop it or see if there's still some - way to access this info? + Should we just drop it or see if there's still some way to access this info? "cmd": operation.cmd, "directives": operation.directives, From 1707bf05424d3e199cbb3d1f0872f85ed0bbdbde Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 19 Oct 2023 16:48:13 -0400 Subject: [PATCH 39/70] fix: Prior feature to write to document. --- flow/hooks/track_operations.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 045ac43fc..962d3f585 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -3,6 +3,7 @@ # This software is licensed under the BSD 3-Clause License. """Built in execution hook for basic tracking.""" import json +from collections.abc import Mapping from .util import collect_metadata @@ -95,12 +96,16 @@ def __init__(self, log_filename=None, strict_git=True): ) self.strict_git = strict_git - def _write_metadata(self, job, operation_name, data): + def _write_metadata(self, job, metadata): + print(f"Writing... {metadata}.", flush=True) if self.log_filename is None: - job.doc.setdefault(f"{operation_name}_history", []).append(data) + history = job.doc.setdefault("execution_history", {}) + # No need to store job id or operation name. + operation_name = metadata.pop("job-operation")["name"] + history.setdefault(operation_name, []).append(metadata) return with open(job.fn(self.log_filename), "a") as logfile: - logfile.write(json.dumps(data) + "\n") + logfile.write(json.dumps(metadata) + "\n") def _get_metadata(self, operation, job, stage, error=None): """Define log_operation to collect metadata of job workspace and write to logfiles.""" @@ -116,20 +121,33 @@ def _get_metadata(self, operation, job, stage, error=None): ) ) metadata.update(collect_git_metadata(job)) - metadata.update(collect_metadata(operation, job)) - return metadata + + def nested_update(a, b): + for k, v in b.items(): + if k not in a: + a[k] = v + continue + if isinstance(a[k], Mapping) and isinstance(b[k], Mapping): + nested_update(a[k], b[k]) + else: + a[k] = v + return a + + return nested_update(metadata, collect_metadata(operation, job)) def on_start(self, operation, job): """Track the start of execution of an operation on a job.""" - self._write_metadata(self._get_metadata(operation, job, stage="prior")) + self._write_metadata(job, self._get_metadata(operation, job, stage="prior")) def on_success(self, operation, job): """Track the successful completion of an operation on a job.""" - self._write_metadata(self._get_metadata(operation, job, stage="after")) + self._write_metadata(job, self._get_metadata(operation, job, stage="after")) def on_exception(self, operation, error, job): """Log errors raised during the execution of an operation on a job.""" - self._write_metadata(self._get_metadata(operation, job, error, stage="after")) + self._write_metadata( + job, self._get_metadata(operation, job, stage="after", error=error) + ) def install_operation_hooks(self, op, project_cls=None): """Decorate operation to track execution. From cb55e293e9dd6764c84f49dc6dc6cb728d7c2276 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 19 Oct 2023 16:49:04 -0400 Subject: [PATCH 40/70] test: Test new TrackOperations changes/features --- .../define_hooks_track_operations_project.py | 92 ++----- tests/test_project.py | 254 +++++++++++------- 2 files changed, 177 insertions(+), 169 deletions(-) diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 338326fae..32870926c 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -8,104 +8,52 @@ class _HooksTrackOperations(FlowProject): pass -track_operations = TrackOperations(strict_git=False) +LOG_FILENAME = "operations.log" -""" -Strict Git False -""" +track_operations = TrackOperations(strict_git=False) +track_operations_with_file = TrackOperations(LOG_FILENAME, strict_git=False) -@_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=False).on_start -) -@_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=False).on_exception -) -@_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=False).on_success -) +@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) +@track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation -def strict_git_false(job): +def base(job): if job.sp.raise_exception: raise RuntimeError(HOOKS_ERROR_MESSAGE) -@_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=False).on_start -) -@_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=False).on_exception -) -@_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=False).on_success -) +@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) +@track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation(cmd=True, with_job=True) -def strict_git_false_cmd(job): +def cmd(job): if job.sp.raise_exception: return "exit 42" else: return "touch base_cmd.txt" -@track_operations.install_operation_hooks(_HooksTrackOperations) +track_operations_strict = TrackOperations() +track_operations_strict_with_file = TrackOperations(LOG_FILENAME) + + +@track_operations_strict.install_operation_hooks(_HooksTrackOperations) +@track_operations_strict_with_file.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation -def strict_git_false_install(job): +def strict_base(job): if job.sp.raise_exception: raise RuntimeError(HOOKS_ERROR_MESSAGE) -@track_operations.install_operation_hooks(_HooksTrackOperations) +@track_operations_strict.install_operation_hooks(_HooksTrackOperations) +@track_operations_strict_with_file.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation(cmd=True, with_job=True) -def strict_git_false_install_cmd(job): +def strict_cmd(job): if job.sp.raise_exception: return "exit 42" else: return "touch base_cmd.txt" -""" -Strict Git True -""" - - -try: - - @_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=True).on_start - ) - @_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=True).on_exception - ) - @_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=True).on_success - ) - @_HooksTrackOperations.operation - def strict_git_true(job): - if job.sp.raise_exception: - raise RuntimeError(HOOKS_ERROR_MESSAGE) - - @_HooksTrackOperations.operation_hooks.on_start( - TrackOperations(strict_git=True).on_start - ) - @_HooksTrackOperations.operation_hooks.on_exception( - TrackOperations(strict_git=True).on_exception - ) - @_HooksTrackOperations.operation_hooks.on_success( - TrackOperations(strict_git=True).on_success - ) - @_HooksTrackOperations.operation(cmd=True, with_job=True) - def strict_git_true_cmd(job): - if job.sp.raise_exception: - return "exit 42" - else: - return "touch base_cmd.txt" - -except RuntimeError: - pass - - if __name__ == "__main__": - TrackOperations(strict_git=False).install_project_hooks( - _HooksTrackOperations() - ).main() + _HooksTrackOperations().main() diff --git a/tests/test_project.py b/tests/test_project.py index 71d665b7b..a12e629cb 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2523,7 +2523,54 @@ def test_raise_exception_in_hook_cmd(self): assert "RuntimeError" in error_output -class TestHooksTrackOperations(TestHooksSetUp): +class TestHooksTrackOperationDecorators(TestProjectBase): + def test_operation_decorators(self): + class A(FlowProject): + pass + + track = flow.hooks.TrackOperations(strict_git=False) + + @A.operation_hooks.on_exception(track.on_exception) + @A.operation_hooks.on_success(track.on_success) + @A.operation_hooks.on_start(track.on_start) + @A.operation + def op(job): + pass + + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_start"]) + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_success"]) + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_exception"]) + + @track.install_operation_hooks(A) + @A.operation + def op2(job): + pass + + assert len(A._OPERATION_HOOK_REGISTRY[op2]["on_start"]) == 1 + assert len(A._OPERATION_HOOK_REGISTRY[op2]["on_success"]) == 1 + assert len(A._OPERATION_HOOK_REGISTRY[op2]["on_exception"]) == 1 + + def test_project_decorator(self): + class A(FlowProject): + pass + + @A.operation + def op(job): + pass + + @A.operation + def op2(job): + pass + + project = self.mock_project(A) + track = flow.hooks.TrackOperations(strict_git=False) + track.install_project_hooks(project) + assert len(project.project_hooks.on_start) == 1 + assert len(project.project_hooks.on_success) == 1 + assert len(project.project_hooks.on_exception) == 1 + + +class TestHooksTrackOperationsNotStrict(TestHooksSetUp): project_class = define_hooks_track_operations_project._HooksTrackOperations entrypoint = dict( path=os.path.realpath( @@ -2532,41 +2579,76 @@ class TestHooksTrackOperations(TestHooksSetUp): ) ) ) - log_fname = ".operations_log.txt" + log_fname = define_hooks_track_operations_project.LOG_FILENAME @pytest.fixture( params=[ - ( - "strict_git_false", - define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE, - ), - ("strict_git_false_cmd", "non-zero exit status 42"), - ] + ("base", define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE), + ("cmd", "non-zero exit status 42"), + ], + ids=lambda x: x[0], ) - def strict_git_false_operation_info(self, request): + def operation_info(self, request): return request.param - @pytest.fixture( - params=[ - ( - "strict_git_true", - define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE, - ), - ("strict_git_true_cmd", "non-zero exit status 42"), - ] - ) - def strict_git_true_operation_info(self, request): - return request.param + @pytest.fixture + def setup(self): + """Empty setup to enable code reuse in checking strict git hooks.""" + return None - def split_log(self, job): - with open(job.fn(self.log_fname)) as f: + def split_log(self, job, fn): + with open(job.fn(fn)) as f: values = f.read().split("\n") if values[-1] == "": values = values[:-1] return values - def test_metadata(self, project, job, strict_git_false_operation_info): - operation_name, error_message = strict_git_false_operation_info + def get_log(self, job, fn=None): + if fn is None: + execution_history = job.doc["execution_history"]() + # Add back in redundent information to make check_metadata universal + for key, item in execution_history.items(): + for entry in item: + entry["job-operation"] = {"name": key, "job_id": job.id} + return execution_history + log_entries = [json.loads(entry) for entry in self.split_log(job, fn)] + execution_history = {} + for entry in log_entries: + operation_name = entry["job-operation"]["name"] + execution_history.setdefault(operation_name, []).append(entry) + return execution_history + + def check_metadata( + self, + metadata, + job, + operation_name, + error_message, + before_execution, + expected_stage, + ): + assert metadata["stage"] == expected_stage + # I think job.project.path gives us a relative path in the test, while + # job.project.path in hooks.utils.collect_metadata gives us the absolute path + # I don't know why this is happening. + assert job.project.path in metadata["project"]["path"] + assert metadata["project"]["schema_version"] == job.project.config.get( + "schema_version" + ) + start_time = datetime.datetime.fromisoformat(metadata["time"]) + assert before_execution < start_time + job_op_metadata = metadata["job-operation"] + assert job_op_metadata["job_id"] == job.id + assert job_op_metadata["name"] == operation_name + + if expected_stage != "prior" and job.sp.raise_exception: + assert error_message in metadata["error"] + + else: + assert metadata["error"] is None + + def test_metadata(self, project, job, operation_info, setup): + operation_name, error_message = operation_info assert not job.isfile(self.log_fname) time = datetime.datetime.now(datetime.timezone.utc) @@ -2577,59 +2659,32 @@ def test_metadata(self, project, job, strict_git_false_operation_info): self.call_subcmd(f"run -o {operation_name} -j {job.id}") assert job.isfile(self.log_fname) - values = self.split_log(job) - - # There will always be 4 entries in this file because - # for every operation, we install the following hooks - # at the operation and project level: - # (1) on_start and (2) on_exception / on_success - assert len(values) == 4 - for value in values[:2]: - metadata = json.loads(value) - assert metadata["stage"] == "prior" - # I think job.project.path gives us a relative path in the test, while - # job.project.path in hooks.utils.collect_metadata gives us the absolute path - # I don't know why this is happening. - assert job.project.path in metadata["project"]["path"] - assert metadata["project"]["schema_version"] == job.project.config.get( - "schema_version" - ) - # Just assumed that the operation should start within 5 minutes - # from where we start recorded the time. This seems generous and - # we can adjust it if needed. - start_time = datetime.datetime.fromisoformat(metadata["time"]) - difference = start_time - time - assert difference.seconds < 5 * 60 - job_op_metadata = metadata["job-operation"] - assert job_op_metadata["job_id"] == job.id - assert job_op_metadata["name"] == operation_name - - for value in values[2:]: - metadata = json.loads(value) - assert metadata["stage"] == "after" - # I think job.project.path gives us a relative path in the test, while - # job.project.path in hooks.utils.collect_metadata gives us the absolute path - # I don't know why this is happening. - assert job.project.path in metadata["project"]["path"] - assert metadata["project"]["schema_version"] == job.project.config.get( - "schema_version" - ) - # Just assumed that the operation should start within 5 minutes - # from where we start recorded the time. This seems generous and - # we can adjust it if needed. - start_time = datetime.datetime.fromisoformat(metadata["time"]) - difference = start_time - time - assert difference.seconds < 5 * 60 - - if job.sp.raise_exception: - assert error_message in metadata["error"] + for filename in (None, self.log_fname): + metadata = self.get_log(job, filename) + # For the single operation + # Each metadata whether file or document is one since they both + # write to different places. + assert len(metadata) == 1 + for op, entries in metadata.items(): + assert len(entries) == 2 + self.check_metadata( + entries[0], job, operation_name, error_message, time, "prior" + ) + self.check_metadata( + entries[1], job, operation_name, error_message, time, "after" + ) - else: - assert metadata["error"] is None - job_op_metadata = metadata["job-operation"] - assert job_op_metadata["job_id"] == job.id - assert job_op_metadata["name"] == operation_name +class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): + @pytest.fixture( + params=[ + ("strict_base", define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE), + ("strict_cmd", "non-zero exit status 42"), + ], + ids=lambda x: x[0], + ) + def operation_info(self, request): + return request.param def git_repo(self, project, make_dirty=False): repo = git.Repo.init(project.path) @@ -2641,33 +2696,38 @@ def git_repo(self, project, make_dirty=False): with open(project.fn("dirty.txt"), "w"): pass repo.index.add(["dirty.txt"]) - return repo - @git_mark_skipif - def test_strict_git_not_dirty(self, project, job, strict_git_true_operation_info): - operation_name, error_message = strict_git_true_operation_info - self.git_repo(project, False) - - assert not job.isfile(self.log_fname) - - if job.sp.raise_exception: - with pytest.raises(subprocess.CalledProcessError): - self.call_subcmd(f"run -o {operation_name} -j {job.id}") - else: - self.call_subcmd(f"run -o {operation_name} -j {job.id}") + @pytest.fixture + def setup(self, project): + self.git_repo(project) + + def check_metadata( + self, + metadata, + job, + operation_name, + error_message, + before_execution, + expected_stage, + ): + super().check_metadata( + metadata, + job, + operation_name, + error_message, + before_execution, + expected_stage, + ) + repo = git.Repo(job.project.path) + assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) + assert metadata["project"]["git"]["dirty"] == repo.is_dirty() @git_mark_skipif - def test_strict_git_is_dirty(self, project, job, strict_git_true_operation_info): - operation_name, error_message = strict_git_true_operation_info + def test_strict_git_is_dirty(self, project, job, operation_info): + operation_name, error_message = operation_info self.git_repo(project, True) - - assert not job.isfile(self.log_fname) - with pytest.raises(subprocess.CalledProcessError): - if job.sp.raise_exception: - self.call_subcmd(f"run -o {operation_name} -j {job.id}") - else: - self.call_subcmd(f"run -o {operation_name} -j {job.id}") + self.call_subcmd(f"run -o {operation_name} -j {job.id}") class TestIgnoreConditions: From 9f3adfdd4fb90f9631d7fed1572cf3270708158a Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 19 Oct 2023 16:56:21 -0400 Subject: [PATCH 41/70] refactor: Simplify metadata collection logic --- flow/hooks/git_util.py | 9 +-------- flow/hooks/track_operations.py | 21 +++++---------------- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/flow/hooks/git_util.py b/flow/hooks/git_util.py index ad4ecff1c..aa4d20b66 100644 --- a/flow/hooks/git_util.py +++ b/flow/hooks/git_util.py @@ -12,11 +12,4 @@ def collect_git_metadata(job): repository is dirty (has uncommitted changes). """ repo = git.Repo(job.project.path) - return { - "project": { - "git": { - "commit_id": str(repo.commit()), - "dirty": repo.is_dirty(), - } - } - } + return {"commit_id": str(repo.commit()), "dirty": repo.is_dirty()} diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 962d3f585..535115ecf 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -3,7 +3,6 @@ # This software is licensed under the BSD 3-Clause License. """Built in execution hook for basic tracking.""" import json -from collections.abc import Mapping from .util import collect_metadata @@ -80,7 +79,8 @@ class Project(FlowProject): ---------- log_filename: str, optional The name of the log file in the job workspace. If ``None`` store in a list in the job - document in a key labeled ``{operation_name}_history``. Defaults to ``None``. + document in a key labeled ``f"{operation_name}"`` under the ``"execution_history"`` key. + Defaults to ``None``. strict_git: bool, optional Whether to fail if ``GitPython`` cannot be imported. Defaults to ``True``. """ @@ -111,6 +111,7 @@ def _get_metadata(self, operation, job, stage, error=None): """Define log_operation to collect metadata of job workspace and write to logfiles.""" # Add execution-related information to metadata. metadata = {"stage": stage, "error": None if error is None else str(error)} + metadata.update(collect_metadata(operation, job)) if self.strict_git: if git.Repo(job.project.path).is_dirty(): raise RuntimeError( @@ -120,20 +121,8 @@ def _get_metadata(self, operation, job, stage, error=None): type(self).__name__ ) ) - metadata.update(collect_git_metadata(job)) - - def nested_update(a, b): - for k, v in b.items(): - if k not in a: - a[k] = v - continue - if isinstance(a[k], Mapping) and isinstance(b[k], Mapping): - nested_update(a[k], b[k]) - else: - a[k] = v - return a - - return nested_update(metadata, collect_metadata(operation, job)) + metadata["project"]["git"] = collect_git_metadata(job) + return metadata def on_start(self, operation, job): """Track the start of execution of an operation on a job.""" From 6d262f07e1badb09bf20f6b7de141e13edd0ad71 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 19 Oct 2023 17:00:25 -0400 Subject: [PATCH 42/70] doc: Update collect_metadata docstring's possible todo --- flow/hooks/util.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index fdb00067c..d4f2409cd 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -10,14 +10,12 @@ def collect_metadata(operation, job): Returns a directory including schema version, time, project, and job-operation. - We can no longer track the following because we take in the operation name as a - xstring rather than as an object, but I think this is still super useful - information. + We can no longer track the following because we take in the operation name as a string rather + than as an object, but they provide useful information. We could try to perform introspection + of the project through the job later if desired to get these values. - Should we just drop it or see if there's still some way to access this info? - - "cmd": operation.cmd, - "directives": operation.directives, + - "cmd": operation.cmd, + - "directives": operation.directives, """ return { # the metadata schema version: From a74b05b45e9e097346847d241f6e16b7b0b4a5c0 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 20 Oct 2023 11:44:45 -0400 Subject: [PATCH 43/70] ci: Fix oldest dependency tests without git. --- tests/test_project.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index a12e629cb..6d4c3d7ae 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2718,9 +2718,10 @@ def check_metadata( before_execution, expected_stage, ) - repo = git.Repo(job.project.path) - assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) - assert metadata["project"]["git"]["dirty"] == repo.is_dirty() + if not skip_git: + repo = git.Repo(job.project.path) + assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) + assert metadata["project"]["git"]["dirty"] == repo.is_dirty() @git_mark_skipif def test_strict_git_is_dirty(self, project, job, operation_info): From 19962a4adb6035e658d484ad05d2f45d08591298 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 20 Oct 2023 11:57:31 -0400 Subject: [PATCH 44/70] test: Split TrackOperation project into git_strict and not --- .../define_hooks_track_operations_project.py | 22 ----- ...e_hooks_track_operations_strict_project.py | 37 ++++++++ tests/test_project.py | 88 ++++++++++--------- 3 files changed, 83 insertions(+), 64 deletions(-) create mode 100644 tests/define_hooks_track_operations_strict_project.py diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 32870926c..bec403fa3 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -33,27 +33,5 @@ def cmd(job): return "touch base_cmd.txt" -track_operations_strict = TrackOperations() -track_operations_strict_with_file = TrackOperations(LOG_FILENAME) - - -@track_operations_strict.install_operation_hooks(_HooksTrackOperations) -@track_operations_strict_with_file.install_operation_hooks(_HooksTrackOperations) -@_HooksTrackOperations.operation -def strict_base(job): - if job.sp.raise_exception: - raise RuntimeError(HOOKS_ERROR_MESSAGE) - - -@track_operations_strict.install_operation_hooks(_HooksTrackOperations) -@track_operations_strict_with_file.install_operation_hooks(_HooksTrackOperations) -@_HooksTrackOperations.operation(cmd=True, with_job=True) -def strict_cmd(job): - if job.sp.raise_exception: - return "exit 42" - else: - return "touch base_cmd.txt" - - if __name__ == "__main__": _HooksTrackOperations().main() diff --git a/tests/define_hooks_track_operations_strict_project.py b/tests/define_hooks_track_operations_strict_project.py new file mode 100644 index 000000000..4bd471d82 --- /dev/null +++ b/tests/define_hooks_track_operations_strict_project.py @@ -0,0 +1,37 @@ +from define_hooks_test_project import HOOKS_ERROR_MESSAGE + +from flow import FlowProject +from flow.hooks import TrackOperations + + +class _HooksTrackOperations(FlowProject): + pass + + +LOG_FILENAME = "operations.log" + + +track_operations = TrackOperations() +track_operations_with_file = TrackOperations(LOG_FILENAME) + + +@track_operations.install_operation_hooks(_HooksTrackOperations) +@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) +@_HooksTrackOperations.operation +def base(job): + if job.sp.raise_exception: + raise RuntimeError(HOOKS_ERROR_MESSAGE) + + +@track_operations.install_operation_hooks(_HooksTrackOperations) +@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) +@_HooksTrackOperations.operation(cmd=True, with_job=True) +def cmd(job): + if job.sp.raise_exception: + return "exit 42" + else: + return "touch base_cmd.txt" + + +if __name__ == "__main__": + _HooksTrackOperations().main() diff --git a/tests/test_project.py b/tests/test_project.py index 6d4c3d7ae..1a02f349a 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -42,6 +42,7 @@ ) try: + import define_hooks_track_operations_strict_project import git skip_git = False @@ -2675,60 +2676,63 @@ def test_metadata(self, project, job, operation_info, setup): ) -class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): - @pytest.fixture( - params=[ - ("strict_base", define_hooks_track_operations_project.HOOKS_ERROR_MESSAGE), - ("strict_cmd", "non-zero exit status 42"), - ], - ids=lambda x: x[0], - ) - def operation_info(self, request): - return request.param +if not skip_git: - def git_repo(self, project, make_dirty=False): - repo = git.Repo.init(project.path) - with open(project.fn("test.txt"), "w"): - pass - repo.index.add(["test.txt"]) - repo.index.commit("Initial commit") - if make_dirty: - with open(project.fn("dirty.txt"), "w"): - pass - repo.index.add(["dirty.txt"]) - - @pytest.fixture - def setup(self, project): - self.git_repo(project) + class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): + project_class = ( + define_hooks_track_operations_strict_project._HooksTrackOperations + ) + entrypoint = dict( + path=os.path.realpath( + os.path.join( + os.path.dirname(__file__), + "define_hooks_track_operations_strict_project.py", + ) + ) + ) + log_fname = define_hooks_track_operations_project.LOG_FILENAME - def check_metadata( - self, - metadata, - job, - operation_name, - error_message, - before_execution, - expected_stage, - ): - super().check_metadata( + def git_repo(self, project, make_dirty=False): + repo = git.Repo.init(project.path) + with open(project.fn("test.txt"), "w"): + pass + repo.index.add(["test.txt"]) + repo.index.commit("Initial commit") + if make_dirty: + with open(project.fn("dirty.txt"), "w"): + pass + repo.index.add(["dirty.txt"]) + + @pytest.fixture + def setup(self, project): + self.git_repo(project) + + def check_metadata( + self, metadata, job, operation_name, error_message, before_execution, expected_stage, - ) - if not skip_git: + ): + super().check_metadata( + metadata, + job, + operation_name, + error_message, + before_execution, + expected_stage, + ) repo = git.Repo(job.project.path) assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) assert metadata["project"]["git"]["dirty"] == repo.is_dirty() - @git_mark_skipif - def test_strict_git_is_dirty(self, project, job, operation_info): - operation_name, error_message = operation_info - self.git_repo(project, True) - with pytest.raises(subprocess.CalledProcessError): - self.call_subcmd(f"run -o {operation_name} -j {job.id}") + def test_strict_git_is_dirty(self, project, job, operation_info): + operation_name, error_message = operation_info + self.git_repo(project, True) + with pytest.raises(subprocess.CalledProcessError): + self.call_subcmd(f"run -o {operation_name} -j {job.id}") class TestIgnoreConditions: From 98b326146653e5865f331cc051ea565ed915d2ca Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Mon, 23 Oct 2023 15:20:08 -0400 Subject: [PATCH 45/70] test: Actually fix test errors without git. --- tests/test_project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_project.py b/tests/test_project.py index 1a02f349a..5b44c7f3b 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -46,7 +46,7 @@ import git skip_git = False -except ImportError: +except RuntimeError: skip_git = True From 551e45698a2d2729d2baecd3e94785c697c3911d Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 26 Oct 2023 16:03:05 -0400 Subject: [PATCH 46/70] refactor: Store git info if possible when strict_git=False --- flow/hooks/track_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 535115ecf..43434faaa 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -112,8 +112,8 @@ def _get_metadata(self, operation, job, stage, error=None): # Add execution-related information to metadata. metadata = {"stage": stage, "error": None if error is None else str(error)} metadata.update(collect_metadata(operation, job)) - if self.strict_git: - if git.Repo(job.project.path).is_dirty(): + if GIT: + if self.strict_git and git.Repo(job.project.path).is_dirty(): raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " From 19d318b32da7fca8bd867ba8c4cfa7b741614d4b Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 26 Oct 2023 17:01:37 -0400 Subject: [PATCH 47/70] refactor: change handling of erroring on dirty git --- flow/hooks/track_operations.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 43434faaa..9822b20bf 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -7,12 +7,10 @@ from .util import collect_metadata try: - import git + from .git_util import collect_git_metadata except ImportError: GIT = False else: - from .git_util import collect_git_metadata - GIT = True @@ -113,7 +111,8 @@ def _get_metadata(self, operation, job, stage, error=None): metadata = {"stage": stage, "error": None if error is None else str(error)} metadata.update(collect_metadata(operation, job)) if GIT: - if self.strict_git and git.Repo(job.project.path).is_dirty(): + git_metadata = collect_git_metadata(job) + if self.strict_git and git_metadata["dirty"]: raise RuntimeError( "Unable to reliably log operation, because the git repository in " "the project root directory is dirty.\n\nMake sure to commit all " @@ -121,7 +120,7 @@ def _get_metadata(self, operation, job, stage, error=None): type(self).__name__ ) ) - metadata["project"]["git"] = collect_git_metadata(job) + metadata["project"]["git"] = git_metadata return metadata def on_start(self, operation, job): From 1843d91d87b8331075daa21169aa14fdd2f017f0 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Thu, 26 Oct 2023 17:02:27 -0400 Subject: [PATCH 48/70] test: Refactor TestHooksTrackOperationsNotStrict with more options --- tests/test_project.py | 99 +++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 5b44c7f3b..67bebd3e3 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2581,6 +2581,7 @@ class TestHooksTrackOperationsNotStrict(TestHooksSetUp): ) ) log_fname = define_hooks_track_operations_project.LOG_FILENAME + error_on_no_git = False @pytest.fixture( params=[ @@ -2592,10 +2593,23 @@ class TestHooksTrackOperationsNotStrict(TestHooksSetUp): def operation_info(self, request): return request.param - @pytest.fixture - def setup(self): - """Empty setup to enable code reuse in checking strict git hooks.""" - return None + @pytest.fixture( + params=None if skip_git else (True, False), + ids=None if skip_git else ("git-dirty", "git-clean"), + ) + def git_repo(self, project, request): + if request.param is None: + return + repo = git.Repo.init(project.path) + with open(project.fn("test.txt"), "w"): + pass + repo.index.add(["test.txt"]) + repo.index.commit("Initial commit") + if request.param: + with open(project.fn("dirty.txt"), "w"): + pass + repo.index.add(["dirty.txt"]) + return repo def split_log(self, job, fn): with open(job.fn(fn)) as f: @@ -2627,6 +2641,7 @@ def check_metadata( error_message, before_execution, expected_stage, + repo, ): assert metadata["stage"] == expected_stage # I think job.project.path gives us a relative path in the test, while @@ -2648,12 +2663,24 @@ def check_metadata( else: assert metadata["error"] is None - def test_metadata(self, project, job, operation_info, setup): + if repo is not None: + assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) + assert metadata["project"]["git"]["dirty"] == repo.is_dirty() + else: + if self.error_on_no_git: + # Should not happen in actual testing, here for developers. + assert False, "Expected gitpython for test." + + def test_metadata(self, project, job, operation_info, git_repo): operation_name, error_message = operation_info assert not job.isfile(self.log_fname) time = datetime.datetime.now(datetime.timezone.utc) - if job.sp.raise_exception: + if git_repo is not None and self.error_on_no_git and git_repo.is_dirty(): + with pytest.raises(subprocess.CalledProcessError): + self.call_subcmd(f"run -o {operation_name} -j {job.id}") + return + elif job.sp.raise_exception: with pytest.raises(subprocess.CalledProcessError): self.call_subcmd(f"run -o {operation_name} -j {job.id}") else: @@ -2669,10 +2696,22 @@ def test_metadata(self, project, job, operation_info, setup): for op, entries in metadata.items(): assert len(entries) == 2 self.check_metadata( - entries[0], job, operation_name, error_message, time, "prior" + entries[0], + job, + operation_name, + error_message, + time, + "prior", + git_repo, ) self.check_metadata( - entries[1], job, operation_name, error_message, time, "after" + entries[1], + job, + operation_name, + error_message, + time, + "after", + git_repo, ) @@ -2690,49 +2729,7 @@ class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): ) ) ) - log_fname = define_hooks_track_operations_project.LOG_FILENAME - - def git_repo(self, project, make_dirty=False): - repo = git.Repo.init(project.path) - with open(project.fn("test.txt"), "w"): - pass - repo.index.add(["test.txt"]) - repo.index.commit("Initial commit") - if make_dirty: - with open(project.fn("dirty.txt"), "w"): - pass - repo.index.add(["dirty.txt"]) - - @pytest.fixture - def setup(self, project): - self.git_repo(project) - - def check_metadata( - self, - metadata, - job, - operation_name, - error_message, - before_execution, - expected_stage, - ): - super().check_metadata( - metadata, - job, - operation_name, - error_message, - before_execution, - expected_stage, - ) - repo = git.Repo(job.project.path) - assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) - assert metadata["project"]["git"]["dirty"] == repo.is_dirty() - - def test_strict_git_is_dirty(self, project, job, operation_info): - operation_name, error_message = operation_info - self.git_repo(project, True) - with pytest.raises(subprocess.CalledProcessError): - self.call_subcmd(f"run -o {operation_name} -j {job.id}") + error_on_no_git = True class TestIgnoreConditions: From 2ef3f0461aa07d5bcfe4c606ce46ce6a50bd1822 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 27 Oct 2023 09:47:10 -0400 Subject: [PATCH 49/70] test: Fix condition test (make more strict). --- tests/test_project.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 67bebd3e3..7d2990a53 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2538,9 +2538,9 @@ class A(FlowProject): def op(job): pass - assert len(A._OPERATION_HOOK_REGISTRY[op]["on_start"]) - assert len(A._OPERATION_HOOK_REGISTRY[op]["on_success"]) - assert len(A._OPERATION_HOOK_REGISTRY[op]["on_exception"]) + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_start"]) == 1 + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_success"]) == 1 + assert len(A._OPERATION_HOOK_REGISTRY[op]["on_exception"]) == 1 @track.install_operation_hooks(A) @A.operation From a8efc65c822827dbe29148db0a741fa7335b886c Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 27 Oct 2023 10:44:36 -0400 Subject: [PATCH 50/70] test: Use == on project path and remove comment --- tests/test_project.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 7d2990a53..385e7836d 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2644,10 +2644,7 @@ def check_metadata( repo, ): assert metadata["stage"] == expected_stage - # I think job.project.path gives us a relative path in the test, while - # job.project.path in hooks.utils.collect_metadata gives us the absolute path - # I don't know why this is happening. - assert job.project.path in metadata["project"]["path"] + assert job.project.path == metadata["project"]["path"] assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version" ) From 8888e47e8de917f08af717c9de7ae4c049d6107b Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 27 Oct 2023 10:57:19 -0400 Subject: [PATCH 51/70] doc: Correct TrackOperations docstring. --- flow/hooks/track_operations.py | 43 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 9822b20bf..875b164ad 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -17,21 +17,27 @@ class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. - This hooks can provides information on the start, successful completion, and/or - erroring of one or more operations in a `flow.FlowProject` instance. The logs are stored in a - file given by the parameter ``fn_logfile``. This file will be appended to if it already exists. - The default formating for the log provides the [time, job id, log level, and log message]. - - Note - ---- - All tracking is performed at the INFO level. To ensure outputs are captured in log files, - use the `--debug` flag when running or submitting jobs, or specify - `submit_options=--debug` in your directives (example shown below). + This hooks can provides information on the start, successful completion, and/or erroring of + one or more operations in a `flow.FlowProject` instance. The logs are stored either in the + job document or the file given by ``log_filename``. If stored in the job document, it uses + the key ``execution_history``. The file or document list will be appended to if it already + exists. + + The hooks stores metadata regarding the execution of the operation and the state of the + project at the time of execution, error, and/or completion. The data will also include + information about the git status if the project is detected as a git repo and + ``GitPython`` is installed in the environment. + + Warning + ------- + This class will error on construction if GitPython is not available and ``strict_git`` is set + to ``True``. If ``strict_git`` is ``True`` trying to execute an operation with uncommitted + changes. Examples -------- The following example will install :class:`~.TrackOperations` at the operation level. - Where the log will be stored in a file name `foo.log` in the job workspace. + Where the log will be stored in the job document. .. code-block:: python from flow import FlowProject @@ -42,15 +48,11 @@ class Project(FlowProject): pass - def install_operation_track_hook(operation_name, project_cls): - track = TrackOperation(f"{operation_name}.log") - return lambda op: track.install_operation_hooks(op, project_cls) + track = TrackOperation() - @install_operation_log_hook("foo", Project) - @Project.operation(directives={ - "submit_options": "--debug" # Always submit operation foo with the --debug flag - }) + @track.install_operation_hooks(Project) + @Project.operation def foo(job): pass @@ -60,7 +62,7 @@ def foo(job): .. code-block:: python from flow import FlowProject - from flow.hooks import TrackOperations # Import build + from flow.hooks import TrackOperations class Project(FlowProject): @@ -80,7 +82,8 @@ class Project(FlowProject): document in a key labeled ``f"{operation_name}"`` under the ``"execution_history"`` key. Defaults to ``None``. strict_git: bool, optional - Whether to fail if ``GitPython`` cannot be imported. Defaults to ``True``. + Whether to fail if ``GitPython`` cannot be imported or if there are uncommitted changes + to the project's git repo. Defaults to ``True``. """ def __init__(self, log_filename=None, strict_git=True): From 1d9498b6363d6e7be3cbd8b2cc47827f3021335e Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Fri, 27 Oct 2023 16:24:16 -0400 Subject: [PATCH 52/70] test: Fix path test for CI with detailed comment on necessity --- tests/test_project.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 385e7836d..309114bf1 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -11,6 +11,7 @@ from functools import partial from io import StringIO from itertools import groupby +from pathlib import Path import define_hooks_test_project import define_hooks_track_operations_project @@ -2598,7 +2599,9 @@ def operation_info(self, request): ids=None if skip_git else ("git-dirty", "git-clean"), ) def git_repo(self, project, request): - if request.param is None: + # params=None is equivalent to not passing a parameter which results in + # result.param being unset. + if not hasattr(request, "param"): return repo = git.Repo.init(project.path) with open(project.fn("test.txt"), "w"): @@ -2644,7 +2647,18 @@ def check_metadata( repo, ): assert metadata["stage"] == expected_stage - assert job.project.path == metadata["project"]["path"] + # When running on MacOS CI a private/ is prepended to job.project.path + # seemingly indeterminetly. If metadata["project"]["path"] is checked + # then job.project.path will have "private" in it. If I check + # job.project.path then metadata["project"]["path"] will have "private" + # in it. If I check neither, one of them will have "private" in it. + test_path = os.path.join( + *filter(lambda x: x != "private", Path(metadata["project"]["path"]).parts) + ) + current_path = os.path.join( + *filter(lambda x: x != "private", Path(job.project.path).parts) + ) + assert current_path == test_path assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version" ) From 1923a4c04f1b5130a1c25f6408a47d828c181a00 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 14:15:03 -0500 Subject: [PATCH 53/70] doc: Apply Bradley's fixes Co-authored-by: Bradley Dice --- flow/hooks/track_operations.py | 22 ++++++++++++---------- flow/hooks/util.py | 6 ------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 875b164ad..32a8c8713 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -17,8 +17,8 @@ class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. - This hooks can provides information on the start, successful completion, and/or erroring of - one or more operations in a `flow.FlowProject` instance. The logs are stored either in the + This hook can provides information on the start, successful completion, and/or error of + one or more operations in a :class:`~.FlowProject` instance. The logs are stored either in the job document or the file given by ``log_filename``. If stored in the job document, it uses the key ``execution_history``. The file or document list will be appended to if it already exists. @@ -31,7 +31,7 @@ class TrackOperations: Warning ------- This class will error on construction if GitPython is not available and ``strict_git`` is set - to ``True``. If ``strict_git`` is ``True`` trying to execute an operation with uncommitted + to ``True`` or if ``strict_git`` is ``True`` when executing an operation with uncommitted changes. Examples @@ -40,6 +40,7 @@ class TrackOperations: Where the log will be stored in the job document. .. code-block:: python + from flow import FlowProject from flow.hooks import TrackOperations @@ -61,6 +62,7 @@ def foo(job): instance of :class:`~.FlowProject` .. code-block:: python + from flow import FlowProject from flow.hooks import TrackOperations @@ -77,11 +79,11 @@ class Project(FlowProject): Parameters ---------- - log_filename: str, optional - The name of the log file in the job workspace. If ``None`` store in a list in the job - document in a key labeled ``f"{operation_name}"`` under the ``"execution_history"`` key. + log_filename : str, optional + The name of the log file in the job workspace. If ``None``, store in a list in the job + document in ``job.document["execution_history"][operation_name]``. Defaults to ``None``. - strict_git: bool, optional + strict_git : bool, optional Whether to fail if ``GitPython`` cannot be imported or if there are uncommitted changes to the project's git repo. Defaults to ``True``. """ @@ -98,7 +100,7 @@ def __init__(self, log_filename=None, strict_git=True): self.strict_git = strict_git def _write_metadata(self, job, metadata): - print(f"Writing... {metadata}.", flush=True) + print(f"Writing {metadata}...", flush=True) if self.log_filename is None: history = job.doc.setdefault("execution_history", {}) # No need to store job id or operation name. @@ -146,10 +148,10 @@ def install_operation_hooks(self, op, project_cls=None): Parameters ---------- op : function or type - An operation function to log or a subclass of `flow.FlowProject` if ``project_cls`` is + An operation function to log or a subclass of :class:`~.FlowProject` if ``project_cls`` is ``None``. project_cls : type - A subclass of `flow.FlowProject`. + A subclass of :class:`~.FlowProject`. """ if project_cls is None: return lambda func: self.install_operation_hooks(func, op) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index d4f2409cd..225873227 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -10,12 +10,6 @@ def collect_metadata(operation, job): Returns a directory including schema version, time, project, and job-operation. - We can no longer track the following because we take in the operation name as a string rather - than as an object, but they provide useful information. We could try to perform introspection - of the project through the job later if desired to get these values. - - - "cmd": operation.cmd, - - "directives": operation.directives, """ return { # the metadata schema version: From bad7e30aa814758ddffc979d51e28ac19805dedf Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 14:27:51 -0500 Subject: [PATCH 54/70] refactor: metadata schema Co-authored-by: Bradley Dice --- flow/hooks/util.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flow/hooks/util.py b/flow/hooks/util.py index 225873227..13ae7d097 100644 --- a/flow/hooks/util.py +++ b/flow/hooks/util.py @@ -20,8 +20,6 @@ def collect_metadata(operation, job): # the project schema version: "schema_version": job.project.config.get("schema_version"), }, - "job-operation": { - "name": operation, - "job_id": job.id, - }, + "operation": operation, + "job_id": job.id, } From d96149998c8eb1c63af402ae6341bcf0f85fa1ec Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 14:19:18 -0500 Subject: [PATCH 55/70] test: Require GitPython --- tests/test_project.py | 49 +++++++++++++------------------------------ 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index 8524d25e7..d57665b3d 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -15,7 +15,9 @@ import define_hooks_test_project import define_hooks_track_operations_project +import define_hooks_track_operations_strict_project import define_status_test_project +import git import pytest import signac from conftest import MockScheduler, TestProjectBase @@ -42,17 +44,6 @@ _switch_to_directory, ) -try: - import define_hooks_track_operations_strict_project - import git - - skip_git = False -except RuntimeError: - skip_git = True - - -git_mark_skipif = pytest.mark.skipif(skip_git, reason="git could not be imported") - @contextmanager def suspend_logging(): @@ -2600,10 +2591,7 @@ class TestHooksTrackOperationsNotStrict(TestHooksSetUp): def operation_info(self, request): return request.param - @pytest.fixture( - params=None if skip_git else (True, False), - ids=None if skip_git else ("git-dirty", "git-clean"), - ) + @pytest.fixture(params=(True, False), ids=("git-dirty", "git-clean")) def git_repo(self, project, request): # params=None is equivalent to not passing a parameter which results in # result.param being unset. @@ -2680,13 +2668,8 @@ def check_metadata( else: assert metadata["error"] is None - if repo is not None: - assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) - assert metadata["project"]["git"]["dirty"] == repo.is_dirty() - else: - if self.error_on_no_git: - # Should not happen in actual testing, here for developers. - assert False, "Expected gitpython for test." + assert metadata["project"]["git"]["commit_id"] == str(repo.commit()) + assert metadata["project"]["git"]["dirty"] == repo.is_dirty() def test_metadata(self, project, job, operation_info, git_repo): operation_name, error_message = operation_info @@ -2732,21 +2715,17 @@ def test_metadata(self, project, job, operation_info, git_repo): ) -if not skip_git: - - class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): - project_class = ( - define_hooks_track_operations_strict_project._HooksTrackOperations - ) - entrypoint = dict( - path=os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "define_hooks_track_operations_strict_project.py", - ) +class TestHooksTrackOperationsStrict(TestHooksTrackOperationsNotStrict): + project_class = define_hooks_track_operations_strict_project._HooksTrackOperations + entrypoint = dict( + path=os.path.realpath( + os.path.join( + os.path.dirname(__file__), + "define_hooks_track_operations_strict_project.py", ) ) - error_on_no_git = True + ) + error_on_no_git = True class TestIgnoreConditions: From 31d5f8cbd2b7ffe55aafddcbeb44e414769a5ddc Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 14:25:48 -0500 Subject: [PATCH 56/70] test: Skip check for GitHub Actions MacOS runners. Doesn't use pytest.xfail or skip since only one check fails. --- tests/test_project.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index d57665b3d..e80ada0e7 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -5,13 +5,13 @@ import json import logging import os +import platform import subprocess import sys from contextlib import contextmanager, redirect_stderr, redirect_stdout from functools import partial from io import StringIO from itertools import groupby -from pathlib import Path import define_hooks_test_project import define_hooks_track_operations_project @@ -2641,18 +2641,19 @@ def check_metadata( repo, ): assert metadata["stage"] == expected_stage - # When running on MacOS CI a private/ is prepended to job.project.path - # seemingly indeterminetly. If metadata["project"]["path"] is checked - # then job.project.path will have "private" in it. If I check - # job.project.path then metadata["project"]["path"] will have "private" - # in it. If I check neither, one of them will have "private" in it. - test_path = os.path.join( - *filter(lambda x: x != "private", Path(metadata["project"]["path"]).parts) - ) - current_path = os.path.join( - *filter(lambda x: x != "private", Path(job.project.path).parts) - ) - assert current_path == test_path + + test_path = metadata["project"]["path"] + current_path = job.project.path + # Still allow for local MacOS check but skip in CI MacOS runners. Rather + # than xfail or skip this ensures we test everything else. + if ( + "private" in test_path + or "private" in current_path + and platform.system == "Darwin" + ): + pass + else: + assert current_path == test_path assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version" ) From 0e458cc05396ab8b7c7871d15e4e23870fd46636 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 14:31:15 -0500 Subject: [PATCH 57/70] doc: Correct incomplete sentence. --- flow/hooks/track_operations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 32a8c8713..18d3d621f 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -36,8 +36,8 @@ class TrackOperations: Examples -------- - The following example will install :class:`~.TrackOperations` at the operation level. - Where the log will be stored in the job document. + The following example will install :class:`~.TrackOperations` at the operation level in the + the job document. .. code-block:: python @@ -148,8 +148,8 @@ def install_operation_hooks(self, op, project_cls=None): Parameters ---------- op : function or type - An operation function to log or a subclass of :class:`~.FlowProject` if ``project_cls`` is - ``None``. + An operation function to log or a subclass of :class:`~.FlowProject` if + ``project_cls`` is ``None``. project_cls : type A subclass of :class:`~.FlowProject`. """ From 0535e25b2611460ef68226ae26c92467ef394f31 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:14:36 -0500 Subject: [PATCH 58/70] refactor: Switch back to filebased logging --- flow/hooks/track_operations.py | 36 +++++++++++++------ .../define_hooks_track_operations_project.py | 5 +-- ...e_hooks_track_operations_strict_project.py | 3 -- tests/test_project.py | 28 ++++----------- 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 18d3d621f..201b03149 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -14,6 +14,9 @@ GIT = True +_DEFAULT_FILENAME = "signac-execution-history.log" + + class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. @@ -80,15 +83,13 @@ class Project(FlowProject): Parameters ---------- log_filename : str, optional - The name of the log file in the job workspace. If ``None``, store in a list in the job - document in ``job.document["execution_history"][operation_name]``. - Defaults to ``None``. + The name of the log file in the job workspace. Defaults to "signac-execution-history.log". strict_git : bool, optional Whether to fail if ``GitPython`` cannot be imported or if there are uncommitted changes to the project's git repo. Defaults to ``True``. """ - def __init__(self, log_filename=None, strict_git=True): + def __init__(self, log_filename=_DEFAULT_FILENAME, strict_git=True): self.log_filename = log_filename if strict_git and not GIT: raise RuntimeError( @@ -100,13 +101,6 @@ def __init__(self, log_filename=None, strict_git=True): self.strict_git = strict_git def _write_metadata(self, job, metadata): - print(f"Writing {metadata}...", flush=True) - if self.log_filename is None: - history = job.doc.setdefault("execution_history", {}) - # No need to store job id or operation name. - operation_name = metadata.pop("job-operation")["name"] - history.setdefault(operation_name, []).append(metadata) - return with open(job.fn(self.log_filename), "a") as logfile: logfile.write(json.dumps(metadata) + "\n") @@ -172,3 +166,23 @@ def install_project_hooks(self, project): project.project_hooks.on_success.append(self.on_success) project.project_hooks.on_exception.append(self.on_exception) return project + + @classmethod + def read_log(cls, job, log_filename=_DEFAULT_FILENAME): + """Return the execution log data as a list of dictionaries. + + Parameters + ---------- + job : signac.job.Job + The job to read the execution history of. + log_filename : str, optional + The name of the log file in the job workspace. Defaults to + "signac-execution-history.log". + + Returns + ------- + log : list[dict[str, any]] + Returns the job's current execution history for logged operations. + """ + with open(job.fn(log_filename)) as fh: + return [json.loads(line) for line in fh.readlines() if line != ""] diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index bec403fa3..2242c8d4b 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -8,14 +8,12 @@ class _HooksTrackOperations(FlowProject): pass -LOG_FILENAME = "operations.log" +LOG_FILENAME = "signac-execution-history.log" track_operations = TrackOperations(strict_git=False) -track_operations_with_file = TrackOperations(LOG_FILENAME, strict_git=False) -@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) @track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation def base(job): @@ -23,7 +21,6 @@ def base(job): raise RuntimeError(HOOKS_ERROR_MESSAGE) -@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) @track_operations.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation(cmd=True, with_job=True) def cmd(job): diff --git a/tests/define_hooks_track_operations_strict_project.py b/tests/define_hooks_track_operations_strict_project.py index 4bd471d82..012f89791 100644 --- a/tests/define_hooks_track_operations_strict_project.py +++ b/tests/define_hooks_track_operations_strict_project.py @@ -12,11 +12,9 @@ class _HooksTrackOperations(FlowProject): track_operations = TrackOperations() -track_operations_with_file = TrackOperations(LOG_FILENAME) @track_operations.install_operation_hooks(_HooksTrackOperations) -@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation def base(job): if job.sp.raise_exception: @@ -24,7 +22,6 @@ def base(job): @track_operations.install_operation_hooks(_HooksTrackOperations) -@track_operations_with_file.install_operation_hooks(_HooksTrackOperations) @_HooksTrackOperations.operation(cmd=True, with_job=True) def cmd(job): if job.sp.raise_exception: diff --git a/tests/test_project.py b/tests/test_project.py index e80ada0e7..d22abdfec 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2,7 +2,6 @@ # All rights reserved. # This software is licensed under the BSD 3-Clause License. import datetime -import json import logging import os import platform @@ -2608,25 +2607,11 @@ def git_repo(self, project, request): repo.index.add(["dirty.txt"]) return repo - def split_log(self, job, fn): - with open(job.fn(fn)) as f: - values = f.read().split("\n") - if values[-1] == "": - values = values[:-1] - return values - - def get_log(self, job, fn=None): - if fn is None: - execution_history = job.doc["execution_history"]() - # Add back in redundent information to make check_metadata universal - for key, item in execution_history.items(): - for entry in item: - entry["job-operation"] = {"name": key, "job_id": job.id} - return execution_history - log_entries = [json.loads(entry) for entry in self.split_log(job, fn)] + def get_log(self, job): + log_entries = flow.hooks.TrackOperations.read_log(job) execution_history = {} for entry in log_entries: - operation_name = entry["job-operation"]["name"] + operation_name = entry["operation"] execution_history.setdefault(operation_name, []).append(entry) return execution_history @@ -2659,9 +2644,8 @@ def check_metadata( ) start_time = datetime.datetime.fromisoformat(metadata["time"]) assert before_execution < start_time - job_op_metadata = metadata["job-operation"] - assert job_op_metadata["job_id"] == job.id - assert job_op_metadata["name"] == operation_name + assert metadata["job_id"] == job.id + assert metadata["operation"] == operation_name if expected_stage != "prior" and job.sp.raise_exception: assert error_message in metadata["error"] @@ -2689,7 +2673,7 @@ def test_metadata(self, project, job, operation_info, git_repo): assert job.isfile(self.log_fname) for filename in (None, self.log_fname): - metadata = self.get_log(job, filename) + metadata = self.get_log(job) # For the single operation # Each metadata whether file or document is one since they both # write to different places. From 4c2207c4e780e2c126c710d0af396982e56968f3 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:15:08 -0500 Subject: [PATCH 59/70] doc: Document the schema keys in the hook. --- flow/hooks/track_operations.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 201b03149..464d538e4 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -37,6 +37,21 @@ class TrackOperations: to ``True`` or if ``strict_git`` is ``True`` when executing an operation with uncommitted changes. + The current schema has the following structure: + + - ``time``: The time of querying the metadata. + - ``stage``: The stage of execution either "prior" or "after" both referring operation + exectution. + - ``project`` + - ``path``: Filepath to the project + - ``schema_version``: The project's schema version + - ``operation``: The operation name + - ``job_id``: The job id + - ``git`` + - ``commit_id``: The current commit of the project's git repo. + - ``dirty``: Whether the project's repo has uncommitted changes or not. + - ``_schema_version``: The metadata storage's schema version. Schema is currently in version 1. + Examples -------- The following example will install :class:`~.TrackOperations` at the operation level in the From 6c846992c586b03a9db2a1f868e19d5fa8f90174 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:19:08 -0500 Subject: [PATCH 60/70] doc: Fix documentation for current code. --- flow/hooks/track_operations.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 464d538e4..3cd95bf1a 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -21,9 +21,8 @@ class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. This hook can provides information on the start, successful completion, and/or error of - one or more operations in a :class:`~.FlowProject` instance. The logs are stored either in the - job document or the file given by ``log_filename``. If stored in the job document, it uses - the key ``execution_history``. The file or document list will be appended to if it already + one or more operations in a :class:`~.FlowProject` instance. The logs are stored the file + given by ``log_filename`` within the job's path. The file will be appended to if it already exists. The hooks stores metadata regarding the execution of the operation and the state of the @@ -31,6 +30,9 @@ class TrackOperations: information about the git status if the project is detected as a git repo and ``GitPython`` is installed in the environment. + Each call to the hook adds a single JSON line to the log file. These can be + read using the `json` builtin package or `~.read_log`. + Warning ------- This class will error on construction if GitPython is not available and ``strict_git`` is set @@ -42,6 +44,7 @@ class TrackOperations: - ``time``: The time of querying the metadata. - ``stage``: The stage of execution either "prior" or "after" both referring operation exectution. + - ``error``: The error message on executing the operation if any. - ``project`` - ``path``: Filepath to the project - ``schema_version``: The project's schema version @@ -54,8 +57,7 @@ class TrackOperations: Examples -------- - The following example will install :class:`~.TrackOperations` at the operation level in the - the job document. + The following example will install :class:`~.TrackOperations` at the operation level. .. code-block:: python From 60007ad78b7fec1f2a499c5c74b9f56036d48c0b Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:28:09 -0500 Subject: [PATCH 61/70] ci: Update oldest requirements. --- .github/workflows/ci-oldest-reqs.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-oldest-reqs.txt b/.github/workflows/ci-oldest-reqs.txt index 0bcd1ecf7..85d5efa86 100644 --- a/.github/workflows/ci-oldest-reqs.txt +++ b/.github/workflows/ci-oldest-reqs.txt @@ -8,3 +8,4 @@ pytest-cov==3.0.0 pytest==7.0.1 ruamel.yaml==0.17.21 tqdm==4.60.0 +GitPython==3.0.0 From 4d5aa94b3a97c073146ab388ea28f8b8481222f2 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:35:44 -0500 Subject: [PATCH 62/70] doc: Fix documentation rendering. --- flow/hooks/track_operations.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 3cd95bf1a..50060979d 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -31,13 +31,7 @@ class TrackOperations: ``GitPython`` is installed in the environment. Each call to the hook adds a single JSON line to the log file. These can be - read using the `json` builtin package or `~.read_log`. - - Warning - ------- - This class will error on construction if GitPython is not available and ``strict_git`` is set - to ``True`` or if ``strict_git`` is ``True`` when executing an operation with uncommitted - changes. + read using the `json` builtin package or :method:`~.read_log`. The current schema has the following structure: @@ -55,6 +49,13 @@ class TrackOperations: - ``dirty``: Whether the project's repo has uncommitted changes or not. - ``_schema_version``: The metadata storage's schema version. Schema is currently in version 1. + + Warning + ------- + This class will error on construction if GitPython is not available and ``strict_git`` is set + to ``True`` or if ``strict_git`` is ``True`` when executing an operation with uncommitted + changes. + Examples -------- The following example will install :class:`~.TrackOperations` at the operation level. From 8761ce0a6153286d6a4bef5316ccedf669eb9ccd Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:38:45 -0500 Subject: [PATCH 63/70] doc: Fix nested list formatting. --- flow/hooks/track_operations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 50060979d..11ed61fa3 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -40,13 +40,13 @@ class TrackOperations: exectution. - ``error``: The error message on executing the operation if any. - ``project`` - - ``path``: Filepath to the project - - ``schema_version``: The project's schema version + - ``path``: Filepath to the project + - ``schema_version``: The project's schema version - ``operation``: The operation name - ``job_id``: The job id - ``git`` - - ``commit_id``: The current commit of the project's git repo. - - ``dirty``: Whether the project's repo has uncommitted changes or not. + - ``commit_id``: The current commit of the project's git repo. + - ``dirty``: Whether the project's repo has uncommitted changes or not. - ``_schema_version``: The metadata storage's schema version. Schema is currently in version 1. From 514eda654c4a0fac41edfba00bcf56afb8841e31 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:40:22 -0500 Subject: [PATCH 64/70] doc: Fix method reference. --- flow/hooks/track_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 11ed61fa3..ecac1d95e 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -31,7 +31,7 @@ class TrackOperations: ``GitPython`` is installed in the environment. Each call to the hook adds a single JSON line to the log file. These can be - read using the `json` builtin package or :method:`~.read_log`. + read using the `json` builtin package or :meth:`~.TrackOperations.read_log`. The current schema has the following structure: From d83d0e5bcd8704a454cb6d9fb2ed786ad182ad5d Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:50:47 -0500 Subject: [PATCH 65/70] ci: Try to get working version of GitPython --- .github/workflows/ci-oldest-reqs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-oldest-reqs.txt b/.github/workflows/ci-oldest-reqs.txt index 85d5efa86..e107e9ea8 100644 --- a/.github/workflows/ci-oldest-reqs.txt +++ b/.github/workflows/ci-oldest-reqs.txt @@ -8,4 +8,4 @@ pytest-cov==3.0.0 pytest==7.0.1 ruamel.yaml==0.17.21 tqdm==4.60.0 -GitPython==3.0.0 +GitPython==3.0.2 From 387ff7bb44d0fc023f0de2800e0f2aef114adf3e Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 7 Nov 2023 15:57:48 -0500 Subject: [PATCH 66/70] ci (WIP): Test if latest GitPython works. --- .github/workflows/ci-oldest-reqs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-oldest-reqs.txt b/.github/workflows/ci-oldest-reqs.txt index e107e9ea8..729fcbedd 100644 --- a/.github/workflows/ci-oldest-reqs.txt +++ b/.github/workflows/ci-oldest-reqs.txt @@ -8,4 +8,4 @@ pytest-cov==3.0.0 pytest==7.0.1 ruamel.yaml==0.17.21 tqdm==4.60.0 -GitPython==3.0.2 +GitPython==3.1.37 From a7bbb4a6763f35606a4583e3fbeaa5cf34c5f35e Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Tue, 14 Nov 2023 10:37:27 -0500 Subject: [PATCH 67/70] doc: Fix typos --- flow/hooks/track_operations.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index ecac1d95e..72be32c80 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -21,7 +21,7 @@ class TrackOperations: """:class:`~.TrackOperations` tracks information about the execution of operations to a logfile. This hook can provides information on the start, successful completion, and/or error of - one or more operations in a :class:`~.FlowProject` instance. The logs are stored the file + one or more operations in a :class:`~.FlowProject` instance. The logs are stored in the file given by ``log_filename`` within the job's path. The file will be appended to if it already exists. @@ -36,8 +36,8 @@ class TrackOperations: The current schema has the following structure: - ``time``: The time of querying the metadata. - - ``stage``: The stage of execution either "prior" or "after" both referring operation - exectution. + - ``stage``: Whether the hook was executed either "prior" or "after" the associated + operation's execution. - ``error``: The error message on executing the operation if any. - ``project`` - ``path``: Filepath to the project @@ -52,9 +52,8 @@ class TrackOperations: Warning ------- - This class will error on construction if GitPython is not available and ``strict_git`` is set - to ``True`` or if ``strict_git`` is ``True`` when executing an operation with uncommitted - changes. + This class will raise an exception when strict_git is set to ``True`` and either GitPython is + not available or the repository contains uncommitted changes (i.e. is "dirty"). Examples -------- From 6411bd5a4c6d24aa55b62cc9b518e647a1e8aa44 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Mon, 4 Dec 2023 11:02:53 -0500 Subject: [PATCH 68/70] refactor: Change default name of file Also fix documentation errors. Co-authored-by: Bradley Dice --- flow/hooks/track_operations.py | 14 +++++++------- tests/define_hooks_track_operations_project.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index 72be32c80..ea8f948fa 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -14,7 +14,7 @@ GIT = True -_DEFAULT_FILENAME = "signac-execution-history.log" +_DEFAULT_FILENAME = "signac_execution_history.log" class TrackOperations: @@ -22,7 +22,7 @@ class TrackOperations: This hook can provides information on the start, successful completion, and/or error of one or more operations in a :class:`~.FlowProject` instance. The logs are stored in the file - given by ``log_filename`` within the job's path. The file will be appended to if it already + given by ``log_filename`` within the job's path. The file will be appended to if it already exists. The hooks stores metadata regarding the execution of the operation and the state of the @@ -44,15 +44,15 @@ class TrackOperations: - ``schema_version``: The project's schema version - ``operation``: The operation name - ``job_id``: The job id - - ``git`` + - ``git``: - ``commit_id``: The current commit of the project's git repo. - ``dirty``: Whether the project's repo has uncommitted changes or not. - - ``_schema_version``: The metadata storage's schema version. Schema is currently in version 1. + - ``_schema_version``: The metadata storage's schema version. Warning ------- - This class will raise an exception when strict_git is set to ``True`` and either GitPython is + This class will raise an exception when ``strict_git`` is set to ``True`` and either GitPython is not available or the repository contains uncommitted changes (i.e. is "dirty"). Examples @@ -100,7 +100,7 @@ class Project(FlowProject): Parameters ---------- log_filename : str, optional - The name of the log file in the job workspace. Defaults to "signac-execution-history.log". + The name of the log file in the job workspace. Defaults to "signac_execution_history.log". strict_git : bool, optional Whether to fail if ``GitPython`` cannot be imported or if there are uncommitted changes to the project's git repo. Defaults to ``True``. @@ -194,7 +194,7 @@ def read_log(cls, job, log_filename=_DEFAULT_FILENAME): The job to read the execution history of. log_filename : str, optional The name of the log file in the job workspace. Defaults to - "signac-execution-history.log". + "signac_execution_history.log". Returns ------- diff --git a/tests/define_hooks_track_operations_project.py b/tests/define_hooks_track_operations_project.py index 2242c8d4b..24e662051 100644 --- a/tests/define_hooks_track_operations_project.py +++ b/tests/define_hooks_track_operations_project.py @@ -8,7 +8,7 @@ class _HooksTrackOperations(FlowProject): pass -LOG_FILENAME = "signac-execution-history.log" +LOG_FILENAME = "signac_execution_history.log" track_operations = TrackOperations(strict_git=False) From c759957b8efce73b405fe3bb21a3240809f241e7 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Mon, 4 Dec 2023 13:27:33 -0500 Subject: [PATCH 69/70] misc: Formatting to make pre-commit pass. --- flow/hooks/track_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/hooks/track_operations.py b/flow/hooks/track_operations.py index ea8f948fa..ee0d139f2 100644 --- a/flow/hooks/track_operations.py +++ b/flow/hooks/track_operations.py @@ -52,8 +52,8 @@ class TrackOperations: Warning ------- - This class will raise an exception when ``strict_git`` is set to ``True`` and either GitPython is - not available or the repository contains uncommitted changes (i.e. is "dirty"). + This class will raise an exception when ``strict_git`` is set to ``True`` and either + GitPython is not available or the repository contains uncommitted changes (i.e. is "dirty"). Examples -------- From d35908da70408d575c092aafafc50fda36d435f0 Mon Sep 17 00:00:00 2001 From: Brandon Butler Date: Mon, 4 Dec 2023 13:29:23 -0500 Subject: [PATCH 70/70] test: Refactor conditional to not use pass --- tests/test_project.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_project.py b/tests/test_project.py index d22abdfec..85e6b77ee 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -2631,13 +2631,11 @@ def check_metadata( current_path = job.project.path # Still allow for local MacOS check but skip in CI MacOS runners. Rather # than xfail or skip this ensures we test everything else. - if ( + if not ( "private" in test_path or "private" in current_path and platform.system == "Darwin" ): - pass - else: assert current_path == test_path assert metadata["project"]["schema_version"] == job.project.config.get( "schema_version"