From a047402a88cc63d1c2814b0c4e6ba2eccd61ff63 Mon Sep 17 00:00:00 2001 From: Renata Date: Fri, 1 May 2020 23:10:59 +0200 Subject: [PATCH 1/7] Work on improving error handling --- taf/exceptions.py | 11 ++++ taf/git.py | 129 ++++++++++++++++++++++++++++------------ taf/repositoriesdb.py | 34 ++++++++--- taf/updater/handlers.py | 4 +- taf/updater/updater.py | 7 +-- taf/validation.py | 4 +- 6 files changed, 134 insertions(+), 55 deletions(-) diff --git a/taf/exceptions.py b/taf/exceptions.py index c2fa5318..20001524 100644 --- a/taf/exceptions.py +++ b/taf/exceptions.py @@ -18,6 +18,10 @@ def __init__(self, path): self.message = f"Cannot fetch changes. Repo: {path}" +class GitError(TAFError): + pass + + class InvalidBranchError(TAFError): pass @@ -45,6 +49,13 @@ class InvalidPINError(TAFError): pass +class RepositoryInstantiationError(TAFError): + def __init__(self, repo_path, message): + super().__init__(f"Could not instantiate repository {repo_path}\n\n: {message}") + self.repo_path = repo_path + self.message = message + + class MetadataUpdateError(TAFError): def __init__(self, metadata_role, message): super().__init__( diff --git a/taf/git.py b/taf/git.py index b618dd03..5e5421fa 100644 --- a/taf/git.py +++ b/taf/git.py @@ -8,7 +8,12 @@ from pathlib import Path import taf.settings as settings -from taf.exceptions import CloneRepoException, FetchException, InvalidRepositoryError +from taf.exceptions import ( + CloneRepoException, + FetchException, + InvalidRepositoryError, + GitError, +) from taf.log import taf_logger from taf.utils import run @@ -75,7 +80,7 @@ def is_git_repository(self): try: self._git("rev-parse --git-dir") return True - except subprocess.CalledProcessError: + except GitError: return False @property @@ -101,32 +106,47 @@ def _git(self, cmd, *args, **kwargs): log_error_msg = kwargs.pop("log_error_msg", "") reraise_error = kwargs.pop("reraise_error", False) log_success_msg = kwargs.pop("log_success_msg", "") + error_if_not_exists = kwargs.pop("error_if_not_exists", True) if len(args): cmd = cmd.format(*args) command = f"git -C {self.path} {cmd}" + result = None if log_error or log_error_msg: try: result = run(command) if log_success_msg: taf_logger.debug("Repo {}:" + log_success_msg, self.name) except subprocess.CalledProcessError as e: + if ( + error_if_not_exists + and not self._path.is_dir() + or not self.is_git_repository + ): + error_msg = f"{self.path} does not exist or is not a git repository" + reraise_error = True + else: + error_msg = ( + log_error_msg + if log_error_msg + else f"Repo {self.name}: error occurred while executing {command}:\n\n{e.output}", + ) if log_error_msg: - taf_logger.error(log_error_msg) + taf_logger.error(error_msg) else: # not every git error indicates a problem # if it does, we expect that either custom error message will be provided # or that the error will be reraised - taf_logger.debug( - "Repo {}: error occurred while executing {}:\n{}", - self.name, - command, - e.output, - ) + taf_logger.debug(error_msg) if reraise_error: - raise + raise GitError(error_msg) else: - result = run(command) + try: + result = run(command) + except subprocess.CalledProcessError as e: + raise GitError( + f"Repo {self.name}: error occurred while executing {command}:\n\n{e.output}" + ) from e if log_success_msg: taf_logger.debug("Repo {}: " + log_success_msg, self.name) return result @@ -189,10 +209,14 @@ def all_fetched_commits(self, branch="master"): def branches(self, remote=False, all=False, strip_remote=False): """Returns all branches.""" flag = "-r" if remote else "-a" if all else "" + error_msg = "remote" if remote else "all" if all else "" branches = [ branch.strip('"').strip("'").strip() for branch in self._git( - "branch {} --format='%(refname:short)'", flag + "branch {} --format='%(refname:short)'", + flag, + log_error_msg=f"Repo {self.name}: could not list {error_msg} branches", + reraise_error=True, ).split("\n") ] @@ -209,12 +233,18 @@ def branches(self, remote=False, all=False, strip_remote=False): def branches_containing_commit(self, commit, strip_remote=False, sort_key=None): """Finds all branches that contain the given commit""" - local_branches = self._git(f"branch --contains {commit}").split("\n") + local_branches = self._git( + f"branch --contains {commit}", + log_error_msg=f"Repo {self.name}: could not list branches containing commit {commit}", + ).split("\n") if local_branches: local_branches = [ branch.replace("*", "").strip() for branch in local_branches ] - remote_branches = self._git(f"branch -r --contains {commit}").split("\n") + remote_branches = self._git( + f"branch -r --contains {commit}", + log_error_msg=f"Repo {self.name}: could not list remote branches containing commit {commit}", + ).split("\n") filtered_remote_branches = [] if remote_branches: for branch in remote_branches: @@ -238,14 +268,22 @@ def branch_exists(self, branch_name, include_remotes=True): If include_remotes is set to True, this checks if a remote branch exists. """ - branch = self._git(f"branch --list {branch_name}") + branch = self._git( + f"branch --list {branch_name}", + log_error_msg=f"Repo {self.name}: could not list branches", + reraise_error=True, + ) # this git command should return the branch's name if it exists # empty string otherwise if branch: return True if include_remotes: for remote in self.remotes: - remote_branch = self._git(f"branch -r --list {remote}/{branch_name}") + remote_branch = self._git( + f"branch -r --list {remote}/{branch_name}", + log_error_msg=f"Repo {self.name}: could not list remote branches", + reraise_error=True, + ) # this command should return the branch's name if the remote branch # exists # it will also return some warnings if there are problems with some refs @@ -255,7 +293,12 @@ def branch_exists(self, branch_name, include_remotes=True): def branch_off_commit(self, branch_name, commit): """Create a new branch by branching off of the specified commit""" - self._git(f"checkout -b {branch_name} {commit}") + self._git( + f"checkout -b {branch_name} {commit}", + log_error_msg=f"Repo {self.name}: could not create a new branch {branch_name} branching off of {commit}", + reraise_error=True, + log_success_msg=f"Repo {self.name}: created a new branch {branch_name} from branching off of {commit}", + ) def branch_local_name(self, remote_branch_name): """Strip remote from the given remote branch""" @@ -274,19 +317,23 @@ def checkout_branch(self, branch_name, create=False, raise_anyway=False): branch_name, log_error=True, reraise_error=True, - log_success_msg=f"checked out branch {branch_name}", + log_success_msg=f"Repo {self.name}: checked out branch {branch_name}", ) - except subprocess.CalledProcessError as e: + except GitError as e: if raise_anyway: raise (e) # skip worktree errors - if "is already checked out at" in e.output: + print(e) + if "is already checked out at" in str(e): return if create: self.create_and_checkout_branch(branch_name) else: + taf_logger.error( + f"Repo {self.name}: could not check out branch {branch_name}" + ) raise (e) def checkout_paths(self, commit, *args): @@ -296,10 +343,11 @@ def checkout_paths(self, commit, *args): def checkout_orphan_branch(self, branch_name): """Creates orphan branch""" self._git(f"checkout --orphan {branch_name}") - try: - self._git("rm -rf .") - except subprocess.CalledProcessError: # If repository is empty - pass + self._git( + "rm -rf .", + log_error_msg=f"Repo {self.name}: could not checkout orphan branch {branch_name}", + reraise_error=True, + ) def clean(self): self._git("clean -fd") @@ -310,7 +358,9 @@ def clone(self, no_checkout=False, bare=False, **kwargs): shutil.rmtree(self.path, True) self._path.mkdir(exist_ok=True, parents=True) if self.repo_urls is None: - raise Exception("Cannot clone repository. No urls were specified") + raise GitError( + f"Repo {self.name}: cannot clone repository. No urls were specified" + ) params = [] if bare: params.append("--bare") @@ -330,8 +380,9 @@ def clone(self, no_checkout=False, bare=False, **kwargs): self._git( "clone {} . {}", url, params, log_success_msg="successfully cloned" ) - except subprocess.CalledProcessError: + except GitError: taf_logger.error("Repo {}: cannot clone from url {}", self.name, url) + raise else: break @@ -351,8 +402,7 @@ def clone_or_pull(self, branches=None, only_fetch=False, **kwargs): taf_logger.debug("Repo {}: old head sha is {}", self.name, old_head) try: self.clone(**kwargs) - except subprocess.CalledProcessError: - taf_logger.error("Repo {}: could not clone repo", self.name) + except GitError: raise CloneRepoException(self.url) else: try: @@ -364,7 +414,7 @@ def clone_or_pull(self, branches=None, only_fetch=False, **kwargs): taf_logger.info( "Repo {}: successfully fetched branch {}", self.name, branch ) - except subprocess.CalledProcessError as e: + except GitError as e: if "fatal" in e.stdout: raise FetchException(f"{self.path}: {str(e)}") pass @@ -418,9 +468,12 @@ def commit(self, message): """Create a commit with the provided message on the currently checked out branch""" self._git("add -A") try: - self._git("diff --cached --exit-code --shortstat") - except subprocess.CalledProcessError: - run("git", "-C", self.path, "commit", "--quiet", "-m", message) + self._git("diff --cached --exit-code --shortstat", reraise_error=True) + except GitError: + try: + run("git", "-C", self.path, "commit", "--quiet", "-m", message) + except subprocess.CalledProcessError as e: + raise GitError(str(e)) return self._git("rev-parse HEAD") def commit_empty(self, message): @@ -489,7 +542,7 @@ def get_last_branch_by_committer_date(self): def get_remote_url(self): try: return self._git("config --get remote.origin.url").strip() - except subprocess.CalledProcessError: + except GitError: return None def delete_branch(self, branch_name): @@ -505,7 +558,7 @@ def head_commit_sha(self): """Finds sha of the commit to which the current HEAD points""" try: return self._git("rev-parse HEAD") - except subprocess.CalledProcessError: + except GitError: return None def fetch(self, fetch_all=False, branch=None, remote="origin"): @@ -543,7 +596,7 @@ def get_tracking_branch(self, branch="", strip_remote=False): if strip_remote: tracking_branch = self.branch_local_name(tracking_branch) return tracking_branch - except subprocess.CalledProcessError: + except GitError: return None def init_repo(self, bare=False): @@ -651,7 +704,7 @@ def reset_to_head(self): def safely_get_json(self, commit, path): try: return self.get_json(commit, path) - except subprocess.CalledProcessError: + except GitError: taf_logger.debug( "Auth repo {}: {} not available at revision {}", self.name, @@ -694,8 +747,8 @@ def synced_with_remote(self, branch="master", url=None): try: local_commit = self._git(f"rev-parse {branch}") - except subprocess.CalledProcessError as e: - if "unknown revision or path not in the working tree" not in e.output: + except GitError as e: + if "unknown revision or path not in the working tree" not in str(e): raise e local_commit = None diff --git a/taf/repositoriesdb.py b/taf/repositoriesdb.py index 2551d551..379bdba8 100644 --- a/taf/repositoriesdb.py +++ b/taf/repositoriesdb.py @@ -1,8 +1,12 @@ import json from pathlib import Path -from subprocess import CalledProcessError -from taf.exceptions import InvalidOrMissingMetadataError, RepositoriesNotFoundError +from taf.exceptions import ( + InvalidOrMissingMetadataError, + RepositoriesNotFoundError, + RepositoryInstantiationError, + GitError, +) from taf.git import NamedGitRepository from taf.log import taf_logger @@ -94,7 +98,10 @@ def load_repositories( repositories = _get_json_file(auth_repo, repositories_path, commit) targets = _get_json_file(auth_repo, targets_path, commit) except InvalidOrMissingMetadataError as e: - taf_logger.warning("Skipping commit {} due to error {}", commit, e) + log_func = taf_logger.warning + if "targets/repositories.json not available at revision" in str(e): + log_func = taf_logger.debug + log_func("Skipping commit {} due to error: {}", commit, str(e)) continue # target repositories are defined in both mirrors.json and targets.json @@ -108,11 +115,20 @@ def load_repositories( additional_info = _get_custom_data(repo_data, targets.get(path)) git_repo = None - if factory is not None: - git_repo = factory(root_dir, path, urls, additional_info) - else: - git_repo_class = _determine_repo_class(repo_classes, path) - git_repo = git_repo_class(root_dir, path, urls, additional_info) + try: + if factory is not None: + git_repo = factory(root_dir, path, urls, additional_info) + else: + git_repo_class = _determine_repo_class(repo_classes, path) + git_repo = git_repo_class(root_dir, path, urls, additional_info) + except Exception as e: + taf_logger.error( + "Auth repo {}: an error occurred while instantiating repository {}: {}", + auth_repo.name, + path, + str(e), + ) + raise RepositoryInstantiationError(f"{root_dir / path}", str(e)) # allows us to partially update repositories if git_repo: @@ -160,7 +176,7 @@ def _get_custom_data(repo, target): def _get_json_file(auth_repo, path, commit): try: return auth_repo.get_json(commit, path) - except CalledProcessError: + except GitError: raise InvalidOrMissingMetadataError( f"{path} not available at revision {commit}" ) diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py index c0961203..5ffd2056 100644 --- a/taf/updater/handlers.py +++ b/taf/updater/handlers.py @@ -3,11 +3,11 @@ import tempfile import time from pathlib import Path -from subprocess import CalledProcessError import securesystemslib import tuf.client.handlers as handlers +from taf.exceptions import GitError from taf.log import taf_logger import taf.settings as settings from taf.auth_repo import AuthenticationRepo, NamedAuthenticationRepo @@ -144,7 +144,7 @@ def _init_commits(self): commits_since = self.validation_auth_repo.all_commits_since_commit( last_validated_commit ) - except CalledProcessError as e: + except GitError as e: if "Invalid revision range" in e.output: taf_logger.error( "Commit {} is not contained by the remote repository {}.", diff --git a/taf/updater/updater.py b/taf/updater/updater.py index 390c2b06..cd221fb9 100644 --- a/taf/updater/updater.py +++ b/taf/updater/updater.py @@ -166,8 +166,8 @@ def _update_named_repository( test_repo = "test-auth-repo" in targets["signed"]["targets"] if test_repo and not authenticate_test_repo: raise UpdateFailedError( - f"Repository {users_auth_repo.name} is a test repository." - 'Call update with "--authenticate-test-repo to update a test "' + f"Repository {users_auth_repo.name} is a test repository. " + 'Call update with "--authenticate-test-repo" to update a test ' "repository" ) elif not test_repo and authenticate_test_repo: @@ -225,6 +225,7 @@ def _update_named_repository( shutil.rmtree(users_auth_repo.conf_dir) raise e finally: + repository_updater.update_handler.cleanup() repositoriesdb.clear_repositories_db() @@ -269,8 +270,6 @@ def _update_authentication_repository(repository_updater, only_validate): f"Validation of authentication repository {users_auth_repo.name}" f" failed due to error: {e}" ) - finally: - repository_updater.update_handler.cleanup() taf_logger.info( "Successfully validated authentication repository {}", users_auth_repo.name diff --git a/taf/validation.py b/taf/validation.py index 2026d4ec..b5ee382b 100644 --- a/taf/validation.py +++ b/taf/validation.py @@ -1,7 +1,7 @@ -import subprocess from pathlib import Path from tuf.repository_tool import TARGETS_DIRECTORY_NAME, METADATA_DIRECTORY_NAME +from taf.exceptions import GitError from taf.repository_tool import Repository from taf.constants import CAPSTONE from taf.exceptions import InvalidBranchError @@ -114,7 +114,7 @@ def _check_branch_id(auth_repo, auth_commit, branch_id): new_branch_id = auth_repo.get_file( auth_commit, f"{TARGETS_DIRECTORY_NAME}/branch" ) - except subprocess.CalledProcessError: + except GitError: raise InvalidBranchError(f"No branch specified at revision {auth_commit}") if branch_id is not None and new_branch_id != branch_id: raise InvalidBranchError( From 9371005c1a8a93d8eb02d57c92c9235a0613d88d Mon Sep 17 00:00:00 2001 From: Renata Date: Mon, 4 May 2020 17:29:15 +0200 Subject: [PATCH 2/7] Standardize log and error messages Standardize log and error messages by defining log prefix. Log and error messages are built by log method and GitError class, which enforces standardization. Some fixes --- taf/auth_repo.py | 15 ++- taf/exceptions.py | 15 ++- taf/git.py | 269 ++++++++++++++++++++++------------------------ 3 files changed, 147 insertions(+), 152 deletions(-) diff --git a/taf/auth_repo.py b/taf/auth_repo.py index 353bb117..4a565c41 100644 --- a/taf/auth_repo.py +++ b/taf/auth_repo.py @@ -5,7 +5,6 @@ from contextlib import contextmanager from pathlib import Path from tuf.repository_tool import METADATA_DIRECTORY_NAME -from taf.log import taf_logger from taf.git import GitRepository, NamedGitRepository from taf.repository_tool import ( Repository as TAFRepository, @@ -49,6 +48,10 @@ def last_validated_commit(self): except FileNotFoundError: return None + @property + def log_prefix(self): + return f"Auth repo {self.name}: " + def get_target(self, target_name, commit=None, safely=True): if commit is None: commit = self.head_commit_sha() @@ -99,9 +102,7 @@ def set_last_validated_commit(self, commit): """ Set the last validated commit of the authentication repository """ - taf_logger.debug( - "Auth repo {}: setting last validated commit to: {}", self.name, commit - ) + self._log_debug(f"setting last validated commit to: {commit}") Path(self.conf_dir, self.LAST_VALIDATED_FILENAME).write_text(commit) def sorted_commits_and_branches_per_repositories(self, commits): @@ -133,10 +134,8 @@ def sorted_commits_and_branches_per_repositories(self, commits): target_branch, [] ).append(target_commit) previous_commits[target_path] = target_commit - taf_logger.debug( - "Auth repo {}: new commits per repositories according to targets.json: {}", - self.name, - repositories_commits, + self._log_debug( + f"new commits per repositories according to targets.json: {repositories_commits}" ) return repositories_commits diff --git a/taf/exceptions.py b/taf/exceptions.py index 20001524..d515f3a8 100644 --- a/taf/exceptions.py +++ b/taf/exceptions.py @@ -19,7 +19,20 @@ def __init__(self, path): class GitError(TAFError): - pass + def __init__(self, repo, command=None, error=None, message=None): + if message is None: + if command is not None: + message = f'error occurred while executing {command}' + if error is not None: + message = f'{message}:\n{error.output}' + elif error is not None: + message = error.output + else: + message = 'error occurred' + self.message = f'{repo.log_prefix}{message}' + self.repo = repo + self.command = command + self.error = error class InvalidBranchError(TAFError): diff --git a/taf/git.py b/taf/git.py index 5e5421fa..4a8e96a8 100644 --- a/taf/git.py +++ b/taf/git.py @@ -46,7 +46,7 @@ def __init__( if repo_urls is not None: if settings.update_from_filesystem is False: for url in repo_urls: - _validate_url(url) + self._validate_url(url) else: repo_urls = [ os.path.normpath(os.path.join(self.path, url)) @@ -86,16 +86,16 @@ def is_git_repository(self): @property def initial_commit(self): return ( - self._git("rev-list --max-parents=0 HEAD").strip() + self._git( + "rev-list --max-parents=0 HEAD", error_if_not_exists=False + ).strip() if self.is_git_repository else None ) - def is_remote_branch(self, branch_name): - for remote in self.remotes: - if branch_name.startswith(remote + "/"): - return True - return False + @property + def log_prefix(self): + return f"Repo {self.name}: " def _git(self, cmd, *args, **kwargs): """Call git commands in subprocess @@ -116,48 +116,57 @@ def _git(self, cmd, *args, **kwargs): try: result = run(command) if log_success_msg: - taf_logger.debug("Repo {}:" + log_success_msg, self.name) + self._log_debug(log_success_msg) except subprocess.CalledProcessError as e: - if ( - error_if_not_exists - and not self._path.is_dir() - or not self.is_git_repository + if error_if_not_exists and ( + not self._path.is_dir() or not self.is_git_repository ): - error_msg = f"{self.path} does not exist or is not a git repository" - reraise_error = True - else: - error_msg = ( - log_error_msg - if log_error_msg - else f"Repo {self.name}: error occurred while executing {command}:\n\n{e.output}", + log_error_msg = ( + f"{self.path} does not exist or is not a git repository" ) + reraise_error = True if log_error_msg: - taf_logger.error(error_msg) + error = GitError(self, message=log_error_msg, error=e) + self._log_error(error.message) else: + error = GitError(self, command=command, error=e) # not every git error indicates a problem # if it does, we expect that either custom error message will be provided # or that the error will be reraised - taf_logger.debug(error_msg) + self._log_debug(error.message) if reraise_error: - raise GitError(error_msg) + raise error else: try: result = run(command) except subprocess.CalledProcessError as e: - raise GitError( - f"Repo {self.name}: error occurred while executing {command}:\n\n{e.output}" - ) from e + raise GitError(self, command=command, error=e) if log_success_msg: - taf_logger.debug("Repo {}: " + log_success_msg, self.name) + self._log_debug(log_success_msg) return result + def _log(self, log_func, message): + log_func("{}{}", self.log_prefix, message) + + def _log_debug(self, message): + self._log(taf_logger.debug, message) + + def _log_info(self, message): + self._log(taf_logger.info, message) + + def _log_warning(self, message): + self._log(taf_logger.warning, message) + + def _log_error(self, message): + self._log(taf_logger.error, message) + def all_commits_on_branch(self, branch=None, reverse=True): """Returns a list of all commits on the specified branch. If branch is None, all commits on the currently checked out branch will be returned """ if branch is None: branch = "" - commits = self._git("log {} --format=format:%H", branch).strip() + commits = self._git("log {} --format=format:%H", branch, log_error=True).strip() if not commits: commits = [] else: @@ -165,9 +174,7 @@ def all_commits_on_branch(self, branch=None, reverse=True): if reverse: commits.reverse() - taf_logger.debug( - "Repo {}: found the following commits: {}", self.name, ", ".join(commits) - ) + self._log_debug(f"found the following commits: {', '.join(commits)}") return commits def all_commits_since_commit(self, since_commit, branch=None, reverse=True): @@ -178,7 +185,9 @@ def all_commits_since_commit(self, since_commit, branch=None, reverse=True): return self.all_commits_on_branch(branch=branch, reverse=reverse) if branch is None: branch = "HEAD" - commits = self._git("rev-list {}..{}", since_commit, branch).strip() + commits = self._git( + "rev-list {}..{}", since_commit, branch, log_error=True + ).strip() if not commits: commits = [] else: @@ -186,11 +195,8 @@ def all_commits_since_commit(self, since_commit, branch=None, reverse=True): if reverse: commits.reverse() - taf_logger.debug( - "Repo {}: found the following commits after commit {}: {}", - self.name, - since_commit, - ", ".join(commits), + self._log_debug( + f"found the following commits after commit {since_commit}: {', '.join(commits)}" ) return commits @@ -201,9 +207,7 @@ def all_fetched_commits(self, branch="master"): else: commits = commits.split("\n") commits.reverse() - taf_logger.debug( - "Repo {}: fetched the following commits {}", self.name, ", ".join(commits) - ) + self._log_debug(f"fetched the following commits {', '.join(commits)}") return commits def branches(self, remote=False, all=False, strip_remote=False): @@ -233,17 +237,15 @@ def branches(self, remote=False, all=False, strip_remote=False): def branches_containing_commit(self, commit, strip_remote=False, sort_key=None): """Finds all branches that contain the given commit""" - local_branches = self._git( - f"branch --contains {commit}", - log_error_msg=f"Repo {self.name}: could not list branches containing commit {commit}", - ).split("\n") + local_branches = self._git(f"branch --contains {commit}", log_error=True).split( + "\n" + ) if local_branches: local_branches = [ branch.replace("*", "").strip() for branch in local_branches ] remote_branches = self._git( - f"branch -r --contains {commit}", - log_error_msg=f"Repo {self.name}: could not list remote branches containing commit {commit}", + f"branch -r --contains {commit}", log_error=True ).split("\n") filtered_remote_branches = [] if remote_branches: @@ -269,9 +271,7 @@ def branch_exists(self, branch_name, include_remotes=True): a remote branch exists. """ branch = self._git( - f"branch --list {branch_name}", - log_error_msg=f"Repo {self.name}: could not list branches", - reraise_error=True, + f"branch --list {branch_name}", log_error=True, reraise_error=True ) # this git command should return the branch's name if it exists # empty string otherwise @@ -295,7 +295,7 @@ def branch_off_commit(self, branch_name, commit): """Create a new branch by branching off of the specified commit""" self._git( f"checkout -b {branch_name} {commit}", - log_error_msg=f"Repo {self.name}: could not create a new branch {branch_name} branching off of {commit}", + log_error=True, reraise_error=True, log_success_msg=f"Repo {self.name}: created a new branch {branch_name} from branching off of {commit}", ) @@ -315,8 +315,6 @@ def checkout_branch(self, branch_name, create=False, raise_anyway=False): self._git( "checkout {}", branch_name, - log_error=True, - reraise_error=True, log_success_msg=f"Repo {self.name}: checked out branch {branch_name}", ) except GitError as e: @@ -324,42 +322,37 @@ def checkout_branch(self, branch_name, create=False, raise_anyway=False): raise (e) # skip worktree errors - print(e) - if "is already checked out at" in str(e): + if "is already checked out at" in e.error.output: return if create: self.create_and_checkout_branch(branch_name) else: - taf_logger.error( - f"Repo {self.name}: could not check out branch {branch_name}" - ) + self._log_error(f'could not checkout branch {branch_name}') raise (e) def checkout_paths(self, commit, *args): for file_path in args: - self._git(f"checkout {commit} {file_path}") + self._git( + f"checkout {commit} {file_path}", log_error=True, reraise_error=True + ) def checkout_orphan_branch(self, branch_name): """Creates orphan branch""" self._git(f"checkout --orphan {branch_name}") - self._git( - "rm -rf .", - log_error_msg=f"Repo {self.name}: could not checkout orphan branch {branch_name}", - reraise_error=True, - ) + self._git("rm -rf .", log_error=True, reraise_error=True) def clean(self): self._git("clean -fd") def clone(self, no_checkout=False, bare=False, **kwargs): - taf_logger.info("Repo {}: cloning repository", self.name) + self._log_info("cloning repository") shutil.rmtree(self.path, True) self._path.mkdir(exist_ok=True, parents=True) if self.repo_urls is None: raise GitError( - f"Repo {self.name}: cannot clone repository. No urls were specified" + repo=self, message="cannot clone repository. No urls were specified" ) params = [] if bare: @@ -378,11 +371,10 @@ def clone(self, no_checkout=False, bare=False, **kwargs): for url in self.repo_urls: try: self._git( - "clone {} . {}", url, params, log_success_msg="successfully cloned" + "clone {} . {}", url, params, log_success_msg=f"successfully cloned from {url}", log_error_msg=f'cannot clone from url {url}', reraise_error=True ) except GitError: - taf_logger.error("Repo {}: cannot clone from url {}", self.name, url) - raise + pass else: break @@ -393,13 +385,11 @@ def clone_or_pull(self, branches=None, only_fetch=False, **kwargs): """ if branches is None: branches = ["master"] - taf_logger.debug( - "Repo {}: cloning or pulling branches {}", self.name, ", ".join(branches) - ) + self._log_debug(f"cloning or pulling branches {', '.join(branches)}") old_head = self.head_commit_sha() if old_head is None: - taf_logger.debug("Repo {}: old head sha is {}", self.name, old_head) + self._log_debug(f"old head sha is {old_head}") try: self.clone(**kwargs) except GitError: @@ -411,9 +401,7 @@ def clone_or_pull(self, branches=None, only_fetch=False, **kwargs): self._git("fetch", "origin", f"{branch}:{branch}") else: self._git("pull", "origin", branch) - taf_logger.info( - "Repo {}: successfully fetched branch {}", self.name, branch - ) + self._log_info(f"successfully fetched branch {branch}") except GitError as e: if "fatal" in e.stdout: raise FetchException(f"{self.path}: {str(e)}") @@ -473,7 +461,9 @@ def commit(self, message): try: run("git", "-C", self.path, "commit", "--quiet", "-m", message) except subprocess.CalledProcessError as e: - raise GitError(str(e)) + raise GitError( + repo=self, message=f"could not commit changes due to:\n{e.output}" + ) return self._git("rev-parse HEAD") def commit_empty(self, message): @@ -489,11 +479,8 @@ def commits_on_branch_and_not_other( on a speculative branch and not on the master branch. """ - taf_logger.debug( - "Repo {}: finding commits which are on branch {}, but not on branch {}", - self.name, - branch1, - branch2, + self._log_debug( + f"finding commits which are on branch {branch1}, but not on branch {branch2}" ) commits = self._git( "log {} --not {} --no-merges --format=format:%H", branch1, branch2 @@ -502,18 +489,18 @@ def commits_on_branch_and_not_other( if include_branching_commit: branching_commit = self._git("rev-list -n 1 {}~1", commits[-1]) commits.append(branching_commit) - taf_logger.debug("Repo {}: found the following commits: {}", self.name, commits) + self._log_debug(f"found the following commits: {commits}") return commits def delete_local_branch(self, branch_name, force=False): """Deletes local branch.""" flag = "-D" if force else "-d" - self._git(f"branch {flag} {branch_name}") + self._git(f"branch {flag} {branch_name}", log_error=True) def delete_remote_branch(self, branch_name, remote=None): if remote is None: remote = self.remotes[0] - self._git(f"push {remote} --delete {branch_name}") + self._git(f"push {remote} --delete {branch_name}", log_error=True) def get_commit_message(self, commit): """Returns commit message of the given commit""" @@ -546,7 +533,7 @@ def get_remote_url(self): return None def delete_branch(self, branch_name): - self._git("branch -D {}", branch_name) + self._git("branch -D {}", branch_name, log_error=True) def diff_between_revisions(self, revision1=EMPTY_TREE, revision2="HEAD"): return self._git("diff --name-status {} {}", revision1, revision2) @@ -557,17 +544,17 @@ def has_remote(self): def head_commit_sha(self): """Finds sha of the commit to which the current HEAD points""" try: - return self._git("rev-parse HEAD") + return self._git("rev-parse HEAD", error_if_not_exists=False) except GitError: return None def fetch(self, fetch_all=False, branch=None, remote="origin"): if fetch_all: - self._git("fetch --all") + self._git("fetch --all", log_error=True) else: if branch is None: branch = "" - self._git("fetch {} {}", remote, branch) + self._git("fetch {} {}", remote, branch, log_error=True) def get_current_branch(self): """Return current branch.""" @@ -578,14 +565,16 @@ def get_last_remote_commit(self, url, branch="master"): Fet the last remote commit of the specified branch """ if url is not None: - last_commit = self._git(f"--no-pager ls-remote {url} {branch}") + last_commit = self._git( + f"--no-pager ls-remote {url} {branch}", log_error=True + ) if last_commit: return last_commit.split("\t", 1)[0] return None def get_merge_base(self, branch1, branch2): """Finds the best common ancestor between two branches""" - return self._git(f"merge-base {branch1} {branch2}") + return self._git(f"merge-base {branch1} {branch2}", log_error=True) def get_tracking_branch(self, branch="", strip_remote=False): """Returns tracking branch name in format origin/branch-name or None if branch does not @@ -603,10 +592,16 @@ def init_repo(self, bare=False): if self._path.is_dir(): self._path.mkdir(exist_ok=True, parents=True) flag = "--bare" if bare else "" - self._git(f"init {flag}") + self._git(f"init {flag}", error_if_not_exists=False) if self.repo_urls is not None and len(self.repo_urls): self._git("remote add origin {}", self.repo_urls[0]) + def is_remote_branch(self, branch_name): + for remote in self.remotes: + if branch_name.startswith(remote + "/"): + return True + return False + def list_files_at_revision(self, commit, path=""): if path is None: path = "" @@ -673,11 +668,11 @@ def list_untracked_files(self, path=None): ] def merge_commit(self, commit): - self._git("merge {}", commit) + self._git("merge {}", commit, log_error=True) def pull(self): """Pull current branch""" - self._git("pull") + self._git("pull", log_error=True) def push(self, branch=None, set_upstream=False, force=False): """Push all changes""" @@ -685,7 +680,9 @@ def push(self, branch=None, set_upstream=False, force=False): branch = self.get_current_branch() upstream_flag = "-u" if set_upstream else "" force_flag = "-f" if force else "" - self._git("push {} {} origin {}", upstream_flag, force_flag, branch) + self._git( + "push {} {} origin {}", upstream_flag, force_flag, branch, log_error=True + ) def rename_branch(self, old_name, new_name): self._git("branch -m {} {}", old_name, new_name) @@ -705,19 +702,9 @@ def safely_get_json(self, commit, path): try: return self.get_json(commit, path) except GitError: - taf_logger.debug( - "Auth repo {}: {} not available at revision {}", - self.name, - os.path.basename(path), - commit, - ) + self._log_debug(f"{path} not available at revision {commit}") except json.decoder.JSONDecodeError: - taf_logger.debug( - "Auth repo {}: {} not a valid json at revision {}", - self.name, - os.path.basename(path), - commit, - ) + self._log_debug(f"{path} not a valid json at revision {commit}") return None def set_remote_url(self, new_url, remote="origin"): @@ -759,6 +746,17 @@ def synced_with_remote(self, branch="master", url=None): def top_commit_of_branch(self, branch): return self._git(f"rev-parse {branch}") + def _validate_url(self, url): + """ ensure valid URL """ + for _url_re in [_http_fttp_url, _ssh_url]: + match = _url_re.match(url) + if match: + return + self._log_error(f"URL ({url}) is not valid") + raise InvalidRepositoryError( + f'Repository URL must be a valid URL, but got "{url}".' + ) + class NamedGitRepository(GitRepository): def __init__( @@ -782,7 +780,7 @@ def __init__( root_dir and repo_name. """ self.root_dir = root_dir - path = _get_repo_path(root_dir, repo_name) + path = self._get_repo_path(root_dir, repo_name) super().__init__( path, repo_name=repo_name, @@ -791,35 +789,32 @@ def __init__( default_branch=default_branch, ) - -def _get_repo_path(root_dir, repo_name): - """ - get the path to a repo and ensure it is valid. - (since this is coming from potentially untrusted data) - """ - _validate_repo_name(repo_name) - repo_dir = str((Path(root_dir) / (repo_name or ""))) - if not repo_dir.startswith(repo_dir): - taf_logger.error("Repo {}: repository name is not valid", repo_name) - raise InvalidRepositoryError(f"Invalid repository name: {repo_name}") - return repo_dir + def _get_repo_path(self, root_dir, repo_name): + """ + get the path to a repo and ensure it is valid. + (since this is coming from potentially untrusted data) + """ + self._validate_repo_name(repo_name) + repo_dir = str((Path(root_dir) / (repo_name or ""))) + if not repo_dir.startswith(repo_dir): + self._log_error("repository name is not valid") + raise InvalidRepositoryError(f"Invalid repository name: {repo_name}") + return repo_dir + + def _validate_repo_name(self, repo_name): + """ Ensure the repo name is not malicious """ + match = _repo_name_re.match(repo_name) + if not match: + self._log_error("repository name is not valid") + raise InvalidRepositoryError( + "Repository name must be in format namespace/repository " + "and can only contain letters, numbers, underscores and " + f'dashes, but got "{repo_name}"' + ) _repo_name_re = re.compile(r"^\w[\w_-]*/\w[\w_-]*$") - -def _validate_repo_name(repo_name): - """ Ensure the repo name is not malicious """ - match = _repo_name_re.match(repo_name) - if not match: - taf_logger.error("Repo {}: repository name is not valid", repo_name) - raise InvalidRepositoryError( - "Repository name must be in format namespace/repository " - "and can only contain letters, numbers, underscores and " - f'dashes, but got "{repo_name}"' - ) - - _http_fttp_url = re.compile( r"^(?:http|ftp)s?://" # http:// or https:// # domain... @@ -834,15 +829,3 @@ def _validate_repo_name(repo_name): _ssh_url = re.compile( r"((git|ssh|http(s)?)|(git@[\w\.]+))(:(//)?)([\w\.@\:/\-~]+)(\.git)?(/)?" ) - - -def _validate_url(url): - """ ensure valid URL """ - for _url_re in [_http_fttp_url, _ssh_url]: - match = _url_re.match(url) - if match: - return - taf_logger.error("Repository URL ({}) is not valid", url) - raise InvalidRepositoryError( - f'Repository URL must be a valid URL, but got "{url}".' - ) From d3af596f610f59980cc83ee8fd17fabbf932d60a Mon Sep 17 00:00:00 2001 From: Renata Date: Mon, 4 May 2020 18:45:38 +0200 Subject: [PATCH 3/7] Small fixes and formatting --- taf/exceptions.py | 8 ++++---- taf/git.py | 11 ++++++++--- taf/updater/handlers.py | 6 ++++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/taf/exceptions.py b/taf/exceptions.py index d515f3a8..6e181681 100644 --- a/taf/exceptions.py +++ b/taf/exceptions.py @@ -22,14 +22,14 @@ class GitError(TAFError): def __init__(self, repo, command=None, error=None, message=None): if message is None: if command is not None: - message = f'error occurred while executing {command}' + message = f"error occurred while executing {command}" if error is not None: - message = f'{message}:\n{error.output}' + message = f"{message}:\n{error.output}" elif error is not None: message = error.output else: - message = 'error occurred' - self.message = f'{repo.log_prefix}{message}' + message = "error occurred" + self.message = f"{repo.log_prefix}{message}" self.repo = repo self.command = command self.error = error diff --git a/taf/git.py b/taf/git.py index 4a8e96a8..db679e4c 100644 --- a/taf/git.py +++ b/taf/git.py @@ -328,7 +328,7 @@ def checkout_branch(self, branch_name, create=False, raise_anyway=False): if create: self.create_and_checkout_branch(branch_name) else: - self._log_error(f'could not checkout branch {branch_name}') + self._log_error(f"could not checkout branch {branch_name}") raise (e) def checkout_paths(self, commit, *args): @@ -371,7 +371,12 @@ def clone(self, no_checkout=False, bare=False, **kwargs): for url in self.repo_urls: try: self._git( - "clone {} . {}", url, params, log_success_msg=f"successfully cloned from {url}", log_error_msg=f'cannot clone from url {url}', reraise_error=True + "clone {} . {}", + url, + params, + log_success_msg=f"successfully cloned from {url}", + log_error_msg=f"cannot clone from url {url}", + reraise_error=True, ) except GitError: pass @@ -544,7 +549,7 @@ def has_remote(self): def head_commit_sha(self): """Finds sha of the commit to which the current HEAD points""" try: - return self._git("rev-parse HEAD", error_if_not_exists=False) + return self._git("rev-parse HEAD") except GitError: return None diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py index 5ffd2056..c5a54bab 100644 --- a/taf/updater/handlers.py +++ b/taf/updater/handlers.py @@ -252,8 +252,10 @@ def cleanup(self): directories. This should be called after the update is finished, either successfully or unsuccessfully. """ - shutil.rmtree(self.current_path) - shutil.rmtree(self.previous_path) + if self.current_path.is_dir(): + shutil.rmtree(self.current_path) + if self.previous_path.is_dir(): + shutil.rmtree(self.previous_path) temp_dir = Path(self.validation_auth_repo.path, os.pardir).parent shutil.rmtree(str(temp_dir), onerror=on_rm_error) From efcaad2eb815121950cc328feca8a91f6403acef Mon Sep 17 00:00:00 2001 From: Renata Date: Tue, 5 May 2020 00:10:57 +0200 Subject: [PATCH 4/7] Updated changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d41584c2..17bef046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,12 @@ and this project adheres to [Semantic Versioning][semver]. ### Changed +- Error handling and logging improvements ([131]) + +### Fixed + +[131]: https://github.com/openlawlibrary/taf/pull/131 + ### Fixed From 1a74f26a416e112bc514aeda1ce1053c1dcad4e0 Mon Sep 17 00:00:00 2001 From: Renata Date: Tue, 5 May 2020 13:16:27 +0200 Subject: [PATCH 5/7] Fix checkout orphan branch --- taf/git.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/taf/git.py b/taf/git.py index db679e4c..351db439 100644 --- a/taf/git.py +++ b/taf/git.py @@ -339,8 +339,13 @@ def checkout_paths(self, commit, *args): def checkout_orphan_branch(self, branch_name): """Creates orphan branch""" - self._git(f"checkout --orphan {branch_name}") - self._git("rm -rf .", log_error=True, reraise_error=True) + self._git( + f"checkout --orphan {branch_name}", log_error=True, reraise_error=True + ) + try: + self._git("rm -rf .") + except GitError: # If repository is empty + pass def clean(self): self._git("clean -fd") From cb8179ce5262ddcd6c6e0b5a548fddc1a9bd2320 Mon Sep 17 00:00:00 2001 From: Renata Date: Mon, 11 May 2020 20:26:02 +0200 Subject: [PATCH 6/7] Make logging a bit more flexible Make it easier to use a different logger and define logging template --- taf/git.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/taf/git.py b/taf/git.py index 351db439..d8bdbb1f 100644 --- a/taf/git.py +++ b/taf/git.py @@ -3,6 +3,7 @@ import re import shutil import subprocess +import logging from collections import OrderedDict from functools import reduce from pathlib import Path @@ -57,12 +58,22 @@ def __init__( self.repo_urls = repo_urls self.additional_info = additional_info - _remotes = None + logging_functions = { + logging.DEBUG: taf_logger.debug, + logging.INFO: taf_logger.info, + logging.WARNING: taf_logger.warning, + logging.ERROR: taf_logger.error, + logging.CRITICAL: taf_logger.critical + } + + log_template = "{}{}" @property def path(self): return str(self._path) + _remotes = None + @property def remotes(self): if self._remotes is None: @@ -146,19 +157,22 @@ def _git(self, cmd, *args, **kwargs): return result def _log(self, log_func, message): - log_func("{}{}", self.log_prefix, message) + log_func(self.log_template, self.log_prefix, message) def _log_debug(self, message): - self._log(taf_logger.debug, message) + self._log(self.logging_functions[logging.DEBUG], message) def _log_info(self, message): - self._log(taf_logger.info, message) + self._log(self.logging_functions[logging.INFO], message) def _log_warning(self, message): - self._log(taf_logger.warning, message) + self._log(self.logging_functions[logging.WARNING], message) def _log_error(self, message): - self._log(taf_logger.error, message) + self._log(self.logging_functions[logging.ERROR], message) + + def _log_critical(self, message): + self._log(self.logging_functions[logging.CRITICAL], message) def all_commits_on_branch(self, branch=None, reverse=True): """Returns a list of all commits on the specified branch. If branch is None, From 22469ca8f7b0830eec72ee2b1f29a4f88d964753 Mon Sep 17 00:00:00 2001 From: Renata Date: Mon, 11 May 2020 21:10:12 +0200 Subject: [PATCH 7/7] Addressing CR comment --- taf/git.py | 2 +- taf/repositoriesdb.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/taf/git.py b/taf/git.py index d8bdbb1f..f0379f21 100644 --- a/taf/git.py +++ b/taf/git.py @@ -63,7 +63,7 @@ def __init__( logging.INFO: taf_logger.info, logging.WARNING: taf_logger.warning, logging.ERROR: taf_logger.error, - logging.CRITICAL: taf_logger.critical + logging.CRITICAL: taf_logger.critical, } log_template = "{}{}" diff --git a/taf/repositoriesdb.py b/taf/repositoriesdb.py index 379bdba8..192ae4d0 100644 --- a/taf/repositoriesdb.py +++ b/taf/repositoriesdb.py @@ -98,11 +98,11 @@ def load_repositories( repositories = _get_json_file(auth_repo, repositories_path, commit) targets = _get_json_file(auth_repo, targets_path, commit) except InvalidOrMissingMetadataError as e: - log_func = taf_logger.warning if "targets/repositories.json not available at revision" in str(e): - log_func = taf_logger.debug - log_func("Skipping commit {} due to error: {}", commit, str(e)) - continue + taf_logger.debug("Skipping commit {} due to: {}", commit, str(e)) + continue + else: + raise # target repositories are defined in both mirrors.json and targets.json repositories = repositories["repositories"]