From 024f176db0647d5f5697e5b32e0a93c1950d3541 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Fri, 13 Oct 2023 01:55:41 +0200
Subject: [PATCH 01/28] feat chore: Added alias attribute to repositories,
 logging improvements

- At the start of the update process, an error message used to be shown
saying that the default branch could not be determined. There is not need
to attempt to determine the default branch if the repository is not yet cloned.
- During the update process, the updater clones the remote reposiory to
a temp folder and this temp folder's name used to be used when logging.
E.g. 327fshkewrew/law would get set as the repository's name. In order
to be able to reference a repository by a different name when logging,
added a new attribute called alias. So now we can reference this temp
repository by a more descriptive name.
---
 taf/auth_repo.py           |  5 +++++
 taf/git.py                 | 20 +++++++++++++++++---
 taf/tools/repo/__init__.py |  4 ++++
 taf/updater/updater.py     | 24 +++++++++++++++++-------
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index fdcb0554..bb7ebbcd 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -36,6 +36,7 @@ def __init__(
         conf_directory_root: Optional[str] = None,
         out_of_band_authentication: Optional[str] = None,
         path: Optional[Union[Path, str]] = None,
+        alias: Optional[str] = None,
         *args,
         **kwargs,
     ):
@@ -51,6 +52,7 @@ def __init__(
           custom (dict): a dictionary containing other data
           default_branch (str): repository's default branch, automatically determined if not specified
           out_of_band_authentication (str): manually specified initial commit
+          alias: Repository's alias, which will be used in logging statements to reference it
         """
         super().__init__(
             library_dir,
@@ -60,6 +62,7 @@ def __init__(
             default_branch,
             allow_unsafe,
             path,
+            alias,
             *args,
             **kwargs,
         )
@@ -142,6 +145,8 @@ def last_validated_commit(self) -> Optional[str]:
 
     @property
     def log_prefix(self) -> str:
+        if self.alias:
+            return f"{self.alias}: "
         return f"Auth repo {self.name}: "
 
     def get_target(self, target_name, commit=None, safely=True) -> Optional[Dict]:
diff --git a/taf/git.py b/taf/git.py
index ca554628..8b96a26d 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -36,6 +36,7 @@ def __init__(
         default_branch: Optional[str] = None,
         allow_unsafe: Optional[bool] = False,
         path: Optional[Union[Path, str]] = None,
+        alias: Optional[str] = None,
         *args,
         **kwargs,
     ):
@@ -52,6 +53,7 @@ def __init__(
           default_branch (str): repository's default branch, automatically determined if not specified
           allow_unsafe: allow a git's security mechanism which prevents execution of git commands if
           the containing directory is owned by a different user to be ignored
+          alias: Repository's alias, which will be used in logging statements to reference it
         """
         if isinstance(library_dir, str):
             library_dir = Path(library_dir)
@@ -88,6 +90,7 @@ def __init__(
             self.library_dir = self.library_dir.resolve()
             self.path = self._validate_repo_path(path)
 
+        self.alias = alias
         self.urls = self._validate_urls(urls)
         self.allow_unsafe = allow_unsafe
         self.custom = custom or {}
@@ -167,6 +170,8 @@ def initial_commit(self) -> str:
 
     @property
     def log_prefix(self) -> str:
+        if self.alias:
+            return f"{self.alias}: "
         return f"Repo {self.name}: "
 
     @property
@@ -242,6 +247,9 @@ def _get_default_branch_from_local(self) -> str:
         return branch
 
     def _get_default_branch_from_remote(self, url: str) -> str:
+        if not self.is_git_repository:
+            self._log_debug("Repository does not exist. Could not determined default branch from remote")
+            return None
         branch = self._git(
             f"ls-remote --symref {url} HEAD",
             log_error=True,
@@ -819,7 +827,11 @@ def get_default_branch(self, url: Optional[str] = None) -> str:
         if url is not None:
             url = url.strip()
             return self._get_default_branch_from_remote(url)
-        return self._get_default_branch_from_local()
+        # there is not point in trying to find the default branch from a local repository
+        # if the directory is not a git repository (e.g. not yet cloned)
+        if self.is_git_repository_root:
+            return self._get_default_branch_from_local()
+        return None
 
     def get_json(
         self, commit: str, path: str, raw: Optional[bool] = False
@@ -994,7 +1006,7 @@ def list_files_at_revision(self, commit: str, path: str = "") -> List[str]:
             return self.pygit.list_files_at_revision(commit, posix_path)
         except TAFError as e:
             raise e
-        except Exception:
+        except Exception as e:
             self._log_warning(
                 "Perfomance regression: Could not list files with pygit2. Reverting to git subprocess"
             )
@@ -1290,7 +1302,9 @@ def _determine_default_branch(self) -> Optional[str]:
         # try to get the default branch from the local repository
         errors = []
         try:
-            return self.get_default_branch()
+            branch = self.get_default_branch()
+            if branch is not None:
+                return branch
         except GitError as e:
             errors.append(e)
             pass
diff --git a/taf/tools/repo/__init__.py b/taf/tools/repo/__init__.py
index 933e07fc..4be515b0 100644
--- a/taf/tools/repo/__init__.py
+++ b/taf/tools/repo/__init__.py
@@ -1,6 +1,8 @@
 import click
 import json
 from taf.api.repository import create_repository
+from taf.exceptions import TAFError, UpdateFailedError
+from taf.tools.cli import catch_cli_exception
 from taf.updater.updater import update_repository, validate_repository, UpdateType
 
 
@@ -11,6 +13,7 @@ def repo():
         pass
 
     @repo.command()
+    @catch_cli_exception(handle=TAFError)
     @click.option("--path", default=".", help="Authentication repository's location. If not specified, set to the current directory")
     @click.option("--keys-description", help="A dictionary containing information about the "
                   "keys or a path to a json file which stores the needed information")
@@ -71,6 +74,7 @@ def create(path, keys_description, keystore, no_commit, test):
         )
 
     @repo.command()
+    @catch_cli_exception(handle=UpdateFailedError)
     @click.option("--path", default=".", help="Authentication repository's location. If not specified, set to the current directory")
     @click.option("--url", default=None, help="Authentication repository's url")
     @click.option("--clients-library-dir", default=None, help="Directory where target repositories and, "
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index ec7e7864..187793ac 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -1,9 +1,11 @@
 import json
+from logging import ERROR
 import shutil
 import enum
 import tempfile
 
 from typing import Dict, Tuple, Any
+from logdecorator import log_on_error
 from taf.git import GitRepository
 
 from tuf.ngclient.updater import Updater
@@ -77,7 +79,7 @@ def _clone_validation_repo(url, repository_name):
     """
     temp_dir = tempfile.mkdtemp()
     path = Path(temp_dir, "auth_repo").absolute()
-    validation_auth_repo = AuthenticationRepository(path=path, urls=[url])
+    validation_auth_repo = AuthenticationRepository(path=path, urls=[url], alias="Validation repository")
     validation_auth_repo.clone(bare=True)
     validation_auth_repo.fetch(fetch_all=True)
 
@@ -170,6 +172,13 @@ def _reset_repository(repo, commits_data):
             _reset_repository(repo, branch_data)
 
 
+@log_on_error(
+    ERROR,
+    "{e}",
+    logger=taf_logger,
+    on_exceptions=UpdateFailedError,
+    reraise=True,
+)
 @timed_run("Updating repository")
 def update_repository(
     url,
@@ -241,6 +250,7 @@ def update_repository(
         else None
     )
 
+    taf_logger.info(f"Updating repository {auth_repo_name}")
     clients_auth_library_dir = clients_library_dir
     repos_update_data = {}
     transient_data = {}
@@ -280,7 +290,7 @@ def update_repository(
         )
     except Exception as e:
         root_error = UpdateFailedError(
-            f"Update of {auth_repo_name} failed due to error {e}"
+            f"Update of {auth_repo_name} failed due to error: {e}"
         )
 
     update_data = {}
@@ -785,7 +795,7 @@ def _update_target_repositories(
                     ] = additional_commits_on_branch
 
             except UpdateFailedError as e:
-                taf_logger.error("Updated failed due to error {}", str(e))
+                taf_logger.error("Updated failed due to error: {}", str(e))
                 # delete all repositories that were cloned
                 for repo in cloned_repositories:
                     taf_logger.debug("Removing cloned repository {}", repo.path)
@@ -870,7 +880,7 @@ def _init_updater():
                 fetcher=git_updater,
             )
         except Exception as e:
-            taf_logger.error(f"Failed to instantiate TUF Updater due to error {e}")
+            taf_logger.error(f"Failed to instantiate TUF Updater due to error: {e}")
             raise e
 
     def _update_tuf_current_revision():
@@ -902,7 +912,7 @@ def _update_tuf_current_revision():
             ).__name__ or EXPIRED_METADATA_ERROR in str(e)
             if not metadata_expired or settings.strict:
                 taf_logger.error(
-                    "Validation of authentication repository {} failed at revision {} due to error {}",
+                    "Validation of authentication repository {} failed at revision {} due to error: {}",
                     git_updater.users_auth_repo.name,
                     current_commit,
                     e,
@@ -912,7 +922,7 @@ def _update_tuf_current_revision():
                     f" failed at revision {current_commit} due to error: {e}"
                 )
             taf_logger.warning(
-                f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error {e}"
+                f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error: {e}"
             )
 
     while not git_updater.update_done():
@@ -1226,7 +1236,7 @@ def validate_repository(
         )
     except Exception as e:
         raise ValidationFailedError(
-            f"Validation or repository {auth_repo_name} failed due to error {e}"
+            f"Validation or repository {auth_repo_name} failed due to error: {e}"
         )
     settings.overwrite_last_validated_commit = False
     settings.last_validated_commit = None

From 5db90c16b1d17bf2b5b20aae42ca93c28f43cc5f Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Mon, 16 Oct 2023 21:10:50 +0200
Subject: [PATCH 02/28] fix: if last successful commit is invalid, report the
 issue and raise appropriate error

---
 README.md                         |  2 +-
 taf/updater/handlers.py           | 14 ++++++++++++++
 taf/updater/lifecycle_handlers.py | 10 ++++++----
 taf/updater/updater.py            |  7 ++++---
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 1505b090..faba59ee 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Such an attacker could then:
 TAF's primary objective is not to prevent the attacks listed above but rather to detect when
 an attack has occurred and halt an update if necessary. Thus, TAF should be used instead of
 directly calling `git pull` and `git clone`.
-```
+
 
 ## Further reading
 
diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py
index 1c9bb631..b092132d 100644
--- a/taf/updater/handlers.py
+++ b/taf/updater/handlers.py
@@ -182,6 +182,20 @@ def _init_commits(self):
             else:
                 raise e
 
+        # check if the last validated commit exists in the remote repository
+        # last_successful_commit could've been manually update to an invalid value
+        # or set to a commit that exists in the local authentication repository
+        # that was not pushed
+        branches_containing_last_validated_commit = (
+            self.validation_auth_repo.branches_containing_commit(last_validated_commit)
+        )
+        default_branch = self.validation_auth_repo.default_branch
+        if default_branch not in branches_containing_last_validated_commit:
+            msg = f"""Last successful commit not on the {default_branch} of the authentication repository.
+This could mean that the a commit was removed from the remote repository or that the last_successful_commit file was manually updated."""
+            taf_logger.error(msg)
+            raise UpdateFailedError(msg)
+
         # Check if the user's head commit matches the saved one
         # That should always be the case
         # If it is not, it means that someone, accidentally or maliciously made manual changes
diff --git a/taf/updater/lifecycle_handlers.py b/taf/updater/lifecycle_handlers.py
index f1a5f625..86eea967 100644
--- a/taf/updater/lifecycle_handlers.py
+++ b/taf/updater/lifecycle_handlers.py
@@ -103,9 +103,12 @@ def _execute_scripts(repos_and_data, lifecycle_stage, event):
         for script_repo, script_data in repos_and_data.items():
             data = script_data["data"]
             last_commit = script_data["commit"]
-            repos_and_data[script_repo]["data"] = execute_scripts(
-                script_repo, last_commit, scripts_rel_path, data, scripts_root_dir
-            )
+            # there is no reason to try executing the scripts if last_commit is None
+            # that means that update was not even starterd
+            if last_commit is not None:
+                repos_and_data[script_repo]["data"] = execute_scripts(
+                    script_repo, last_commit, scripts_rel_path, data, scripts_root_dir
+                )
         return repos_and_data
 
     if event in (Event.CHANGED, Event.UNCHANGED, Event.SUCCEEDED):
@@ -154,7 +157,6 @@ def execute_scripts(auth_repo, last_commit, scripts_rel_path, data, scripts_root
     # load from filesystem in development mode so that the scripts can be updated without
     # having to commit and push
     development_mode = settings.development_mode
-
     if development_mode:
         if scripts_root_dir is not None:
             path = Path(scripts_root_dir) / auth_repo.name / scripts_rel_path
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index 187793ac..68b49b10 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -79,7 +79,9 @@ def _clone_validation_repo(url, repository_name):
     """
     temp_dir = tempfile.mkdtemp()
     path = Path(temp_dir, "auth_repo").absolute()
-    validation_auth_repo = AuthenticationRepository(path=path, urls=[url], alias="Validation repository")
+    validation_auth_repo = AuthenticationRepository(
+        path=path, urls=[url], alias="Validation repository"
+    )
     validation_auth_repo.clone(bare=True)
     validation_auth_repo.fetch(fetch_all=True)
 
@@ -795,7 +797,7 @@ def _update_target_repositories(
                     ] = additional_commits_on_branch
 
             except UpdateFailedError as e:
-                taf_logger.error("Updated failed due to error: {}", str(e))
+                taf_logger.error("Update failed due to error: {}", str(e))
                 # delete all repositories that were cloned
                 for repo in cloned_repositories:
                     taf_logger.debug("Removing cloned repository {}", repo.path)
@@ -1254,7 +1256,6 @@ def _validate_authentication_repository(
     # we validate it before updating the actual authentication repository
     validation_auth_repo = repository_updater.validation_auth_repo
     commits = repository_updater.commits
-
     if (
         out_of_band_authentication is not None
         and users_auth_repo.last_validated_commit is None

From 6b6894f3d4dbbd2ea86b3431452cba077133c302 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 18 Oct 2023 01:52:43 +0200
Subject: [PATCH 03/28] feat: initial work on bf update. Listing targets data
 per auth commits

---
 taf/auth_repo.py        | 40 +++++++++++++++++++++++++++--
 taf/models/types.py     | 57 ++++++++++++++++++++++++++++++++++++++++-
 taf/updater/handlers.py |  4 +--
 3 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index bb7ebbcd..8b491d42 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -3,10 +3,11 @@
 import tempfile
 import fnmatch
 
-from typing import Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 from collections import defaultdict
 from contextlib import contextmanager
 from pathlib import Path
+from taf.models.types import AuthCommitGroup, TargetAtCommitInfo
 from tuf.repository_tool import METADATA_DIRECTORY_NAME
 from taf.git import GitRepository
 from taf.repository_tool import (
@@ -218,6 +219,41 @@ def set_last_validated_commit(self, commit: str):
         self._log_debug(f"setting last validated commit to: {commit}")
         Path(self.conf_dir, self.LAST_VALIDATED_FILENAME).write_text(commit)
 
+
+    def targets_data_per_auth_commits(
+        self,
+        commits: List[str],
+        target_repos: Optional[List[str]] = None,
+        default_branch: Optional[str] = None,
+        excluded_target_globs: Optional[List[str]] = None,
+    ):
+        auth_commit_targets_data: List[AuthCommitGroup] = []
+        targets = self.targets_at_revisions(
+            *commits, target_repos=target_repos, default_branch=default_branch
+        )
+        skipped_targets = []
+        excluded_target_globs = excluded_target_globs or []
+        for auth_commit in commits:
+            current_auth_commit_info = AuthCommitGroup(auth_commit, [])
+            auth_commit_targets_data.append(current_auth_commit_info)
+            for target_path, target_data in targets[auth_commit].items():
+                if target_path in skipped_targets:
+                    continue
+                if any(
+                    fnmatch.fnmatch(target_path, excluded_target_glob)
+                    for excluded_target_glob in excluded_target_globs
+                ):
+                    skipped_targets.append(target_path)
+                    continue
+                target_branch = target_data.get("branch")
+                target_commit = target_data.get("commit")
+                target_custom = target_data.get("custom")
+
+                current_auth_commit_info.target_infos.append(
+                    TargetAtCommitInfo(target_path, target_branch, target_commit, target_custom)
+                )
+        return auth_commit_targets_data
+
     def sorted_commits_and_branches_per_repositories(
         self,
         commits: List[str],
@@ -225,7 +261,7 @@ def sorted_commits_and_branches_per_repositories(
         custom_fns: Optional[Dict[str, Callable]] = None,
         default_branch: Optional[str] = None,
         excluded_target_globs: Optional[List[str]] = None,
-    ) -> Dict[str, Dict[str, List[Dict]]]:
+    ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
         """Return a dictionary consisting of branches and commits belonging
         to it for every target repository:
         {
diff --git a/taf/models/types.py b/taf/models/types.py
index 9ace735d..051fb8c4 100644
--- a/taf/models/types.py
+++ b/taf/models/types.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Iterator, List, Optional, Dict
+from typing import Any, Iterator, List, Optional, Dict
 import attrs
 
 from taf.constants import DEFAULT_ROLE_SETUP_PARAMS
@@ -171,3 +171,58 @@ def _dfs_delegations(role, skip_top_role=False):
 
         for role in roles:
             yield from _dfs_delegations(role, self.skip_top_role)
+
+
+@attrs.define
+class CommitInfo:
+    commit: str = attrs.field()
+    custom: Dict[str, Any] = attrs.field()
+    auth_commit: str = attrs.field()
+
+@attrs.define
+class TargetInfo:
+    branches: Dict[str, List[CommitInfo]] = attrs.field()
+
+    def get_branch(self, branch_name: str) -> List[CommitInfo]:
+        return self.branches.get(branch_name, [])
+
+@attrs.define
+class TargetsSortedCommitsData:
+    targets: Dict[str, TargetInfo] = attrs.field()
+
+    def __getitem__(self, repo_name: str) -> TargetInfo:
+        return self.targets.get(repo_name)
+
+    @classmethod
+    def from_dict(cls, data_dict: Dict[str, Dict[str, List[Dict[str, Any]]]]) -> 'TargetsSortedCommitsData':
+        targets = {}
+
+        for repo_name, branches_dict in data_dict.items():
+            commits_by_branch = {}
+            for branch_name, commits_list in branches_dict.items():
+                commits = [
+                    CommitInfo(
+                        commit=commit_data["commit"],
+                        custom=commit_data["custom"],
+                        auth_commit=commit_data["auth_commit"],
+                    )
+                    for commit_data in commits_list
+                ]
+                commits_by_branch[branch_name] = commits
+            targets[repo_name] = TargetInfo(branches=commits_by_branch)
+
+        return cls(targets=targets)
+
+
+@attrs.define
+class TargetAtCommitInfo:
+    repo_name: str = attrs.field()
+    branch: str = attrs.field()
+    commit: str = attrs.field()
+    custom: Dict[str, Any] = attrs.field()
+
+
+@attrs.define
+class AuthCommitGroup:
+    auth_commit: str = attrs.field()
+    target_infos: List[TargetAtCommitInfo] = attrs.field(factory=list)
diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py
index b092132d..fae5812c 100644
--- a/taf/updater/handlers.py
+++ b/taf/updater/handlers.py
@@ -191,8 +191,8 @@ def _init_commits(self):
         )
         default_branch = self.validation_auth_repo.default_branch
         if default_branch not in branches_containing_last_validated_commit:
-            msg = f"""Last successful commit not on the {default_branch} of the authentication repository.
-This could mean that the a commit was removed from the remote repository or that the last_successful_commit file was manually updated."""
+            msg = f"""Last validated commit not on the {default_branch} of the authentication repository.
+This could mean that the a commit was removed from the remote repository or that the last_validated_commit file was manually updated."""
             taf_logger.error(msg)
             raise UpdateFailedError(msg)
 

From fb653e7ba2c9961fb814830d2a5513095b126d7d Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 24 Oct 2023 21:12:37 +0200
Subject: [PATCH 04/28] feat: work on refactoring the update process with a
 pipeline architecture

1. Introduced a `Pipeline` framework to structure the update process as a series of distinct stages.
2. Implemented the following pipeline stages:
   - `clone_remote_and_run_tuf_updater`
   - `validate_out_of_band_and_update_type`
   - `clone_or_fetch_users_auth_repo`
   - `load_target_repositories`
   - `get_targets_data_from_auth_repo`
---
 taf/auth_repo.py                |   6 +-
 taf/models/types.py             |   7 +-
 taf/updater/updater_pipeline.py | 452 ++++++++++++++++++++++++++++++++
 3 files changed, 461 insertions(+), 4 deletions(-)
 create mode 100644 taf/updater/updater_pipeline.py

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index 8b491d42..62e29082 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -7,7 +7,7 @@
 from collections import defaultdict
 from contextlib import contextmanager
 from pathlib import Path
-from taf.models.types import AuthCommitGroup, TargetAtCommitInfo
+from taf.models.types import AuthCommitAndTargets, TargetAtCommitInfo
 from tuf.repository_tool import METADATA_DIRECTORY_NAME
 from taf.git import GitRepository
 from taf.repository_tool import (
@@ -227,14 +227,14 @@ def targets_data_per_auth_commits(
         default_branch: Optional[str] = None,
         excluded_target_globs: Optional[List[str]] = None,
     ):
-        auth_commit_targets_data: List[AuthCommitGroup] = []
+        auth_commit_targets_data: List[AuthCommitAndTargets] = []
         targets = self.targets_at_revisions(
             *commits, target_repos=target_repos, default_branch=default_branch
         )
         skipped_targets = []
         excluded_target_globs = excluded_target_globs or []
         for auth_commit in commits:
-            current_auth_commit_info = AuthCommitGroup(auth_commit, [])
+            current_auth_commit_info = AuthCommitAndTargets(auth_commit, [])
             auth_commit_targets_data.append(current_auth_commit_info)
             for target_path, target_data in targets[auth_commit].items():
                 if target_path in skipped_targets:
diff --git a/taf/models/types.py b/taf/models/types.py
index 051fb8c4..7c07dfe0 100644
--- a/taf/models/types.py
+++ b/taf/models/types.py
@@ -222,7 +222,12 @@ class TargetAtCommitInfo:
     custom: Dict[str, Any] = attrs.field()
 
 
+# TODO this needs to be sorted by branches
+# if that is not the case, then that will have to be done
+# elsewhere in the code
+# better in the auth repo function
+
 @attrs.define
-class AuthCommitGroup:
+class AuthCommitAndTargets:
     auth_commit: str = attrs.field()
     target_infos: List[TargetAtCommitInfo] = attrs.field(factory=list)
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
new file mode 100644
index 00000000..aa2975f6
--- /dev/null
+++ b/taf/updater/updater_pipeline.py
@@ -0,0 +1,452 @@
+import functools
+from logging import ERROR, INFO
+from pathlib import Path
+import shutil
+import tempfile
+from typing import Any, Dict, List, Optional
+from attr import attrs, field
+from logdecorator import log_on_end, log_on_error, log_on_start
+from taf.git import GitRepository
+
+import taf.settings as settings
+import taf.repositoriesdb as repositoriesdb
+from taf.auth_repo import AuthenticationRepository
+from taf.exceptions import UpdateFailedError
+from taf.updater.handlers import GitUpdater
+from taf.updater.lifecycle_handlers import Event
+from taf.updater.updater import EXPIRED_METADATA_ERROR, INFO_JSON_PATH, UpdateType
+from taf.utils import on_rm_error
+from taf.log import taf_logger
+from tuf.ngclient.updater import Updater
+
+@attrs
+class UpdateState:
+    auth_commits_since_last_validated: List[Any] = field(factory=list)
+    existing_repo: bool = field(default=False)
+    update_successful: bool = field(default=False)
+    event: Optional[str] = field(default=None)
+    users_auth_repo: Optional[AuthenticationRepository] = field(default=None)
+    validation_auth_repo: Optional[AuthenticationRepository] = field(default=None)
+    auth_repo_name: Optional[str] = field(default=None)
+    error: Optional[Exception] = field(default=None)
+    targets_data: Dict[str, Any] = field(factory=dict)
+    last_validated_commit: str = field(type=str)
+    target_repositories: Dict[GitRepository] = field(factory=dict)
+    cloned_target_repositories: List[GitRepository] = field(factory=list)
+    target_branches_data_from_auth_repo: Dict = field(factory=dict)
+
+
+@attrs
+class UpdateOutput:
+    event: str = field()
+    users_auth_repo: Any = field()
+    auth_repo_name: str = field()
+    commits_data: Dict[str, Any] = field()
+    error: Optional[Exception] = field(default=None)
+    targets_data: Dict[str, Any] = field(factory=dict)
+
+
+def cleanup_decorator(pipeline_function):
+    @functools.wraps(pipeline_function)
+    def wrapper(self, *args, **kwargs):
+        try:
+            result = pipeline_function(self, *args, **kwargs)
+            return result
+        finally:
+            if self.state.event == Event.FAILED and not self.state.existing_repo:
+                shutil.rmtree(self.state.users_auth_repo.path, onerror=on_rm_error)
+                shutil.rmtree(self.state.users_auth_repo.conf_dir)
+    return wrapper
+
+class UpdaterPipeline:
+
+    def __init__(
+        self, url, clients_auth_library_dir, targets_library_dir, auth_repo_name,
+        update_from_filesystem, expected_repo_type, target_repo_classes, target_factory,
+        only_validate, validate_from_commit, conf_directory_root, out_of_band_authentication,
+        checkout, excluded_target_globs
+    ):
+        self.url = url
+        self.clients_auth_library_dir = clients_auth_library_dir
+        self.targets_library_dir = targets_library_dir
+        self.update_from_filesystem = update_from_filesystem
+        self.expected_repo_type = expected_repo_type
+        self.target_repo_classes = target_repo_classes
+        self.target_factory = target_factory
+        self.only_validate = only_validate
+        self.validate_from_commit = validate_from_commit
+        self.conf_directory_root = conf_directory_root
+        self.out_of_band_authentication = out_of_band_authentication
+        self.checkout = checkout
+        self.excluded_target_globs = excluded_target_globs
+
+        self.state = UpdateState()
+        self.state.auth_repo_name = auth_repo_name
+        self.state.targets_data = {}
+        self._output = None
+
+    @property
+    def output(self):
+        if not self._output:
+            raise ValueError("Pipeline has not been run yet. Please run the pipeline first.")
+        return self._output
+
+    def run(self):
+        steps = [
+            self.clone_remote_and_run_tuf_updater,
+            self.validate_out_of_band_and_update_type,
+            self.clone_or_fetch_users_auth_repo,
+            self.load_target_repositories,
+            self.clone_target_repositories_if_not_on_disk,
+            self.get_targets_data_from_auth_repo,
+            self.validate_target_repositories,
+            self.merge_branch_commits
+        ]
+
+        for step in steps:
+            try:
+                should_continue = step()
+                if not should_continue:
+                    break
+
+            except Exception as e:
+                self.handle_error(e)
+                break  # If you want to stop further execution upon any error.
+
+            self._set_output()
+
+
+    @log_on_start(INFO, "Cloning repository and running TUF updater", logger=taf_logger)
+    @log_on_end(INFO, "TUF validation finished", logger=taf_logger)
+    @cleanup_decorator
+    def clone_remote_and_run_tuf_updater(self):
+        settings.update_from_filesystem = self.update_from_filesystem
+        settings.conf_directory_root = self.conf_directory_root
+        try:
+            self.state.auth_commits_since_last_validated = None
+            self.state.existing_repo = (
+                Path(self.clients_auth_library_dir, self.auth_repo_name).exists()
+                if self.state.auth_repo_name is not None
+                else True
+            )
+
+            # Clone the validation repository in temp.
+            self.state.auth_repo_name = _clone_validation_repo(self.url, self.state.auth_repo_name)
+            git_updater = GitUpdater(self.url, self.clients_auth_library_dir, self.state.auth_repo_name)
+            self.state.users_auth_repo = git_updater.users_auth_repo
+            _run_tuf_updater(git_updater)
+            self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
+            self.state.validation_auth_repo = git_updater.validation_auth_repo
+            self.auth_commits_since_last_validated = list(git_updater.commits)
+            self.state.event = Event.CHANGED if len( self.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
+
+            # used for testing purposes
+            if settings.overwrite_last_validated_commit:
+                self.state.last_validated_commit = settings.last_validated_commit
+            else:
+                self.state.last_validated_commit = self.state.users_auth_repo.last_validated_commit
+            # always clean up repository updater
+            git_updater.cleanup()
+            return True
+
+        except Exception as e:
+            self.state.error = e
+            self.state.users_auth_repo = None
+
+            if self.state.auth_repo_name is not None:
+                self.state.users_auth_repo = AuthenticationRepository(
+                    self.clients_auth_library_dir,
+                    self.state.auth_repo_name,
+                    urls=[self.url],
+                    conf_directory_root=self.conf_directory_root,
+                )
+
+            self.state.event = Event.FAILED
+            return False
+
+    @cleanup_decorator
+    @log_on_start(INFO, "Validating out of band commit and update type", logger=taf_logger)
+    @log_on_end(INFO, "Validation finished", logger=taf_logger)
+    def validate_out_of_band_and_update_type(self):
+        try:
+            # this is the repository cloned inside the temp directory
+            # we validate it before updating the actual authentication repository
+            if (
+                self.out_of_band_authentication is not None
+                and self.state.users_auth_repo.last_validated_commit is None
+                and self.auth_commits_since_last_validated[0] != self.out_of_band_authentication
+            ):
+                raise UpdateFailedError(
+                    f"First commit of repository {self.state.auth_repo_name} does not match "
+                    "out of band authentication commit"
+                )
+
+            if self.expected_repo_type != UpdateType.EITHER:
+                # check if the repository being updated is a test repository
+                if self.state.validation_auth_repo.is_test_repo and self.expected_repo_type != UpdateType.TEST:
+                    raise UpdateFailedError(
+                        f"Repository {self.state.users_auth_repo.name} is a test repository. "
+                        'Call update with "--expected-repo-type" test to update a test '
+                        "repository"
+                    )
+                elif (
+                    not self.state.validation_auth_repo.is_test_repo
+                    and self.state.expected_repo_type == UpdateType.TEST
+                ):
+                    raise UpdateFailedError(
+                        f"Repository {self.state.users_auth_repo.name} is not a test repository,"
+                        ' but update was called with the "--expected-repo-type" test'
+                    )
+
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return False
+
+    @log_on_start(INFO, "Cloning or updating user's authentication repository", logger=taf_logger)
+    def clone_or_fetch_users_auth_repo(self):
+        if not self.only_validate:
+            # fetch the latest commit or clone the repository without checkout
+            # do not merge before targets are validated as well
+            try:
+                if self.state.existing_repo:
+                    self.state.users_auth_repo.fetch(fetch_all=True)
+                else:
+                    self.state.users_auth_repo.clone()
+            except Exception as e:
+                self.state.error = e
+                self.state.event = Event.FAILED
+                return False
+        return True
+
+    def load_target_repositories(self):
+        try:
+            repositoriesdb.load_repositories(
+                self.state.users_auth_repo,
+                repo_classes=self.target_repo_classes,
+                factory=self.target_factory,
+                library_dir=self.targets_library_dir,
+                commits=self.state.auth_commits_since_last_validated,
+                only_load_targets=False,
+                excluded_target_globs=self.excluded_target_globs,
+            )
+            self.state.target_repositories = repositoriesdb.get_deduplicated_repositories(
+                self.state.users_auth_repo, self.state.auth_commits_since_last_validated[-1::]
+            )
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return False
+
+    def clone_target_repositories_if_not_on_disk(self):
+        self.state.cloned_target_repositories = []
+        for repository in self.state.target_repositories.values():
+            is_git_repository = repository.is_git_repository_root
+            if not is_git_repository:
+                if self.only_validate:
+                    taf_logger.warning(
+                        "Target repositories must already exist when only validating repositories"
+                    )
+                    continue
+                repository.clone(no_checkout=True)
+                self.state.cloned_target_repositories.append(repository)
+
+
+    def get_targets_data_from_auth_repo(self):
+        targets_by_auth_commits = self.state.users_auth_repo.targets_data_per_auth_commits()
+
+        # for each repository and each branch that needs to be validated
+        # get actual target commits
+        # extract branch information
+        # TODO this is where the data from the authentication repository needed for validating
+        # targets should be extracted
+        # and saved to state
+        self.state.target_branches_data_from_auth_repo = {}
+        for auth_commit_and_target in targets_by_auth_commits:
+            for target_info in auth_commit_and_target.target_infos:
+                 self.state.target_branches_data_from_auth_repo.setdefault(target_info.name, set()).add(target_info.branch)
+
+
+    def validate_target_repositories(self):
+        # Implement logic
+        # Update self.state as necessary
+        pass
+
+    def merge_branch_commits(self):
+        # Implement logic
+        # Update self.state as necessary
+        pass
+
+    def handle_error(self, error):
+        # Centralized error handling
+        print(f"Error encountered: {error}")
+
+
+    def _set_output(self):
+        if self.state.commits is None:
+            commit_before_pull = None
+            new_commits = []
+            commit_after_pull = None
+        else:
+            commit_before_pull = self.state.commits[0] if self.state.existing_repo and len(self.state.commits) else None
+            commit_after_pull = self.state.commits[-1] if self.state.update_successful else self.state.commits[0]
+
+            if not self.state.existing_repo:
+                new_commits = self.state.commits
+            else:
+                new_commits = self.state.commits[1:] if len(self.state.commits) else []
+        commits_data = {
+            "before_pull": commit_before_pull,
+            "new": new_commits,
+            "after_pull": commit_after_pull,
+        }
+        self._output = UpdateOutput(
+            event=self.state.event,
+            users_auth_repo=self.state.users_auth_repo,
+            auth_repo_name=self.state.auth_repo_name,
+            commits_data=commits_data,
+            error=self.state.error,
+            targets_data=self.state.targets_data
+        )
+
+
+
+class UpdateTargetRepositoriesPipeline():
+    # sub-pipeline
+    # called for each auth repo commit
+    pass
+
+def _clone_validation_repo(url, repository_name):
+    """
+    Clones the authentication repository based on the url specified using the
+    mirrors parameter. The repository is cloned as a bare repository
+    to a the temp directory and will be deleted one the update is done.
+
+    If repository_name isn't provided (default value), extract it from info.json.
+    """
+    temp_dir = tempfile.mkdtemp()
+    path = Path(temp_dir, "auth_repo").absolute()
+    validation_auth_repo = AuthenticationRepository(
+        path=path, urls=[url], alias="Validation repository"
+    )
+    validation_auth_repo.clone(bare=True)
+    validation_auth_repo.fetch(fetch_all=True)
+
+    settings.validation_repo_path = validation_auth_repo.path
+
+    validation_head_sha = validation_auth_repo.top_commit_of_branch(
+        validation_auth_repo.default_branch
+    )
+
+    if repository_name is None:
+        try:
+            info = validation_auth_repo.get_json(validation_head_sha, INFO_JSON_PATH)
+            repository_name = f'{info["namespace"]}/{info["name"]}'
+        except Exception:
+            raise UpdateFailedError(
+                "Error during info.json parse. When specifying --clients-library-dir check if info.json metadata exists in targets/protected or provide full path to auth repo"
+            )
+
+    validation_auth_repo.cleanup()
+    return repository_name
+
+
+def _is_unauthenticated_allowed(repository):
+    return repository.custom.get(
+        "allow-unauthenticated-commits", False
+    )
+
+
+def _run_tuf_updater(git_updater):
+    def _init_updater():
+        try:
+            return Updater(
+                git_updater.metadata_dir,
+                "metadata/",
+                git_updater.targets_dir,
+                "targets/",
+                fetcher=git_updater,
+            )
+        except Exception as e:
+            taf_logger.error(f"Failed to instantiate TUF Updater due to error: {e}")
+            raise e
+
+    def _update_tuf_current_revision():
+        current_commit = git_updater.current_commit
+        try:
+            updater.refresh()
+            taf_logger.debug("Validated metadata files at revision {}", current_commit)
+            # using refresh, we have updated all main roles
+            # we still need to update the delegated roles (if there are any)
+            # and validate any target files
+            current_targets = git_updater.get_current_targets()
+            for target_path in current_targets:
+                target_filepath = target_path.replace("\\", "/")
+
+                targetinfo = updater.get_targetinfo(target_filepath)
+                target_data = git_updater.get_current_target_data(
+                    target_filepath, raw=True
+                )
+                targetinfo.verify_length_and_hashes(target_data)
+
+                taf_logger.debug(
+                    "Successfully validated target file {} at {}",
+                    target_filepath,
+                    current_commit,
+                )
+        except Exception as e:
+            metadata_expired = EXPIRED_METADATA_ERROR in type(
+                e
+            ).__name__ or EXPIRED_METADATA_ERROR in str(e)
+            if not metadata_expired or settings.strict:
+                taf_logger.error(
+                    "Validation of authentication repository {} failed at revision {} due to error: {}",
+                    git_updater.users_auth_repo.name,
+                    current_commit,
+                    e,
+                )
+                raise UpdateFailedError(
+                    f"Validation of authentication repository {git_updater.users_auth_repo.name}"
+                    f" failed at revision {current_commit} due to error: {e}"
+                )
+            taf_logger.warning(
+                f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error: {e}"
+            )
+
+    while not git_updater.update_done():
+        updater = _init_updater()
+        _update_tuf_current_revision()
+
+    taf_logger.info(
+        "Successfully validated authentication repository {}",
+        git_updater.users_auth_repo.name,
+    )
+
+def _set_target_old_head_and_validate(
+    repository,
+    branch,
+    branch_exists,
+    last_validated_commit,
+    is_git_repository,
+    repo_branch_commits,
+    allow_unauthenticated_for_repo,
+):
+    if (
+        last_validated_commit is None
+        or not is_git_repository
+        or not branch_exists
+        or not len(repo_branch_commits)
+    ):
+        old_head = None
+    else:
+        old_head = repo_branch_commits[0]
+        if not allow_unauthenticated_for_repo:
+            repo_old_head = repository.top_commit_of_branch(branch)
+            # do the same as when checking the top and last_validated_commit of the authentication repository
+            if repo_old_head != old_head:
+                commits_since = repository.all_commits_since_commit(old_head)
+                if repo_old_head not in commits_since:
+                    msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
+                    taf_logger.error(msg)
+                    raise UpdateFailedError(msg)
+    return old_head

From 24924e153398ee5445e082a5cdc1d8bd005d2ef7 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 25 Oct 2023 07:36:55 +0200
Subject: [PATCH 05/28] refact: move target repos initial state validation and
 commits fetching to pipeline

---
 taf/updater/updater_pipeline.py | 224 ++++++++++++++++++++++----------
 1 file changed, 157 insertions(+), 67 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index aa2975f6..c36ec904 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import functools
 from logging import ERROR, INFO
 from pathlib import Path
@@ -5,6 +6,7 @@
 import tempfile
 from typing import Any, Dict, List, Optional
 from attr import attrs, field
+from git import GitError
 from logdecorator import log_on_end, log_on_error, log_on_start
 from taf.git import GitRepository
 
@@ -34,6 +36,8 @@ class UpdateState:
     target_repositories: Dict[GitRepository] = field(factory=dict)
     cloned_target_repositories: List[GitRepository] = field(factory=list)
     target_branches_data_from_auth_repo: Dict = field(factory=dict)
+    old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
+    fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
 
 
 @attrs
@@ -58,7 +62,33 @@ def wrapper(self, *args, **kwargs):
                 shutil.rmtree(self.state.users_auth_repo.conf_dir)
     return wrapper
 
-class UpdaterPipeline:
+
+class Pipeline:
+
+    def __init__(self, steps):
+        self.steps = steps
+
+    def run(self):
+        for step in self.steps:
+            try:
+                should_continue = step()
+                if not should_continue:
+                    break
+
+            except Exception as e:
+                self._handle_error(e)
+                break
+
+            self._set_output()
+
+    def _handle_error(self, e):
+        pass
+
+    def _set_output(self):
+        pass
+
+class AuthenticationRepositoryUpdatePipeline(Pipeline):
+
 
     def __init__(
         self, url, clients_auth_library_dir, targets_library_dir, auth_repo_name,
@@ -66,6 +96,21 @@ def __init__(
         only_validate, validate_from_commit, conf_directory_root, out_of_band_authentication,
         checkout, excluded_target_globs
     ):
+
+        super().__init__(steps=[
+            self.clone_remote_and_run_tuf_updater,
+            self.validate_out_of_band_and_update_type,
+            self.clone_or_fetch_users_auth_repo,
+            self.load_target_repositories,
+            self.clone_target_repositories_if_not_on_disk,
+            self.get_targets_data_from_auth_repo,
+            self.validate_target_repositories_initial_state,
+            self.get_target_repositories_commits,
+            self.update_target_repositories,
+            self.merge_branch_commits
+        ])
+
+
         self.url = url
         self.clients_auth_library_dir = clients_auth_library_dir
         self.targets_library_dir = targets_library_dir
@@ -91,31 +136,6 @@ def output(self):
             raise ValueError("Pipeline has not been run yet. Please run the pipeline first.")
         return self._output
 
-    def run(self):
-        steps = [
-            self.clone_remote_and_run_tuf_updater,
-            self.validate_out_of_band_and_update_type,
-            self.clone_or_fetch_users_auth_repo,
-            self.load_target_repositories,
-            self.clone_target_repositories_if_not_on_disk,
-            self.get_targets_data_from_auth_repo,
-            self.validate_target_repositories,
-            self.merge_branch_commits
-        ]
-
-        for step in steps:
-            try:
-                should_continue = step()
-                if not should_continue:
-                    break
-
-            except Exception as e:
-                self.handle_error(e)
-                break  # If you want to stop further execution upon any error.
-
-            self._set_output()
-
-
     @log_on_start(INFO, "Cloning repository and running TUF updater", logger=taf_logger)
     @log_on_end(INFO, "TUF validation finished", logger=taf_logger)
     @cleanup_decorator
@@ -267,10 +287,117 @@ def get_targets_data_from_auth_repo(self):
                  self.state.target_branches_data_from_auth_repo.setdefault(target_info.name, set()).add(target_info.branch)
 
 
-    def validate_target_repositories(self):
-        # Implement logic
-        # Update self.state as necessary
-        pass
+    def validate_target_repositories_initial_state(self):
+        try:
+            self.state.old_heads_per_target_repos_branches = defaultdict(dict)
+            for repo_name, repository in self.target_repositories:
+                for branch in self.target_branches_data_from_auth_repo[repo_name]:
+                    # if last_validated_commit is None or if the target repository didn't exist prior
+                    # to calling update, start the update from the beginning
+                    # otherwise, for each branch, start with the last validated commit of the local branch
+                    branch_exists = repository.branch_exists(branch, include_remotes=False)
+                    if not branch_exists and self.only_validate:
+                        self.state.targets_data = {}
+                        msg = f"{repo_name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories"
+                        taf_logger.error(msg)
+                        raise UpdateFailedError(msg)
+
+                    # TODO
+                    repo_branch_commits = []
+                    if (
+                        self.last_validated_commit is None
+                        or not repository.is_git_repository_root
+                        or not branch_exists
+                        or not len(repo_branch_commits)
+                    ):
+                        old_head = None
+                    else:
+                        old_head = repo_branch_commits[0]
+                        if not _is_unauthenticated_allowed(repository):
+                            repo_old_head = repository.top_commit_of_branch(branch)
+                            # do the same as when checking the top and last_validated_commit of the authentication repository
+                            if repo_old_head != old_head:
+                                commits_since = repository.all_commits_since_commit(old_head)
+                                if repo_old_head not in commits_since:
+                                    msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
+                                    taf_logger.error(msg)
+                                    raise UpdateFailedError(msg)
+                    self.state.old_heads_per_target_repos_branches[repo_name][branch] = old_head
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            # TODO remove cloned?
+            return False
+
+
+    def get_target_repositories_commits(self):
+        """Returns a list of newly fetched commits belonging to the specified branch."""
+        self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
+        for repo_name, repository in self.target_repositories:
+            for branch in self.target_branches_data_from_auth_repo[repo_name]:
+                if repository.is_git_repository_root:
+                    repository.fetch(branch=branch)
+
+                old_head = self.state.old_heads_per_target_repos_branches[repo_name][branch]
+                if old_head is not None:
+                    if not self.only_validate:
+                        fetched_commits = repository.all_commits_on_branch(
+                            branch=f"origin/{branch}"
+                        )
+
+                        # if the local branch does not exist (the branch was not checked out locally)
+                        # fetched commits will include already validated commits
+                        # check which commits are newer that the previous head commit
+                        if old_head in fetched_commits:
+                            fetched_commits_on_target_repo_branch = fetched_commits[
+                                fetched_commits.index(old_head) + 1 : :
+                            ]
+                        else:
+                            fetched_commits_on_target_repo_branch = repository.all_commits_since_commit(
+                                old_head, branch
+                            )
+                            for commit in fetched_commits:
+                                if commit not in fetched_commits_on_target_repo_branch:
+                                    fetched_commits_on_target_repo_branch.append(commit)
+                    else:
+                        fetched_commits_on_target_repo_branch = repository.all_commits_since_commit(
+                            old_head, branch
+                        )
+                    fetched_commits_on_target_repo_branch.insert(0, old_head)
+                else:
+                    branch_exists = repository.branch_exists(branch, include_remotes=False)
+                    if branch_exists:
+                        # this happens in the case when last_validated_commit does not exist
+                        # we want to validate all commits, so combine existing commits and
+                        # fetched commits
+                        fetched_commits_on_target_repo_branch = repository.all_commits_on_branch(
+                            branch=branch, reverse=True
+                        )
+                    else:
+                        fetched_commits_on_target_repo_branch = []
+                    if not self.only_validate:
+                        try:
+                            fetched_commits = repository.all_commits_on_branch(
+                                branch=f"origin/{branch}"
+                            )
+                            # if the local branch does not exist (the branch was not checked out locally)
+                            # fetched commits will include already validated commits
+                            # check which commits are newer that the previous head commit
+                            for commit in fetched_commits:
+                                if commit not in fetched_commits_on_target_repo_branch:
+                                    fetched_commits_on_target_repo_branch.append(commit)
+                        except GitError:
+                            pass
+                self.state.fetched_commits_per_target_repos_branches[repo_name][branch] = fetched_commits_on_target_repo_branch
+
+
+    def update_target_repositories(self):
+        for auth_commit in self.state.auth_commits_since_last_validated:
+            # update target repositories given that auth_commit
+            # what if unauthenticated are allowed?
+            # rework update, transition to bfs
+            pass
+
 
     def merge_branch_commits(self):
         # Implement logic
@@ -310,12 +437,6 @@ def _set_output(self):
         )
 
 
-
-class UpdateTargetRepositoriesPipeline():
-    # sub-pipeline
-    # called for each auth repo commit
-    pass
-
 def _clone_validation_repo(url, repository_name):
     """
     Clones the authentication repository based on the url specified using the
@@ -350,13 +471,11 @@ def _clone_validation_repo(url, repository_name):
     validation_auth_repo.cleanup()
     return repository_name
 
-
 def _is_unauthenticated_allowed(repository):
     return repository.custom.get(
         "allow-unauthenticated-commits", False
     )
 
-
 def _run_tuf_updater(git_updater):
     def _init_updater():
         try:
@@ -421,32 +540,3 @@ def _update_tuf_current_revision():
         "Successfully validated authentication repository {}",
         git_updater.users_auth_repo.name,
     )
-
-def _set_target_old_head_and_validate(
-    repository,
-    branch,
-    branch_exists,
-    last_validated_commit,
-    is_git_repository,
-    repo_branch_commits,
-    allow_unauthenticated_for_repo,
-):
-    if (
-        last_validated_commit is None
-        or not is_git_repository
-        or not branch_exists
-        or not len(repo_branch_commits)
-    ):
-        old_head = None
-    else:
-        old_head = repo_branch_commits[0]
-        if not allow_unauthenticated_for_repo:
-            repo_old_head = repository.top_commit_of_branch(branch)
-            # do the same as when checking the top and last_validated_commit of the authentication repository
-            if repo_old_head != old_head:
-                commits_since = repository.all_commits_since_commit(old_head)
-                if repo_old_head not in commits_since:
-                    msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
-                    taf_logger.error(msg)
-                    raise UpdateFailedError(msg)
-    return old_head

From c08ea4882ecdbf291b816e088fea94adf35b954b Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 25 Oct 2023 21:53:29 +0200
Subject: [PATCH 06/28] feat: initial breadth-first targets validation

---
 taf/auth_repo.py                |  66 ++++++++++-----
 taf/git.py                      |  14 ++++
 taf/updater/updater_pipeline.py | 142 +++++++++++++++++++++++++++-----
 3 files changed, 182 insertions(+), 40 deletions(-)

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index 62e29082..3ac9becb 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -220,39 +220,63 @@ def set_last_validated_commit(self, commit: str):
         Path(self.conf_dir, self.LAST_VALIDATED_FILENAME).write_text(commit)
 
 
-    def targets_data_per_auth_commits(
-        self,
-        commits: List[str],
-        target_repos: Optional[List[str]] = None,
-        default_branch: Optional[str] = None,
-        excluded_target_globs: Optional[List[str]] = None,
-    ):
-        auth_commit_targets_data: List[AuthCommitAndTargets] = []
+    def targets_data_by_auth_commits(
+            self,
+            commits: List[str],
+            target_repos: Optional[List[str]] = None,
+            custom_fns: Optional[Dict[str, Callable]] = None,
+            default_branch: Optional[str] = None,
+            excluded_target_globs: Optional[List[str]] = None,
+    ) -> Dict[str, Dict[str, Dict[str, Any]]]:
+        """
+        Return a dictionary where each target repository has associated authentication commits,
+        and for each authentication commit, there's a dictionary of the branch, commit and custom data.
+
+        {
+            'target_repo1': {
+                'auth_commit1': {'branch': 'branch1', 'commit': 'commit1', 'custom': {}},
+                'auth_commit2': {'branch': 'branch1', 'commit': 'commit2', 'custom': {}},
+                ...
+            },
+            'target_repo2': {
+                ...
+            },
+            ...
+        }
+
+        """
+        repositories_commits: Dict[str, Dict[str, Dict[str, Any]]] = {}
         targets = self.targets_at_revisions(
             *commits, target_repos=target_repos, default_branch=default_branch
         )
-        skipped_targets = []
         excluded_target_globs = excluded_target_globs or []
-        for auth_commit in commits:
-            current_auth_commit_info = AuthCommitAndTargets(auth_commit, [])
-            auth_commit_targets_data.append(current_auth_commit_info)
-            for target_path, target_data in targets[auth_commit].items():
-                if target_path in skipped_targets:
-                    continue
+        for commit in commits:
+            for target_path, target_data in targets[commit].items():
                 if any(
                     fnmatch.fnmatch(target_path, excluded_target_glob)
                     for excluded_target_glob in excluded_target_globs
                 ):
-                    skipped_targets.append(target_path)
                     continue
+
                 target_branch = target_data.get("branch")
                 target_commit = target_data.get("commit")
-                target_custom = target_data.get("custom")
+                target_data.setdefault("custom", {})
+                if custom_fns is not None and target_path in custom_fns:
+                    target_data["custom"].update(
+                        custom_fns[target_path](target_commit)
+                    )
+
+                repositories_commits.setdefault(target_path, {})[commit] = {
+                    "branch": target_branch,
+                    "commit": target_commit,
+                    "custom": target_data.get("custom"),
+                }
+
+        self._log_debug(
+            f"new commits per repositories according to target files: {repositories_commits}"
+        )
+        return repositories_commits
 
-                current_auth_commit_info.target_infos.append(
-                    TargetAtCommitInfo(target_path, target_branch, target_commit, target_custom)
-                )
-        return auth_commit_targets_data
 
     def sorted_commits_and_branches_per_repositories(
         self,
diff --git a/taf/git.py b/taf/git.py
index 8b96a26d..145638e1 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+import datetime
 import json
 import itertools
 import os
@@ -806,6 +807,19 @@ def delete_remote_branch(
             remote = self.remotes[0]
         self._git(f"push {remote} --delete {branch_name}", log_error=True)
 
+    def get_commit_date(self, commit_sha: str) -> str:
+        """Returns commit date of the given commit"""
+        repo = self.pygit_repo
+        if repo is None:
+            raise GitError(
+                "Could not get commit message. pygit repository could not be instantiated."
+            )
+        commit = repo.get(commit_sha)
+        date = datetime.utcfromtimestamp(commit.commit_time + commit.commit_time_offset)
+        formatted_date = date.strftime('%Y-%m-%d')
+        return formatted_date
+
+
     def get_commit_message(self, commit_sha: str) -> str:
         """Returns commit message of the given commit"""
         repo = self.pygit_repo
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c36ec904..80ac3ea5 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -36,6 +36,7 @@ class UpdateState:
     target_repositories: Dict[GitRepository] = field(factory=dict)
     cloned_target_repositories: List[GitRepository] = field(factory=list)
     target_branches_data_from_auth_repo: Dict = field(factory=dict)
+    targets_data_by_auth_commits: Dict = field(factory=dict)
     old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
     fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
 
@@ -221,9 +222,11 @@ def validate_out_of_band_and_update_type(self):
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
+            taf_logger.error(e)
             return False
 
     @log_on_start(INFO, "Cloning or updating user's authentication repository", logger=taf_logger)
+    @log_on_end(INFO, "Clone or fetch finished", logger=taf_logger)
     def clone_or_fetch_users_auth_repo(self):
         if not self.only_validate:
             # fetch the latest commit or clone the repository without checkout
@@ -236,6 +239,7 @@ def clone_or_fetch_users_auth_repo(self):
             except Exception as e:
                 self.state.error = e
                 self.state.event = Event.FAILED
+                taf_logger.error(e)
                 return False
         return True
 
@@ -256,8 +260,10 @@ def load_target_repositories(self):
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
+            taf_logger.error(e)
             return False
 
+    @log_on_start(INFO, "Cloning target repositories which are not on disk", logger=taf_logger)
     def clone_target_repositories_if_not_on_disk(self):
         self.state.cloned_target_repositories = []
         for repository in self.state.target_repositories.values():
@@ -273,25 +279,22 @@ def clone_target_repositories_if_not_on_disk(self):
 
 
     def get_targets_data_from_auth_repo(self):
-        targets_by_auth_commits = self.state.users_auth_repo.targets_data_per_auth_commits()
-
-        # for each repository and each branch that needs to be validated
-        # get actual target commits
-        # extract branch information
-        # TODO this is where the data from the authentication repository needed for validating
-        # targets should be extracted
-        # and saved to state
-        self.state.target_branches_data_from_auth_repo = {}
-        for auth_commit_and_target in targets_by_auth_commits:
-            for target_info in auth_commit_and_target.target_infos:
-                 self.state.target_branches_data_from_auth_repo.setdefault(target_info.name, set()).add(target_info.branch)
+        self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits()
+        repo_branches = {}
+        for repo_name, commits_data in  self.state.targets_data_by_auth_commits.items():
+            branches = set()  # using a set to avoid duplicate branches
+            for commit_data in commits_data.values():
+                branches.add(commit_data["branch"])
+            repo_branches[repo_name] = sorted(list(branches))
+        self.state.target_branches_data_from_auth_repo = repo_branches
 
 
+    @log_on_start(INFO, "Validating initial state of target repositories", logger=taf_logger)
     def validate_target_repositories_initial_state(self):
         try:
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
             for repo_name, repository in self.target_repositories:
-                for branch in self.target_branches_data_from_auth_repo[repo_name]:
+                for branch in self.state.target_branches_data_from_auth_repo[repo_name]:
                     # if last_validated_commit is None or if the target repository didn't exist prior
                     # to calling update, start the update from the beginning
                     # otherwise, for each branch, start with the last validated commit of the local branch
@@ -326,15 +329,16 @@ def validate_target_repositories_initial_state(self):
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
+            taf_logger.error(e)
             # TODO remove cloned?
             return False
 
-
+    @log_on_start(INFO, "Validating initial state of target repositories", logger=taf_logger)
     def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
         self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
         for repo_name, repository in self.target_repositories:
-            for branch in self.target_branches_data_from_auth_repo[repo_name]:
+            for branch in self.state.target_branches_data_from_auth_repo[repo_name]:
                 if repository.is_git_repository_root:
                     repository.fetch(branch=branch)
 
@@ -380,6 +384,7 @@ def get_target_repositories_commits(self):
                             fetched_commits = repository.all_commits_on_branch(
                                 branch=f"origin/{branch}"
                             )
+
                             # if the local branch does not exist (the branch was not checked out locally)
                             # fetched commits will include already validated commits
                             # check which commits are newer that the previous head commit
@@ -392,11 +397,37 @@ def get_target_repositories_commits(self):
 
 
     def update_target_repositories(self):
+        """
+        Breadth-first update of target repositories
+        Merge last valid commits at the end of the update
+        """
+        last_validated_target_commits_per_repositories = {}
         for auth_commit in self.state.auth_commits_since_last_validated:
-            # update target repositories given that auth_commit
-            # what if unauthenticated are allowed?
-            # rework update, transition to bfs
-            pass
+            for repository in self.state.target_repositories.values():
+                last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
+                current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
+                target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
+                validated_commit = _validate_current_repo_commit(
+                    repository,
+                    self.state.users_auth_repo,
+                    last_validated_target_commit,
+                    current_targets_data_from_auth_repo,
+                    target_commits_from_target_repo,
+                    auth_commit)
+                last_validated_target_commits_per_repositories[repository.name] = validated_commit
+        # TODO check for additional commits
+
+
+        # event = Event.CHANGED if len(commits) > 1 else Event.UNCHANGED
+        # TODO set targets_data
+        # return (
+        #     event,
+        #     users_auth_repo,
+        #     auth_repo_name,
+        #     _commits_ret(commits, existing_repo, True),
+        #     None,
+        #     targets_data,
+        # )
 
 
     def merge_branch_commits(self):
@@ -540,3 +571,76 @@ def _update_tuf_current_revision():
         "Successfully validated authentication repository {}",
         git_updater.users_auth_repo.name,
     )
+
+
+def _validate_current_repo_commit(
+        repository,
+        users_auth_repo,
+        last_validated_target_commit,
+        current_targets_data_from_auth_repo,
+        target_commits_from_target_repo,
+        current_auth_commit,
+    ):
+    current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
+    branch = current_targets_data_from_auth_repo["branch"]
+    target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
+    if last_validated_target_commit is not None and last_validated_target_commit in target_commits_from_target_repos_on_branch:
+        # same branch
+        current_target_commit = _find_next_value(last_validated_target_commit, target_commits_from_target_repos_on_branch)
+    else:
+        # next branch
+        current_target_commit = target_commits_from_target_repos_on_branch[0]
+
+    if current_target_commit is None:
+        # there are commits missing from the target repository
+        commit_date = users_auth_repo.get_commit_date()
+        raise UpdateFailedError(
+            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                but commit not on branch {branch}"
+        )
+
+    current_target_commit = target_commits_from_target_repo[branch].index
+    if current_commit_from_auth_repo == current_target_commit:
+        return current_target_commit
+    if not _is_unauthenticated_allowed(repository):
+        commit_date = users_auth_repo.get_commit_date()
+        raise UpdateFailedError(
+            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                but repo was at {last_validated_target_commit}"
+        )
+    # unauthenticated commits are allowed, try to skip them
+    # if commits of the target repositories were swapped, commit which is expected to be found
+    # after the current one will be skipped and it won't be found later, so validation will fail
+    remaining_commits = target_commits_from_target_repos_on_branch[target_commits_from_target_repos_on_branch.index(last_validated_target_commit):]
+    for target_commit in remaining_commits:
+        if current_commit_from_auth_repo == target_commit:
+            return target_commit
+        taf_logger.info(f"{repository.name}: skipping target commit {target_commit}")
+    raise UpdateFailedError(
+        f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+            data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+            but commit not on branch {branch}"
+    )
+
+def _find_next_value(value, values_list):
+    """
+    Find the next value in the list after the given value.
+
+    Parameters:
+    - value: The value to look for.
+    - values_list: The list of values.
+
+    Returns:
+    - The next value in the list after the given value, or None if there isn't one.
+    """
+    try:
+        index = values_list.index(value)
+        if index < len(values_list) - 1:  # check if there are remaining values
+            return values_list[index + 1]
+    except ValueError:
+        pass  # value not in list
+    return None
+
+

From cc4c6a58de300f1a2e3b4ad365575343a7929e0e Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Thu, 26 Oct 2023 21:34:28 +0200
Subject: [PATCH 07/28] refact: additional work on moving code to the updater
 pipeline

---
 taf/updater/types/update.py     |   7 +
 taf/updater/updater_pipeline.py | 360 +++++++++++++++++++++++---------
 2 files changed, 263 insertions(+), 104 deletions(-)

diff --git a/taf/updater/types/update.py b/taf/updater/types/update.py
index fb3a6a28..84969a40 100644
--- a/taf/updater/types/update.py
+++ b/taf/updater/types/update.py
@@ -1,3 +1,4 @@
+import enum
 from attrs import define, field
 from typing import Dict
 
@@ -9,3 +10,9 @@ class Update:
     error_msg: str = field(default="")
     auth_repos: Dict = field(factory=dict)
     auth_repo_name: str = field(default="")
+
+class UpdateType(enum.Enum):
+    TEST = "test"
+    OFFICIAL = "official"
+    EITHER = "either"
+
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 80ac3ea5..c49181f6 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -5,7 +5,7 @@
 import shutil
 import tempfile
 from typing import Any, Dict, List, Optional
-from attr import attrs, field
+from attr import attrs, define, field
 from git import GitError
 from logdecorator import log_on_end, log_on_error, log_on_start
 from taf.git import GitRepository
@@ -16,29 +16,38 @@
 from taf.exceptions import UpdateFailedError
 from taf.updater.handlers import GitUpdater
 from taf.updater.lifecycle_handlers import Event
-from taf.updater.updater import EXPIRED_METADATA_ERROR, INFO_JSON_PATH, UpdateType
+from taf.updater.types.update import UpdateType
 from taf.utils import on_rm_error
 from taf.log import taf_logger
 from tuf.ngclient.updater import Updater
+from tuf.repository_tool import TARGETS_DIRECTORY_NAME
 
-@attrs
+
+EXPIRED_METADATA_ERROR = "ExpiredMetadataError"
+PROTECTED_DIRECTORY_NAME = "protected"
+INFO_JSON_PATH = f"{TARGETS_DIRECTORY_NAME}/{PROTECTED_DIRECTORY_NAME}/info.json"
+
+
+@define
 class UpdateState:
     auth_commits_since_last_validated: List[Any] = field(factory=list)
     existing_repo: bool = field(default=False)
     update_successful: bool = field(default=False)
     event: Optional[str] = field(default=None)
-    users_auth_repo: Optional[AuthenticationRepository] = field(default=None)
-    validation_auth_repo: Optional[AuthenticationRepository] = field(default=None)
+    users_auth_repo: Optional["AuthenticationRepository"] = field(default=None)
+    validation_auth_repo: Optional["AuthenticationRepository"] = field(default=None)
     auth_repo_name: Optional[str] = field(default=None)
     error: Optional[Exception] = field(default=None)
     targets_data: Dict[str, Any] = field(factory=dict)
-    last_validated_commit: str = field(type=str)
-    target_repositories: Dict[GitRepository] = field(factory=dict)
-    cloned_target_repositories: List[GitRepository] = field(factory=list)
+    last_validated_commit: str = field(factory=str)
+    target_repositories: Dict[str, "GitRepository"] = field(factory=dict)
+    cloned_target_repositories: List["GitRepository"] = field(factory=list)
     target_branches_data_from_auth_repo: Dict = field(factory=dict)
     targets_data_by_auth_commits: Dict = field(factory=dict)
     old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
     fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
+    last_validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
+    additional_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
 
 
 @attrs
@@ -107,11 +116,11 @@ def __init__(
             self.get_targets_data_from_auth_repo,
             self.validate_target_repositories_initial_state,
             self.get_target_repositories_commits,
-            self.update_target_repositories,
+            self.validate_target_repositories,
+            self.set_additional_commits_of_target_repositories,
             self.merge_branch_commits
         ])
 
-
         self.url = url
         self.clients_auth_library_dir = clients_auth_library_dir
         self.targets_library_dir = targets_library_dir
@@ -146,7 +155,7 @@ def clone_remote_and_run_tuf_updater(self):
         try:
             self.state.auth_commits_since_last_validated = None
             self.state.existing_repo = (
-                Path(self.clients_auth_library_dir, self.auth_repo_name).exists()
+                Path(self.clients_auth_library_dir, self.state.auth_repo_name).exists()
                 if self.state.auth_repo_name is not None
                 else True
             )
@@ -158,8 +167,8 @@ def clone_remote_and_run_tuf_updater(self):
             _run_tuf_updater(git_updater)
             self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
             self.state.validation_auth_repo = git_updater.validation_auth_repo
-            self.auth_commits_since_last_validated = list(git_updater.commits)
-            self.state.event = Event.CHANGED if len( self.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
+            self.state.auth_commits_since_last_validated = list(git_updater.commits)
+            self.state.event = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
 
             # used for testing purposes
             if settings.overwrite_last_validated_commit:
@@ -181,7 +190,6 @@ def clone_remote_and_run_tuf_updater(self):
                     urls=[self.url],
                     conf_directory_root=self.conf_directory_root,
                 )
-
             self.state.event = Event.FAILED
             return False
 
@@ -218,7 +226,7 @@ def validate_out_of_band_and_update_type(self):
                         f"Repository {self.state.users_auth_repo.name} is not a test repository,"
                         ' but update was called with the "--expected-repo-type" test'
                     )
-
+            return True
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
@@ -226,7 +234,6 @@ def validate_out_of_band_and_update_type(self):
             return False
 
     @log_on_start(INFO, "Cloning or updating user's authentication repository", logger=taf_logger)
-    @log_on_end(INFO, "Clone or fetch finished", logger=taf_logger)
     def clone_or_fetch_users_auth_repo(self):
         if not self.only_validate:
             # fetch the latest commit or clone the repository without checkout
@@ -257,6 +264,7 @@ def load_target_repositories(self):
             self.state.target_repositories = repositoriesdb.get_deduplicated_repositories(
                 self.state.users_auth_repo, self.state.auth_commits_since_last_validated[-1::]
             )
+            return True
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
@@ -264,22 +272,30 @@ def load_target_repositories(self):
             return False
 
     @log_on_start(INFO, "Cloning target repositories which are not on disk", logger=taf_logger)
+    @log_on_start(INFO, "Finished cloning target repositories", logger=taf_logger)
     def clone_target_repositories_if_not_on_disk(self):
-        self.state.cloned_target_repositories = []
-        for repository in self.state.target_repositories.values():
-            is_git_repository = repository.is_git_repository_root
-            if not is_git_repository:
-                if self.only_validate:
-                    taf_logger.warning(
-                        "Target repositories must already exist when only validating repositories"
-                    )
-                    continue
-                repository.clone(no_checkout=True)
-                self.state.cloned_target_repositories.append(repository)
+        try:
+            self.state.cloned_target_repositories = []
+            for repository in self.state.target_repositories.values():
+                is_git_repository = repository.is_git_repository_root
+                if not is_git_repository:
+                    if self.only_validate:
+                        taf_logger.warning(
+                            "Target repositories must already exist when only validating repositories"
+                        )
+                        continue
+                    repository.clone(no_checkout=True)
+                    self.state.cloned_target_repositories.append(repository)
+            return True
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            taf_logger.error(e)
+            return False
 
 
     def get_targets_data_from_auth_repo(self):
-        self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits()
+        self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
         repo_branches = {}
         for repo_name, commits_data in  self.state.targets_data_by_auth_commits.items():
             branches = set()  # using a set to avoid duplicate branches
@@ -287,6 +303,7 @@ def get_targets_data_from_auth_repo(self):
                 branches.add(commit_data["branch"])
             repo_branches[repo_name] = sorted(list(branches))
         self.state.target_branches_data_from_auth_repo = repo_branches
+        return True
 
 
     @log_on_start(INFO, "Validating initial state of target repositories", logger=taf_logger)
@@ -326,6 +343,7 @@ def validate_target_repositories_initial_state(self):
                                     taf_logger.error(msg)
                                     raise UpdateFailedError(msg)
                     self.state.old_heads_per_target_repos_branches[repo_name][branch] = old_head
+            return True
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
@@ -394,31 +412,40 @@ def get_target_repositories_commits(self):
                         except GitError:
                             pass
                 self.state.fetched_commits_per_target_repos_branches[repo_name][branch] = fetched_commits_on_target_repo_branch
+        return True
 
-
-    def update_target_repositories(self):
+    @log_on_start(INFO, "Validating target repositories...", logger=taf_logger)
+    def validate_target_repositories(self):
         """
         Breadth-first update of target repositories
         Merge last valid commits at the end of the update
         """
-        last_validated_target_commits_per_repositories = {}
-        for auth_commit in self.state.auth_commits_since_last_validated:
-            for repository in self.state.target_repositories.values():
-                last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
-                current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
-                target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
-                validated_commit = _validate_current_repo_commit(
-                    repository,
-                    self.state.users_auth_repo,
-                    last_validated_target_commit,
-                    current_targets_data_from_auth_repo,
-                    target_commits_from_target_repo,
-                    auth_commit)
-                last_validated_target_commits_per_repositories[repository.name] = validated_commit
-        # TODO check for additional commits
-
-
-        # event = Event.CHANGED if len(commits) > 1 else Event.UNCHANGED
+        try:
+            last_validated_target_commits_per_repositories = {}
+            self.state.last_validated_commits_per_target_repos_branches = defaultdict(dict)
+            for auth_commit in self.state.auth_commits_since_last_validated:
+                for repository in self.state.target_repositories.values():
+                    last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
+                    current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
+                    target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
+                    validated_commit = self._validate_current_repo_commit(
+                        repository,
+                        self.state.users_auth_repo,
+                        last_validated_target_commit,
+                        current_targets_data_from_auth_repo,
+                        target_commits_from_target_repo,
+                        auth_commit)
+                    last_validated_target_commits_per_repositories[repository.name] = validated_commit
+            return True
+        # TODO merge until the last validated
+        # if at some point validation fails
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return False
+
+
+        # self.state.even = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
         # TODO set targets_data
         # return (
         #     event,
@@ -429,30 +456,179 @@ def update_target_repositories(self):
         #     targets_data,
         # )
 
+    def _validate_current_repo_commit(
+            self,
+            repository,
+            users_auth_repo,
+            last_validated_target_commit,
+            current_targets_data_from_auth_repo,
+            target_commits_from_target_repo,
+            current_auth_commit,
+        ):
+        current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
+        branch = current_targets_data_from_auth_repo["branch"]
+        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
+        if last_validated_target_commit is not None and last_validated_target_commit in target_commits_from_target_repos_on_branch:
+            # same branch
+            current_target_commit = _find_next_value(last_validated_target_commit, target_commits_from_target_repos_on_branch)
+        else:
+            # next branch
+            current_target_commit = target_commits_from_target_repos_on_branch[0]
+
+        if current_target_commit is None:
+            # there are commits missing from the target repository
+            commit_date = users_auth_repo.get_commit_date()
+            raise UpdateFailedError(
+                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+                    data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                    but commit not on branch {branch}"
+            )
+
+        current_target_commit = target_commits_from_target_repo[branch].index
+        if current_commit_from_auth_repo == current_target_commit:
+            self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = current_target_commit
+            return current_target_commit
+        if not _is_unauthenticated_allowed(repository):
+            commit_date = users_auth_repo.get_commit_date()
+            raise UpdateFailedError(
+                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+                    data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                    but repo was at {last_validated_target_commit}"
+            )
+        # unauthenticated commits are allowed, try to skip them
+        # if commits of the target repositories were swapped, commit which is expected to be found
+        # after the current one will be skipped and it won't be found later, so validation will fail
+        remaining_commits = target_commits_from_target_repos_on_branch[target_commits_from_target_repos_on_branch.index(last_validated_target_commit):]
+        for target_commit in remaining_commits:
+            if current_commit_from_auth_repo == target_commit:
+                self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = target_commit
+                return target_commit
+            taf_logger.info(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit_from_auth_repo}")
+        raise UpdateFailedError(
+            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
+                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                but commit not on branch {branch}"
+        )
+
+    def set_additional_commits_of_target_repositories(self):
+        """
+        For target repository and for each branch, extract commits following the last validated commit
+        These commits are not invalid. In case of repositories which cannot contain unauthenticated commits
+        all of these commits will have to get signed if at least one commits on that branch needs to get signed
+        However, no error will get reported if there are commits which have not yet been signed
+        In case of repositories which can contain unauthenticated commits, they do not even need to get signed
+        """
+        self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
+        for repository in self.state.target_repositories.values():
+            # this will only include branches who were, at least partially, validated (up until a certain point)
+            for branch, last_validated_commit in self.state.last_validated_commits_per_target_repos_branches[repository.name].items():
+                # TODO what to do if an error occurred while validating that branch
+                branch_commits = self.state.fetched_commits_per_target_repos_branches[repository.name][branch]
+                additional_commits = branch_commits[branch_commits.index(last_validated_commit) + 1:]
+                if len(additional_commits):
+                    taf_logger.info(f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}")
+
+                    # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
+                    # that does not necessarily mean that the local repository is not up to date with the remote
+                    # pull could've been run manually
+                    # check where the current local head is
+                    # TODO I don't remember why this was done!
+                    branch_current_head = repository.top_commit_of_branch(branch)
+                    if branch_current_head in additional_commits:
+                        additional_commits = additional_commits[
+                            additional_commits.index(branch_current_head) + 1 :
+                        ]
+                self.state.additional_commits_per_target_repos_branches[repository.name][branch] = additional_commits
+        return True
+
+    @log_on_start(INFO, "Merging commits into target repositories...", logger=taf_logger)
+    def merge_commits(self):
+        """Determines which commits needs to be merged into the specified branch and
+        merge it.
+        """
+        try:
+            if self.only_validate:
+                return
+            for repository in self.state.target_repositories.values():
+                # this will only include branches who were, at least partially, validated (up until a certain point)
+                for branch, last_validated_commit in self.state.last_validated_commits_per_target_repos_branches[repository.name].items():
+                    commit_to_merge = (
+                        last_validated_commit if not _is_unauthenticated_allowed(repository) else self.state.fetched_commits_per_target_repos_branches[repository.name][branch][-1]
+                )
+                    taf_logger.info("Repository {}: merging {} into branch {}",  repository.name, commit_to_merge, branch)
+                    _merge_commit(repository, branch, commit_to_merge)
+            return True
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return False
+
+    def set_target_repositories_data(self):
+        # targets_data = {}
+        # for repo_name, repo in repositories.items():
+        #     targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
+        #     commits_data = {}
+        #     for branch, commits_with_custom in repositories_branches_and_commits[
+        #         repo_name
+        #     ].items():
+        #         branch_commits_data = {}
+        #         previous_top_of_branch = top_commits_of_branches_before_pull[repo_name][
+        #             branch
+        #         ]
+
+        #         branch_commits_data["before_pull"] = None
+
+        #         if previous_top_of_branch is not None:
+        #             # this needs to be the same - implementation error otherwise
+        #             branch_commits_data["before_pull"] = (
+        #                 commits_with_custom[0] if len(commits_with_custom) else None
+        #             )
+
+        #         branch_commits_data["after_pull"] = (
+        #             commits_with_custom[-1] if len(commits_with_custom) else None
+        #         )
+
+        #         if branch_commits_data["before_pull"] is not None:
+        #             commits_with_custom.pop(0)
+        #         branch_commits_data["new"] = commits_with_custom
+        #         additional_commits = (
+        #             additional_commits_per_repo[repo_name].get(branch, [])
+        #             if repo_name in additional_commits_per_repo
+        #             else []
+        #         )
+        #         branch_commits_data["unauthenticated"] = additional_commits
+        #         commits_data[branch] = branch_commits_data
+        #     targets_data[repo_name]["commits"] = commits_data
+        # return targets_data
+        pass
+
+
 
     def merge_branch_commits(self):
         # Implement logic
         # Update self.state as necessary
         pass
 
+
+
     def handle_error(self, error):
         # Centralized error handling
         print(f"Error encountered: {error}")
 
 
     def _set_output(self):
-        if self.state.commits is None:
+        if self.state.auth_commits_since_last_validated is None:
             commit_before_pull = None
             new_commits = []
             commit_after_pull = None
         else:
-            commit_before_pull = self.state.commits[0] if self.state.existing_repo and len(self.state.commits) else None
-            commit_after_pull = self.state.commits[-1] if self.state.update_successful else self.state.commits[0]
+            commit_before_pull = self.state.auth_commits_since_last_validated[0] if self.state.existing_repo and len(self.state.auth_commits_since_last_validated) else None
+            commit_after_pull = self.state.auth_commits_since_last_validated[-1] if self.state.update_successful else self.state.auth_commits_since_last_validated[0]
 
             if not self.state.existing_repo:
-                new_commits = self.state.commits
+                new_commits = self.state.auth_commits_since_last_validated
             else:
-                new_commits = self.state.commits[1:] if len(self.state.commits) else []
+                new_commits = self.state.auth_commits_since_last_validated[1:] if len(self.state.auth_commits_since_last_validated) else []
         commits_data = {
             "before_pull": commit_before_pull,
             "new": new_commits,
@@ -507,6 +683,7 @@ def _is_unauthenticated_allowed(repository):
         "allow-unauthenticated-commits", False
     )
 
+@log_on_start(INFO, "Running TUF validation of the authentication repository...", logger=taf_logger)
 def _run_tuf_updater(git_updater):
     def _init_updater():
         try:
@@ -573,57 +750,6 @@ def _update_tuf_current_revision():
     )
 
 
-def _validate_current_repo_commit(
-        repository,
-        users_auth_repo,
-        last_validated_target_commit,
-        current_targets_data_from_auth_repo,
-        target_commits_from_target_repo,
-        current_auth_commit,
-    ):
-    current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
-    branch = current_targets_data_from_auth_repo["branch"]
-    target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
-    if last_validated_target_commit is not None and last_validated_target_commit in target_commits_from_target_repos_on_branch:
-        # same branch
-        current_target_commit = _find_next_value(last_validated_target_commit, target_commits_from_target_repos_on_branch)
-    else:
-        # next branch
-        current_target_commit = target_commits_from_target_repos_on_branch[0]
-
-    if current_target_commit is None:
-        # there are commits missing from the target repository
-        commit_date = users_auth_repo.get_commit_date()
-        raise UpdateFailedError(
-            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-                but commit not on branch {branch}"
-        )
-
-    current_target_commit = target_commits_from_target_repo[branch].index
-    if current_commit_from_auth_repo == current_target_commit:
-        return current_target_commit
-    if not _is_unauthenticated_allowed(repository):
-        commit_date = users_auth_repo.get_commit_date()
-        raise UpdateFailedError(
-            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-                but repo was at {last_validated_target_commit}"
-        )
-    # unauthenticated commits are allowed, try to skip them
-    # if commits of the target repositories were swapped, commit which is expected to be found
-    # after the current one will be skipped and it won't be found later, so validation will fail
-    remaining_commits = target_commits_from_target_repos_on_branch[target_commits_from_target_repos_on_branch.index(last_validated_target_commit):]
-    for target_commit in remaining_commits:
-        if current_commit_from_auth_repo == target_commit:
-            return target_commit
-        taf_logger.info(f"{repository.name}: skipping target commit {target_commit}")
-    raise UpdateFailedError(
-        f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-            data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-            but commit not on branch {branch}"
-    )
-
 def _find_next_value(value, values_list):
     """
     Find the next value in the list after the given value.
@@ -643,4 +769,30 @@ def _find_next_value(value, values_list):
         pass  # value not in list
     return None
 
+def _merge_commit(self, repository, branch, commit_to_merge, checkout=True):
+    """Merge the specified commit into the given branch and check out the branch.
+    If the repository cannot contain unauthenticated commits, check out the merged commit.
+    """
+    taf_logger.info("Merging commit {} into {}", commit_to_merge, repository.name)
+    try:
+        repository.checkout_branch(branch, raise_anyway=True)
+    except GitError as e:
+        # two scenarios:
+        # current git repository is in an inconsistent state:
+        # - .git/index.lock exists (git partial update got applied)
+        # should get addressed in https://github.com/openlawlibrary/taf/issues/210
+        # current git repository has uncommitted changes:
+        # we do not want taf to lose any repo data, so we do not reset the repository.
+        # for now, raise an update error and let the user manually reset the repository
+        taf_logger.error(
+            "Could not checkout branch {} during commit merge. Error {}", branch, e
+        )
+        raise UpdateFailedError(
+            f"Repository {repository.name} should contain only committed changes. \n"
+            f"Please update the repository at {repository.path} manually and try again."
+        )
 
+    repository.merge_commit(commit_to_merge)
+    if checkout:
+        taf_logger.info("{}: checking out branch {}", repository.name, branch)
+        repository.checkout_branch(branch)

From ed887d39a580e6aff5c8b70dfca20f64aae23485 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Fri, 27 Oct 2023 01:54:18 +0200
Subject: [PATCH 08/28] fix: do not expect target commit to be changed in every
 auth commit. Improved logging

---
 taf/updater/updater_pipeline.py | 48 +++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c49181f6..c5f2adda 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 import functools
-from logging import ERROR, INFO
+from logging import DEBUG, ERROR, INFO
 from pathlib import Path
 import shutil
 import tempfile
@@ -146,8 +146,7 @@ def output(self):
             raise ValueError("Pipeline has not been run yet. Please run the pipeline first.")
         return self._output
 
-    @log_on_start(INFO, "Cloning repository and running TUF updater", logger=taf_logger)
-    @log_on_end(INFO, "TUF validation finished", logger=taf_logger)
+    @log_on_start(INFO, "Cloning repository and running TUF updater...", logger=taf_logger)
     @cleanup_decorator
     def clone_remote_and_run_tuf_updater(self):
         settings.update_from_filesystem = self.update_from_filesystem
@@ -195,7 +194,6 @@ def clone_remote_and_run_tuf_updater(self):
 
     @cleanup_decorator
     @log_on_start(INFO, "Validating out of band commit and update type", logger=taf_logger)
-    @log_on_end(INFO, "Validation finished", logger=taf_logger)
     def validate_out_of_band_and_update_type(self):
         try:
             # this is the repository cloned inside the temp directory
@@ -233,7 +231,7 @@ def validate_out_of_band_and_update_type(self):
             taf_logger.error(e)
             return False
 
-    @log_on_start(INFO, "Cloning or updating user's authentication repository", logger=taf_logger)
+    @log_on_start(INFO, "Cloning or updating user's authentication repository...", logger=taf_logger)
     def clone_or_fetch_users_auth_repo(self):
         if not self.only_validate:
             # fetch the latest commit or clone the repository without checkout
@@ -250,6 +248,7 @@ def clone_or_fetch_users_auth_repo(self):
                 return False
         return True
 
+    @log_on_start(DEBUG, "Loading target repositories", logger=taf_logger)
     def load_target_repositories(self):
         try:
             repositoriesdb.load_repositories(
@@ -271,7 +270,7 @@ def load_target_repositories(self):
             taf_logger.error(e)
             return False
 
-    @log_on_start(INFO, "Cloning target repositories which are not on disk", logger=taf_logger)
+    @log_on_start(INFO, "Cloning target repositories which are not on disk...", logger=taf_logger)
     @log_on_start(INFO, "Finished cloning target repositories", logger=taf_logger)
     def clone_target_repositories_if_not_on_disk(self):
         try:
@@ -306,26 +305,27 @@ def get_targets_data_from_auth_repo(self):
         return True
 
 
-    @log_on_start(INFO, "Validating initial state of target repositories", logger=taf_logger)
+    @log_on_start(INFO, "Validating initial state of target repositories...", logger=taf_logger)
+    @log_on_end(INFO, "Validation of the initial state of target repositories finished", logger=taf_logger)
     def validate_target_repositories_initial_state(self):
         try:
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
-            for repo_name, repository in self.target_repositories:
-                for branch in self.state.target_branches_data_from_auth_repo[repo_name]:
+            for repository in self.state.target_repositories.values():
+                for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
                     # if last_validated_commit is None or if the target repository didn't exist prior
                     # to calling update, start the update from the beginning
                     # otherwise, for each branch, start with the last validated commit of the local branch
                     branch_exists = repository.branch_exists(branch, include_remotes=False)
                     if not branch_exists and self.only_validate:
                         self.state.targets_data = {}
-                        msg = f"{repo_name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories"
+                        msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories"
                         taf_logger.error(msg)
                         raise UpdateFailedError(msg)
 
                     # TODO
                     repo_branch_commits = []
                     if (
-                        self.last_validated_commit is None
+                        self.state.last_validated_commit is None
                         or not repository.is_git_repository_root
                         or not branch_exists
                         or not len(repo_branch_commits)
@@ -342,7 +342,7 @@ def validate_target_repositories_initial_state(self):
                                     msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
                                     taf_logger.error(msg)
                                     raise UpdateFailedError(msg)
-                    self.state.old_heads_per_target_repos_branches[repo_name][branch] = old_head
+                    self.state.old_heads_per_target_repos_branches[repository.name][branch] = old_head
             return True
         except Exception as e:
             self.state.error = e
@@ -351,16 +351,16 @@ def validate_target_repositories_initial_state(self):
             # TODO remove cloned?
             return False
 
-    @log_on_start(INFO, "Validating initial state of target repositories", logger=taf_logger)
+    @log_on_start(DEBUG, "Getting fetched commits of target repositories", logger=taf_logger)
     def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
         self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
-        for repo_name, repository in self.target_repositories:
-            for branch in self.state.target_branches_data_from_auth_repo[repo_name]:
+        for repository in self.state.target_repositories.values():
+            for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
                 if repository.is_git_repository_root:
                     repository.fetch(branch=branch)
 
-                old_head = self.state.old_heads_per_target_repos_branches[repo_name][branch]
+                old_head = self.state.old_heads_per_target_repos_branches[repository.name][branch]
                 if old_head is not None:
                     if not self.only_validate:
                         fetched_commits = repository.all_commits_on_branch(
@@ -411,10 +411,11 @@ def get_target_repositories_commits(self):
                                     fetched_commits_on_target_repo_branch.append(commit)
                         except GitError:
                             pass
-                self.state.fetched_commits_per_target_repos_branches[repo_name][branch] = fetched_commits_on_target_repo_branch
+                self.state.fetched_commits_per_target_repos_branches[repository.name][branch] = fetched_commits_on_target_repo_branch
         return True
 
     @log_on_start(INFO, "Validating target repositories...", logger=taf_logger)
+    @log_on_end(INFO, "Validation of target repositories finished", logger=taf_logger)
     def validate_target_repositories(self):
         """
         Breadth-first update of target repositories
@@ -425,6 +426,8 @@ def validate_target_repositories(self):
             self.state.last_validated_commits_per_target_repos_branches = defaultdict(dict)
             for auth_commit in self.state.auth_commits_since_last_validated:
                 for repository in self.state.target_repositories.values():
+                    if auth_commit not in  self.state.targets_data_by_auth_commits[repository.name]:
+                        continue
                     last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
                     current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
                     target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
@@ -435,6 +438,7 @@ def validate_target_repositories(self):
                         current_targets_data_from_auth_repo,
                         target_commits_from_target_repo,
                         auth_commit)
+
                     last_validated_target_commits_per_repositories[repository.name] = validated_commit
             return True
         # TODO merge until the last validated
@@ -468,6 +472,9 @@ def _validate_current_repo_commit(
         current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
         branch = current_targets_data_from_auth_repo["branch"]
         target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
+        if last_validated_target_commit == current_commit_from_auth_repo:
+            # target not updated in this revision
+            return last_validated_target_commit
         if last_validated_target_commit is not None and last_validated_target_commit in target_commits_from_target_repos_on_branch:
             # same branch
             current_target_commit = _find_next_value(last_validated_target_commit, target_commits_from_target_repos_on_branch)
@@ -484,7 +491,6 @@ def _validate_current_repo_commit(
                     but commit not on branch {branch}"
             )
 
-        current_target_commit = target_commits_from_target_repo[branch].index
         if current_commit_from_auth_repo == current_target_commit:
             self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = current_target_commit
             return current_target_commit
@@ -498,18 +504,20 @@ def _validate_current_repo_commit(
         # unauthenticated commits are allowed, try to skip them
         # if commits of the target repositories were swapped, commit which is expected to be found
         # after the current one will be skipped and it won't be found later, so validation will fail
-        remaining_commits = target_commits_from_target_repos_on_branch[target_commits_from_target_repos_on_branch.index(last_validated_target_commit):]
+        remaining_commits = target_commits_from_target_repos_on_branch[
+            target_commits_from_target_repos_on_branch.index(current_target_commit):]
         for target_commit in remaining_commits:
             if current_commit_from_auth_repo == target_commit:
                 self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = target_commit
                 return target_commit
-            taf_logger.info(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit_from_auth_repo}")
+            taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit_from_auth_repo}")
         raise UpdateFailedError(
             f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
                 data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
                 but commit not on branch {branch}"
         )
 
+    @log_on_start(DEBUG, "Setting additional commits of target repositories", logger=taf_logger)
     def set_additional_commits_of_target_repositories(self):
         """
         For target repository and for each branch, extract commits following the last validated commit

From beb5a0ac9cb7218f0a1235bb0ddb5143aaff2d47 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Fri, 27 Oct 2023 21:03:05 +0200
Subject: [PATCH 09/28] refact: integrate updater pipeline into updater, finish
 new implementatino of setting targets_data

---
 taf/git.py                      |   2 +-
 taf/updater/updater.py          | 730 ++------------------------------
 taf/updater/updater_pipeline.py | 115 +++--
 3 files changed, 79 insertions(+), 768 deletions(-)

diff --git a/taf/git.py b/taf/git.py
index 145638e1..733cd9ae 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -815,7 +815,7 @@ def get_commit_date(self, commit_sha: str) -> str:
                 "Could not get commit message. pygit repository could not be instantiated."
             )
         commit = repo.get(commit_sha)
-        date = datetime.utcfromtimestamp(commit.commit_time + commit.commit_time_offset)
+        date = datetime.datetime.utcfromtimestamp(commit.commit_time + commit.commit_time_offset)
         formatted_date = date.strftime('%Y-%m-%d')
         return formatted_date
 
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index 68b49b10..3d256147 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -1,21 +1,15 @@
 import json
 from logging import ERROR
-import shutil
-import enum
-import tempfile
 
 from typing import Dict, Tuple, Any
 from logdecorator import log_on_error
 from taf.git import GitRepository
+from taf.updater.types.update import UpdateType
+from taf.updater.updater_pipeline import AuthenticationRepositoryUpdatePipeline
 
-from tuf.ngclient.updater import Updater
-from tuf.repository_tool import TARGETS_DIRECTORY_NAME
-
-from collections import defaultdict
 from pathlib import Path
 from taf.log import taf_logger, disable_tuf_console_logging
 import taf.repositoriesdb as repositoriesdb
-from taf.auth_repo import AuthenticationRepository
 from taf.utils import timed_run
 import taf.settings as settings
 from taf.exceptions import (
@@ -24,8 +18,6 @@
     GitError,
     ValidationFailedError,
 )
-from taf.updater.handlers import GitUpdater
-from taf.utils import on_rm_error
 
 from taf.updater.lifecycle_handlers import (
     handle_repo_event,
@@ -34,18 +26,10 @@
 )
 from cattr import unstructure
 
-EXPIRED_METADATA_ERROR = "ExpiredMetadataError"
-PROTECTED_DIRECTORY_NAME = "protected"
-INFO_JSON_PATH = f"{TARGETS_DIRECTORY_NAME}/{PROTECTED_DIRECTORY_NAME}/info.json"
 
 disable_tuf_console_logging()
 
 
-class UpdateType(enum.Enum):
-    TEST = "test"
-    OFFICIAL = "official"
-    EITHER = "either"
-
 
 def _check_update_status(repos_update_data: Dict[str, Any]) -> Tuple[Event, str]:
     # helper function to set update status of update handler based on repo status.
@@ -69,41 +53,6 @@ def _check_update_status(repos_update_data: Dict[str, Any]) -> Tuple[Event, str]
     return update_status, errors
 
 
-def _clone_validation_repo(url, repository_name):
-    """
-    Clones the authentication repository based on the url specified using the
-    mirrors parameter. The repository is cloned as a bare repository
-    to a the temp directory and will be deleted one the update is done.
-
-    If repository_name isn't provided (default value), extract it from info.json.
-    """
-    temp_dir = tempfile.mkdtemp()
-    path = Path(temp_dir, "auth_repo").absolute()
-    validation_auth_repo = AuthenticationRepository(
-        path=path, urls=[url], alias="Validation repository"
-    )
-    validation_auth_repo.clone(bare=True)
-    validation_auth_repo.fetch(fetch_all=True)
-
-    settings.validation_repo_path = validation_auth_repo.path
-
-    validation_head_sha = validation_auth_repo.top_commit_of_branch(
-        validation_auth_repo.default_branch
-    )
-
-    if repository_name is None:
-        try:
-            info = validation_auth_repo.get_json(validation_head_sha, INFO_JSON_PATH)
-            repository_name = f'{info["namespace"]}/{info["name"]}'
-        except Exception:
-            raise UpdateFailedError(
-                "Error during info.json parse. When specifying --clients-library-dir check if info.json metadata exists in targets/protected or provide full path to auth repo"
-            )
-
-    validation_auth_repo.cleanup()
-    return repository_name
-
-
 def _execute_repo_handlers(
     update_status,
     auth_repo,
@@ -554,283 +503,35 @@ def _update_current_repository(
     checkout,
     excluded_target_globs,
 ):
-    settings.update_from_filesystem = update_from_filesystem
-    settings.conf_directory_root = conf_directory_root
-
-    def _commits_ret(commits, existing_repo, update_successful):
-        if commits is None:
-            commit_before_pull = None
-            new_commits = []
-            commit_after_pull = None
-        else:
-            commit_before_pull = commits[0] if existing_repo and len(commits) else None
-            commit_after_pull = commits[-1] if update_successful else commits[0]
-
-            if not existing_repo:
-                new_commits = commits
-            else:
-                new_commits = commits[1:] if len(commits) else []
-        return {
-            "before_pull": commit_before_pull,
-            "new": new_commits,
-            "after_pull": commit_after_pull,
-        }
-
-    try:
-        commits = None
-        # check whether the directory that runs clone exists or contains additional files.
-        # we need to check the state of folder before running tuf. Resolves issue #22
-        # if auth_repo_name isn't specified then the current directory doesn't contain additional files.
-        users_repo_existed = (
-            Path(clients_auth_library_dir, auth_repo_name).exists()
-            if auth_repo_name is not None
-            else True
-        )
-        # first clone the validation repository in temp. this is needed because tuf expects auth_repo_name to be valid (not None)
-        # and in the right format (seperated by '/'). this approach covers a case where we don't know authentication repo path upfront.
-        auth_repo_name = _clone_validation_repo(url, auth_repo_name)
-        git_updater = GitUpdater(url, clients_auth_library_dir, auth_repo_name)
-        _run_tuf_updater(git_updater)
-    except Exception as e:
-        # Instantiation of the handler failed - this can happen if the url is not correct
-        # of if the saved last validated commit does not match the current head commit
-        # do not return any commits data if that is the case
-        # TODO in case of last validated issue, think about returning commits up to the last validated one
-        # the problem is that that could indicate that the history was changed
-        users_auth_repo = None
-
-        if auth_repo_name is not None:
-            users_auth_repo = AuthenticationRepository(
-                clients_auth_library_dir,
-                auth_repo_name,
-                urls=[url],
-                conf_directory_root=conf_directory_root,
-            )
-            # make sure that all update affects are deleted if the repository did not exist
-            if not users_repo_existed:
-                shutil.rmtree(users_auth_repo.path, onerror=on_rm_error)
-                shutil.rmtree(users_auth_repo.conf_dir)
-        return (
-            Event.FAILED,
-            users_auth_repo,
-            auth_repo_name,
-            _commits_ret(None, False, False),
-            e,
-            {},
-        )
-    try:
-
-        users_auth_repo = git_updater.users_auth_repo
-        existing_repo = users_auth_repo.is_git_repository_root
-
-        (
-            commits,
-            error_msg,
-            last_validated_commit,
-        ) = _validate_authentication_repository(
-            git_updater,
-            users_auth_repo,
-            out_of_band_authentication,
-            auth_repo_name,
-            expected_repo_type,
-        )
-
-        if error_msg is not None:
-            raise error_msg
-
-        if not only_validate:
-            # fetch the latest commit or clone the repository without checkout
-            # do not merge before targets are validated as well
-            if users_auth_repo.is_git_repository_root:
-                users_auth_repo.fetch(fetch_all=True)
-            else:
-                users_auth_repo.clone()
-
-        # load target repositories and validate them
-        repositoriesdb.load_repositories(
-            users_auth_repo,
-            repo_classes=target_repo_classes,
-            factory=target_factory,
-            library_dir=targets_library_dir,
-            commits=commits,
-            only_load_targets=False,
-            excluded_target_globs=excluded_target_globs,
-        )
-        repositories = repositoriesdb.get_deduplicated_repositories(
-            users_auth_repo, commits[-1::]
-        )
-        repositories_branches_and_commits = (
-            users_auth_repo.sorted_commits_and_branches_per_repositories(
-                commits,
-                default_branch=users_auth_repo.default_branch,
-                excluded_target_globs=excluded_target_globs,
-            )
-        )
-
-        targets_data = _update_target_repositories(
-            repositories,
-            repositories_branches_and_commits,
-            last_validated_commit,
-            only_validate,
-            checkout,
-        )
-    except Exception as e:
-        if not existing_repo:
-            shutil.rmtree(users_auth_repo.path, onerror=on_rm_error)
-            shutil.rmtree(users_auth_repo.conf_dir)
-            commits = None
-        return (
-            Event.FAILED,
-            users_auth_repo,
-            auth_repo_name,
-            _commits_ret(commits, existing_repo, False),
-            e,
-            {},
-        )
-
-    # commits list will always contain the previous top commit of the repository
-    event = Event.CHANGED if len(commits) > 1 else Event.UNCHANGED
-    return (
-        event,
-        users_auth_repo,
+    updater_pipeline = AuthenticationRepositoryUpdatePipeline(
+        url,
+        clients_auth_library_dir,
+        targets_library_dir,
         auth_repo_name,
-        _commits_ret(commits, existing_repo, True),
-        None,
-        targets_data,
+        update_from_filesystem,
+        expected_repo_type,
+        target_repo_classes,
+        target_factory,
+        only_validate,
+        validate_from_commit,
+        conf_directory_root,
+        out_of_band_authentication,
+        checkout,
+        excluded_target_globs,
     )
-
-
-def _update_target_repositories(
-    repositories,
-    repositories_branches_and_commits,
-    last_validated_commit,
-    only_validate,
-    checkout,
-):
-    taf_logger.info("Validating target repositories")
-    # keep track of the repositories which were cloned
-    # so that they can be removed if the update fails
-    cloned_repositories = []
-    allow_unauthenticated = {}
-    new_commits = defaultdict(dict)
-    additional_commits_per_repo = {}
-    top_commits_of_branches_before_pull = {}
-    for path, repository in repositories.items():
-        taf_logger.info("Validating repository {}", repository.name)
-        allow_unauthenticated_for_repo = repository.custom.get(
-            "allow-unauthenticated-commits", False
-        )
-        allow_unauthenticated[path] = allow_unauthenticated_for_repo
-        is_git_repository = repository.is_git_repository_root
-        if not is_git_repository:
-            if only_validate:
-                taf_logger.info(
-                    "Target repositories must already exist when only validating repositories"
-                )
-                continue
-            repository.clone(no_checkout=True)
-            cloned_repositories.append(repository)
-
-        # if no commits were published, repositories_branches_and_commits will be empty
-        # if unauthenticared commits are allowed, we also want to check if there are
-        # new commits which
-        # only check the default branch
-        if (
-            not len(repositories_branches_and_commits[path])
-            and allow_unauthenticated_for_repo
-            and not only_validate
-        ):
-            repositories_branches_and_commits[path][repository.default_branch] = []
-        for branch in repositories_branches_and_commits[path]:
-            taf_logger.info("Validating branch {}", branch)
-            # if last_validated_commit is None or if the target repository didn't exist prior
-            # to calling update, start the update from the beginning
-            # otherwise, for each branch, start with the last validated commit of the local
-            # branch
-            branch_exists = repository.branch_exists(branch, include_remotes=False)
-            if not branch_exists and only_validate:
-                taf_logger.error(
-                    "{} does not contain a local branch named {} and cannot be validated. Please update the repositories",
-                    repository.name,
-                    branch,
-                )
-                return [], {}
-            repo_branch_commits = repositories_branches_and_commits[path][branch]
-            repo_branch_commits = [
-                commit_info["commit"] for commit_info in repo_branch_commits
-            ]
-
-            old_head = _set_target_old_head_and_validate(
-                repository,
-                branch,
-                branch_exists,
-                last_validated_commit,
-                is_git_repository,
-                repo_branch_commits,
-                allow_unauthenticated_for_repo,
-            )
-
-            # the repository was cloned if it didn't exist
-            # if it wasn't cloned, fetch the current branch
-            new_commits_on_repo_branch = _get_commits(
-                repository,
-                is_git_repository,
-                branch,
-                only_validate,
-                old_head,
-                branch_exists,
-                allow_unauthenticated_for_repo,
-            )
-            top_commits_of_branches_before_pull.setdefault(path, {})[branch] = old_head
-            new_commits[path].setdefault(branch, []).extend(new_commits_on_repo_branch)
-            try:
-                additional_commits_on_branch = _update_target_repository(
-                    repository,
-                    new_commits_on_repo_branch,
-                    repo_branch_commits,
-                    allow_unauthenticated_for_repo,
-                    branch,
-                )
-                if len(additional_commits_on_branch):
-                    additional_commits_per_repo.setdefault(repository.name, {})[
-                        branch
-                    ] = additional_commits_on_branch
-
-            except UpdateFailedError as e:
-                taf_logger.error("Update failed due to error: {}", str(e))
-                # delete all repositories that were cloned
-                for repo in cloned_repositories:
-                    taf_logger.debug("Removing cloned repository {}", repo.path)
-                    shutil.rmtree(repo.path, onerror=on_rm_error)
-                # TODO is it important to undo a fetch if the repository was not cloned?
-                raise e
-
-    taf_logger.info("Successfully validated all target repositories.")
-    # do not merge commits if there there are
-    if not only_validate:
-        # if update is successful, merge the commits
-        for path, repository in repositories.items():
-            for branch in repositories_branches_and_commits[path]:
-                branch_commits = repositories_branches_and_commits[path][branch]
-                if not len(branch_commits):
-                    continue
-                _merge_branch_commits(
-                    repository,
-                    branch,
-                    branch_commits,
-                    allow_unauthenticated[path],
-                    additional_commits_per_repo.get(path, {}).get(branch),
-                    new_commits[path][branch],
-                    checkout,
-                )
-    return _set_target_repositories_data(
-        repositories,
-        repositories_branches_and_commits,
-        top_commits_of_branches_before_pull,
-        additional_commits_per_repo,
+    updater_pipeline.run()
+    output = updater_pipeline.output
+    return (
+        output.event,
+        output.users_auth_repo,
+        output.auth_repo_name,
+        output.commits_data,
+        output.error,
+        output.targets_data
     )
 
 
+
 def _update_transient_data(
     transient_data, repos_update_data: Dict[str, str]
 ) -> Dict[str, Any]:
@@ -841,190 +542,6 @@ def _update_transient_data(
     return update_transient_data
 
 
-def _set_target_old_head_and_validate(
-    repository,
-    branch,
-    branch_exists,
-    last_validated_commit,
-    is_git_repository,
-    repo_branch_commits,
-    allow_unauthenticated_for_repo,
-):
-    if (
-        last_validated_commit is None
-        or not is_git_repository
-        or not branch_exists
-        or not len(repo_branch_commits)
-    ):
-        old_head = None
-    else:
-        old_head = repo_branch_commits[0]
-        if not allow_unauthenticated_for_repo:
-            repo_old_head = repository.top_commit_of_branch(branch)
-            # do the same as when checking the top and last_validated_commit of the authentication repository
-            if repo_old_head != old_head:
-                commits_since = repository.all_commits_since_commit(old_head)
-                if repo_old_head not in commits_since:
-                    msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
-                    taf_logger.error(msg)
-                    raise UpdateFailedError(msg)
-    return old_head
-
-
-def _run_tuf_updater(git_updater):
-    def _init_updater():
-        try:
-            return Updater(
-                git_updater.metadata_dir,
-                "metadata/",
-                git_updater.targets_dir,
-                "targets/",
-                fetcher=git_updater,
-            )
-        except Exception as e:
-            taf_logger.error(f"Failed to instantiate TUF Updater due to error: {e}")
-            raise e
-
-    def _update_tuf_current_revision():
-        current_commit = git_updater.current_commit
-        try:
-            updater.refresh()
-            taf_logger.debug("Validated metadata files at revision {}", current_commit)
-            # using refresh, we have updated all main roles
-            # we still need to update the delegated roles (if there are any)
-            # and validate any target files
-            current_targets = git_updater.get_current_targets()
-            for target_path in current_targets:
-                target_filepath = target_path.replace("\\", "/")
-
-                targetinfo = updater.get_targetinfo(target_filepath)
-                target_data = git_updater.get_current_target_data(
-                    target_filepath, raw=True
-                )
-                targetinfo.verify_length_and_hashes(target_data)
-
-                taf_logger.debug(
-                    "Successfully validated target file {} at {}",
-                    target_filepath,
-                    current_commit,
-                )
-        except Exception as e:
-            metadata_expired = EXPIRED_METADATA_ERROR in type(
-                e
-            ).__name__ or EXPIRED_METADATA_ERROR in str(e)
-            if not metadata_expired or settings.strict:
-                taf_logger.error(
-                    "Validation of authentication repository {} failed at revision {} due to error: {}",
-                    git_updater.users_auth_repo.name,
-                    current_commit,
-                    e,
-                )
-                raise UpdateFailedError(
-                    f"Validation of authentication repository {git_updater.users_auth_repo.name}"
-                    f" failed at revision {current_commit} due to error: {e}"
-                )
-            taf_logger.warning(
-                f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error: {e}"
-            )
-
-    while not git_updater.update_done():
-        updater = _init_updater()
-        _update_tuf_current_revision()
-
-    taf_logger.info(
-        "Successfully validated authentication repository {}",
-        git_updater.users_auth_repo.name,
-    )
-
-
-def _get_commits(
-    repository,
-    existing_repository,
-    branch,
-    only_validate,
-    old_head,
-    branch_exists,
-    allow_unauthenticated_commits,
-):
-    """Returns a list of newly fetched commits belonging to the specified branch."""
-    if existing_repository:
-        repository.fetch(branch=branch)
-
-    if old_head is not None:
-        if not only_validate:
-            fetched_commits = repository.all_commits_on_branch(
-                branch=f"origin/{branch}"
-            )
-
-            # if the local branch does not exist (the branch was not checked out locally)
-            # fetched commits will include already validated commits
-            # check which commits are newer that the previous head commit
-            if old_head in fetched_commits:
-                new_commits_on_repo_branch = fetched_commits[
-                    fetched_commits.index(old_head) + 1 : :
-                ]
-            else:
-                new_commits_on_repo_branch = repository.all_commits_since_commit(
-                    old_head, branch
-                )
-                for commit in fetched_commits:
-                    if commit not in new_commits_on_repo_branch:
-                        new_commits_on_repo_branch.append(commit)
-        else:
-            new_commits_on_repo_branch = repository.all_commits_since_commit(
-                old_head, branch
-            )
-        new_commits_on_repo_branch.insert(0, old_head)
-    else:
-        if branch_exists:
-            # this happens in the case when last_validated_commit does not exist
-            # we want to validate all commits, so combine existing commits and
-            # fetched commits
-            new_commits_on_repo_branch = repository.all_commits_on_branch(
-                branch=branch, reverse=True
-            )
-        else:
-            new_commits_on_repo_branch = []
-        if not only_validate:
-            try:
-                fetched_commits = repository.all_commits_on_branch(
-                    branch=f"origin/{branch}"
-                )
-                # if the local branch does not exist (the branch was not checked out locally)
-                # fetched commits will include already validated commits
-                # check which commits are newer that the previous head commit
-                for commit in fetched_commits:
-                    if commit not in new_commits_on_repo_branch:
-                        new_commits_on_repo_branch.append(commit)
-            except GitError:
-                pass
-    return new_commits_on_repo_branch
-
-
-def _merge_branch_commits(
-    repository,
-    branch,
-    branch_commits,
-    allow_unauthenticated,
-    additional_commits,
-    new_branch_commits,
-    checkout=True,
-):
-    """Determines which commits needs to be merged into the specified branch and
-    merge it.
-    """
-    if additional_commits is not None:
-        allow_unauthenticated = False
-    last_commit = branch_commits[-1]["commit"]
-
-    last_validated_commit = last_commit
-    commit_to_merge = (
-        last_validated_commit if not allow_unauthenticated else new_branch_commits[-1]
-    )
-    taf_logger.info("Merging {} into {}", commit_to_merge, repository.name)
-    _merge_commit(repository, branch, commit_to_merge, checkout)
-
-
 def _merge_commit(repository, branch, commit_to_merge, checkout=True):
     """Merge the specified commit into the given branch and check out the branch.
     If the repository cannot contain unauthenticated commits, check out the merged commit.
@@ -1054,150 +571,6 @@ def _merge_commit(repository, branch, commit_to_merge, checkout=True):
         repository.checkout_branch(branch)
 
 
-def _set_target_repositories_data(
-    repositories,
-    repositories_branches_and_commits,
-    top_commits_of_branches_before_pull,
-    additional_commits_per_repo,
-):
-    targets_data = {}
-    for repo_name, repo in repositories.items():
-        targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
-        commits_data = {}
-        for branch, commits_with_custom in repositories_branches_and_commits[
-            repo_name
-        ].items():
-            branch_commits_data = {}
-            previous_top_of_branch = top_commits_of_branches_before_pull[repo_name][
-                branch
-            ]
-
-            branch_commits_data["before_pull"] = None
-
-            if previous_top_of_branch is not None:
-                # this needs to be the same - implementation error otherwise
-                branch_commits_data["before_pull"] = (
-                    commits_with_custom[0] if len(commits_with_custom) else None
-                )
-
-            branch_commits_data["after_pull"] = (
-                commits_with_custom[-1] if len(commits_with_custom) else None
-            )
-
-            if branch_commits_data["before_pull"] is not None:
-                commits_with_custom.pop(0)
-            branch_commits_data["new"] = commits_with_custom
-            additional_commits = (
-                additional_commits_per_repo[repo_name].get(branch, [])
-                if repo_name in additional_commits_per_repo
-                else []
-            )
-            branch_commits_data["unauthenticated"] = additional_commits
-            commits_data[branch] = branch_commits_data
-        targets_data[repo_name]["commits"] = commits_data
-    return targets_data
-
-
-def _update_target_repository(
-    repository,
-    new_commits,
-    target_commits,
-    allow_unauthenticated,
-    branch,
-):
-    taf_logger.info(
-        "Validating target repository {} {} branch", repository.name, branch
-    )
-    # if authenticated commits are allowed, return a list of all fetched commits which
-    # are newer tham the last authenticated commits
-    additional_commits = []
-    # A new commit might have been pushed after the update process
-    # started and before fetch was called
-    # So, the number of new commits, pushed to the target repository, could
-    # be greater than the number of these commits according to the authentication
-    # repository. The opposite cannot be the case.
-    # In general, if there are additional commits in the target repositories,
-    # the updater will finish the update successfully, but will only update the
-    # target repositories until the latest validated commit
-    if not allow_unauthenticated:
-        update_successful = len(new_commits) >= len(target_commits)
-        if update_successful:
-            for target_commit, repo_commit in zip(target_commits, new_commits):
-                if target_commit != repo_commit:
-                    taf_logger.error(
-                        "Mismatch between commits {} and {}", target_commit, repo_commit
-                    )
-                    update_successful = False
-                    break
-        if len(new_commits) > len(target_commits):
-            additional_commits = new_commits[len(target_commits) :]
-            taf_logger.error(
-                "Found commits {} in repository {} that are not accounted for in the authentication repo. Unauthenticated commits are not allowed in this repo.",
-                additional_commits,
-                repository.name,
-            )
-            update_successful = False
-    else:
-        taf_logger.info(
-            "Unauthenticated commits allowed in repository {}", repository.name
-        )
-        update_successful = False
-        if not len(target_commits):
-            update_successful = True
-            additional_commits = new_commits
-        else:
-            target_commits_index = 0
-            for new_commit_index, commit in enumerate(new_commits):
-                if commit in target_commits:
-                    if commit != target_commits[target_commits_index]:
-                        taf_logger.error(
-                            "Mismatch between commits {} and {}",
-                            commit,
-                            target_commits[target_commits_index],
-                        )
-                        break
-                    else:
-                        target_commits_index += 1
-                if commit == target_commits[-1]:
-                    update_successful = True
-                    if commit != new_commits[-1]:
-                        additional_commits = new_commits[new_commit_index + 1 :]
-                    break
-            if len(additional_commits):
-                taf_logger.warning(
-                    "Found commits {} in repository {} which are newer than the last authenticable commit."
-                    "Repository will be updated up to commit {}",
-                    additional_commits,
-                    repository.name,
-                    commit,
-                )
-
-    if not update_successful:
-        taf_logger.error(
-            "Mismatch between target commits specified in authentication repository and the "
-            "target repository {}",
-            repository.name,
-        )
-        raise UpdateFailedError(
-            "Mismatch between target commits specified in authentication repository"
-            f" and target repository {repository.name} on branch {branch}"
-        )
-    taf_logger.info("Successfully validated {}", repository.name)
-
-    if len(additional_commits):
-        # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
-        # that does not necessarily mean that the local repository is not up to date with the remote
-        # pull could've been run manually
-        # check where the current local head is
-        branch_current_head = repository.top_commit_of_branch(branch)
-        if branch_current_head in additional_commits:
-            additional_commits = additional_commits[
-                additional_commits.index(branch_current_head) + 1 :
-            ]
-
-    return additional_commits
-
-
 @timed_run("Validating repository")
 def validate_repository(
     clients_auth_path,
@@ -1244,54 +617,3 @@ def validate_repository(
     settings.last_validated_commit = None
 
 
-def _validate_authentication_repository(
-    repository_updater,
-    users_auth_repo,
-    out_of_band_authentication,
-    auth_repo_name,
-    expected_repo_type,
-):
-    error_msg = None
-    # this is the repository cloned inside the temp directory
-    # we validate it before updating the actual authentication repository
-    validation_auth_repo = repository_updater.validation_auth_repo
-    commits = repository_updater.commits
-    if (
-        out_of_band_authentication is not None
-        and users_auth_repo.last_validated_commit is None
-        and commits[0] != out_of_band_authentication
-    ):
-        error_msg = UpdateFailedError(
-            f"First commit of repository {auth_repo_name} does not match "
-            "out of band authentication commit"
-        )
-    # used for testing purposes
-    if settings.overwrite_last_validated_commit:
-        last_validated_commit = settings.last_validated_commit
-    else:
-        last_validated_commit = users_auth_repo.last_validated_commit
-
-    if expected_repo_type != UpdateType.EITHER:
-        # check if the repository being updated is a test repository
-        if validation_auth_repo.is_test_repo and expected_repo_type != UpdateType.TEST:
-            error_msg = UpdateFailedError(
-                f"Repository {users_auth_repo.name} is a test repository. "
-                'Call update with "--expected-repo-type" test to update a test '
-                "repository"
-            )
-        elif (
-            not validation_auth_repo.is_test_repo
-            and expected_repo_type == UpdateType.TEST
-        ):
-            error_msg = UpdateFailedError(
-                f"Repository {users_auth_repo.name} is not a test repository,"
-                ' but update was called with the "--expected-repo-type" test'
-            )
-    # always cleanup repository updater
-    repository_updater.cleanup()
-
-    return (
-        commits,
-        error_msg,
-        last_validated_commit,
-    )
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c5f2adda..b8838345 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -86,15 +86,16 @@ def run(self):
                     break
 
             except Exception as e:
-                self._handle_error(e)
+                self.handle_error(e)
                 break
 
-            self._set_output()
+        self.set_output()
 
-    def _handle_error(self, e):
-        pass
+    def handle_error(self, e):
+        taf_logger.error("An error occurred while running the updater: {}", str(e))
+        raise e
 
-    def _set_output(self):
+    def set_output(self):
         pass
 
 class AuthenticationRepositoryUpdatePipeline(Pipeline):
@@ -118,7 +119,8 @@ def __init__(
             self.get_target_repositories_commits,
             self.validate_target_repositories,
             self.set_additional_commits_of_target_repositories,
-            self.merge_branch_commits
+            self.merge_commits,
+            self.set_target_repositories_data
         ])
 
         self.url = url
@@ -469,6 +471,9 @@ def _validate_current_repo_commit(
             target_commits_from_target_repo,
             current_auth_commit,
         ):
+        # TODO there is an error with fetched target commits when there is an unauthenticated commit
+        # at the end of branch of a repo that does not support unauthenticated commits
+
         current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
         branch = current_targets_data_from_auth_repo["branch"]
         target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
@@ -484,22 +489,23 @@ def _validate_current_repo_commit(
 
         if current_target_commit is None:
             # there are commits missing from the target repository
-            commit_date = users_auth_repo.get_commit_date()
+            commit_date = users_auth_repo.get_commit_date(current_auth_commit)
             raise UpdateFailedError(
-                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-                    data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-                    but commit not on branch {branch}"
+                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
+data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+but commit not on branch {branch}"
             )
 
         if current_commit_from_auth_repo == current_target_commit:
             self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = current_target_commit
             return current_target_commit
         if not _is_unauthenticated_allowed(repository):
-            commit_date = users_auth_repo.get_commit_date()
+            commit_date = users_auth_repo.get_commit_date(current_auth_commit)
+            # TODO check this, different errors
             raise UpdateFailedError(
                 f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-                    data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-                    but repo was at {last_validated_target_commit}"
+data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+but repo was at {current_target_commit}"
             )
         # unauthenticated commits are allowed, try to skip them
         # if commits of the target repositories were swapped, commit which is expected to be found
@@ -513,8 +519,8 @@ def _validate_current_repo_commit(
             taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit_from_auth_repo}")
         raise UpdateFailedError(
             f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-                data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-                but commit not on branch {branch}"
+data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+but commit not on branch {branch}"
         )
 
     @log_on_start(DEBUG, "Setting additional commits of target repositories", logger=taf_logger)
@@ -572,59 +578,42 @@ def merge_commits(self):
             return False
 
     def set_target_repositories_data(self):
-        # targets_data = {}
-        # for repo_name, repo in repositories.items():
-        #     targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
-        #     commits_data = {}
-        #     for branch, commits_with_custom in repositories_branches_and_commits[
-        #         repo_name
-        #     ].items():
-        #         branch_commits_data = {}
-        #         previous_top_of_branch = top_commits_of_branches_before_pull[repo_name][
-        #             branch
-        #         ]
-
-        #         branch_commits_data["before_pull"] = None
-
-        #         if previous_top_of_branch is not None:
-        #             # this needs to be the same - implementation error otherwise
-        #             branch_commits_data["before_pull"] = (
-        #                 commits_with_custom[0] if len(commits_with_custom) else None
-        #             )
-
-        #         branch_commits_data["after_pull"] = (
-        #             commits_with_custom[-1] if len(commits_with_custom) else None
-        #         )
-
-        #         if branch_commits_data["before_pull"] is not None:
-        #             commits_with_custom.pop(0)
-        #         branch_commits_data["new"] = commits_with_custom
-        #         additional_commits = (
-        #             additional_commits_per_repo[repo_name].get(branch, [])
-        #             if repo_name in additional_commits_per_repo
-        #             else []
-        #         )
-        #         branch_commits_data["unauthenticated"] = additional_commits
-        #         commits_data[branch] = branch_commits_data
-        #     targets_data[repo_name]["commits"] = commits_data
-        # return targets_data
-        pass
-
+        try:
+            targets_data = {}
+            for repo_name, repo in self.state.target_repositories.items():
+                targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
+                commits_data = self.state.targets_data_by_auth_commits[repo_name]
 
+                branch_data = defaultdict(dict)
 
-    def merge_branch_commits(self):
-        # Implement logic
-        # Update self.state as necessary
-        pass
+                # Iterate through auth_commits in the specified order
+                for auth_commit in self.state.auth_commits_since_last_validated:
+                    commit_info = commits_data.get(auth_commit)
+                    if not commit_info or "branch" not in commit_info:
+                        continue
 
+                    branch = commit_info.pop("branch")
 
+                    # Update the before_pull, after_pull, and new values
+                    if branch not in branch_data:
+                        old_head = self.state.old_heads_per_target_repos_branches.get(repo_name, {}).get(branch)
+                        if old_head is not None:
+                            branch_data[branch]["before_pull"] = old_head
+                            branch_data[branch]["new"] = []
+                            branch_data[branch]["unauthenticated"] = self.state.additional_commits_per_target_repos_branches.get(repo_name, {}).get(branch, [])
+                    else:
+                        branch_data[branch]["new"].append(commit_info)
+                        branch_data[branch]["after_pull"] = commit_info
 
-    def handle_error(self, error):
-        # Centralized error handling
-        print(f"Error encountered: {error}")
+                targets_data[repo_name]["commits"] = branch_data
+            self.state.targets_data = targets_data
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return False
 
 
-    def _set_output(self):
+    def set_output(self):
         if self.state.auth_commits_since_last_validated is None:
             commit_before_pull = None
             new_commits = []
@@ -777,11 +766,11 @@ def _find_next_value(value, values_list):
         pass  # value not in list
     return None
 
-def _merge_commit(self, repository, branch, commit_to_merge, checkout=True):
+def _merge_commit(repository, branch, commit_to_merge, checkout=True):
     """Merge the specified commit into the given branch and check out the branch.
     If the repository cannot contain unauthenticated commits, check out the merged commit.
     """
-    taf_logger.info("Merging commit {} into {}", commit_to_merge, repository.name)
+    taf_logger.info("{} Merging commit {} into branch {}", repository.name, commit_to_merge, branch)
     try:
         repository.checkout_branch(branch, raise_anyway=True)
     except GitError as e:

From c3fa1ebb7594136344e1da707532eea5718ba193 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 29 Nov 2023 22:21:49 +0100
Subject: [PATCH 10/28] fix: updater pipeline - iterate through target commits
 properly

---
 taf/updater/updater_pipeline.py | 43 +++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index b8838345..29983a29 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -110,12 +110,11 @@ def __init__(
 
         super().__init__(steps=[
             self.clone_remote_and_run_tuf_updater,
-            self.validate_out_of_band_and_update_type,
             self.clone_or_fetch_users_auth_repo,
             self.load_target_repositories,
             self.clone_target_repositories_if_not_on_disk,
             self.get_targets_data_from_auth_repo,
-            self.validate_target_repositories_initial_state,
+            self.validate_target_repositories_initial_state_and_find_current_branch_head_commits,
             self.get_target_repositories_commits,
             self.validate_target_repositories,
             self.set_additional_commits_of_target_repositories,
@@ -153,6 +152,7 @@ def output(self):
     def clone_remote_and_run_tuf_updater(self):
         settings.update_from_filesystem = self.update_from_filesystem
         settings.conf_directory_root = self.conf_directory_root
+        git_updater = None
         try:
             self.state.auth_commits_since_last_validated = None
             self.state.existing_repo = (
@@ -168,6 +168,9 @@ def clone_remote_and_run_tuf_updater(self):
             _run_tuf_updater(git_updater)
             self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
             self.state.validation_auth_repo = git_updater.validation_auth_repo
+            self._validate_out_of_band_and_update_type()
+
+
             self.state.auth_commits_since_last_validated = list(git_updater.commits)
             self.state.event = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
 
@@ -176,8 +179,7 @@ def clone_remote_and_run_tuf_updater(self):
                 self.state.last_validated_commit = settings.last_validated_commit
             else:
                 self.state.last_validated_commit = self.state.users_auth_repo.last_validated_commit
-            # always clean up repository updater
-            git_updater.cleanup()
+
             return True
 
         except Exception as e:
@@ -193,17 +195,20 @@ def clone_remote_and_run_tuf_updater(self):
                 )
             self.state.event = Event.FAILED
             return False
+        finally:
+            # always clean up repository updater
+            if git_updater is not None:
+                git_updater.cleanup()
 
-    @cleanup_decorator
     @log_on_start(INFO, "Validating out of band commit and update type", logger=taf_logger)
-    def validate_out_of_band_and_update_type(self):
+    def _validate_out_of_band_and_update_type(self):
         try:
             # this is the repository cloned inside the temp directory
             # we validate it before updating the actual authentication repository
             if (
                 self.out_of_band_authentication is not None
                 and self.state.users_auth_repo.last_validated_commit is None
-                and self.auth_commits_since_last_validated[0] != self.out_of_band_authentication
+                and self.state.auth_commits_since_last_validated[0] != self.out_of_band_authentication
             ):
                 raise UpdateFailedError(
                     f"First commit of repository {self.state.auth_repo_name} does not match "
@@ -220,7 +225,7 @@ def validate_out_of_band_and_update_type(self):
                     )
                 elif (
                     not self.state.validation_auth_repo.is_test_repo
-                    and self.state.expected_repo_type == UpdateType.TEST
+                    and self.expected_repo_type == UpdateType.TEST
                 ):
                     raise UpdateFailedError(
                         f"Repository {self.state.users_auth_repo.name} is not a test repository,"
@@ -309,10 +314,15 @@ def get_targets_data_from_auth_repo(self):
 
     @log_on_start(INFO, "Validating initial state of target repositories...", logger=taf_logger)
     @log_on_end(INFO, "Validation of the initial state of target repositories finished", logger=taf_logger)
-    def validate_target_repositories_initial_state(self):
+    def validate_target_repositories_initial_state_and_find_current_branch_head_commits(self):
         try:
+
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
             for repository in self.state.target_repositories.values():
+                # TODO
+                # self.state.targets_data_by_auth_commits
+                # target_branches_data_from_auth_repo does this contain commits
+
                 for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
                     # if last_validated_commit is None or if the target repository didn't exist prior
                     # to calling update, start the update from the beginning
@@ -324,17 +334,18 @@ def validate_target_repositories_initial_state(self):
                         taf_logger.error(msg)
                         raise UpdateFailedError(msg)
 
-                    # TODO
-                    repo_branch_commits = []
+
+
+                    first_commit_on_branch_to_validate = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {}).get("commit")
                     if (
                         self.state.last_validated_commit is None
                         or not repository.is_git_repository_root
                         or not branch_exists
-                        or not len(repo_branch_commits)
+                        or not first_commit_on_branch_to_validate
                     ):
                         old_head = None
                     else:
-                        old_head = repo_branch_commits[0]
+                        old_head = first_commit_on_branch_to_validate
                         if not _is_unauthenticated_allowed(repository):
                             repo_old_head = repository.top_commit_of_branch(branch)
                             # do the same as when checking the top and last_validated_commit of the authentication repository
@@ -424,13 +435,16 @@ def validate_target_repositories(self):
         Merge last valid commits at the end of the update
         """
         try:
+            # need to be set to old head since that is the last validated target
             last_validated_target_commits_per_repositories = {}
             self.state.last_validated_commits_per_target_repos_branches = defaultdict(dict)
             for auth_commit in self.state.auth_commits_since_last_validated:
                 for repository in self.state.target_repositories.values():
-                    if auth_commit not in  self.state.targets_data_by_auth_commits[repository.name]:
+                    if auth_commit not in self.state.targets_data_by_auth_commits[repository.name]:
                         continue
                     last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
+                    if last_validated_target_commit is None:
+                        last_validated_target_commit = self.state.targets_data_by_auth_commits[repository.name].get(auth_commit, {}).get("commit")
                     current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
                     target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
                     validated_commit = self._validate_current_repo_commit(
@@ -473,7 +487,6 @@ def _validate_current_repo_commit(
         ):
         # TODO there is an error with fetched target commits when there is an unauthenticated commit
         # at the end of branch of a repo that does not support unauthenticated commits
-
         current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
         branch = current_targets_data_from_auth_repo["branch"]
         target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]

From 320728e481a686ecf26309f8703f1f8c36e506a6 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Thu, 30 Nov 2023 20:29:15 +0100
Subject: [PATCH 11/28] feat: updater - work on adding support for partial
 update

---
 taf/updater/lifecycle_handlers.py |   1 +
 taf/updater/updater_pipeline.py   | 135 ++++++++++++++++++------------
 2 files changed, 81 insertions(+), 55 deletions(-)

diff --git a/taf/updater/lifecycle_handlers.py b/taf/updater/lifecycle_handlers.py
index 86eea967..ba5ec504 100644
--- a/taf/updater/lifecycle_handlers.py
+++ b/taf/updater/lifecycle_handlers.py
@@ -29,6 +29,7 @@ class Event(enum.Enum):
     CHANGED = "changed"
     UNCHANGED = "unchanged"
     FAILED = "failed"
+    PARTIAL = "partial"
     COMPLETED = "completed"
 
 
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 29983a29..bcba2bd0 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from enum import Enum
 import functools
 from logging import DEBUG, ERROR, INFO
 from pathlib import Path
@@ -28,10 +29,17 @@
 INFO_JSON_PATH = f"{TARGETS_DIRECTORY_NAME}/{PROTECTED_DIRECTORY_NAME}/info.json"
 
 
+class UpdateStatus(Enum):
+    SUCCESS = 1
+    PARTIAL = 2
+    FAILED = 3
+
+
 @define
 class UpdateState:
     auth_commits_since_last_validated: List[Any] = field(factory=list)
     existing_repo: bool = field(default=False)
+    update_status: UpdateStatus = field(default=None)
     update_successful: bool = field(default=False)
     event: Optional[str] = field(default=None)
     users_auth_repo: Optional["AuthenticationRepository"] = field(default=None)
@@ -48,6 +56,7 @@ class UpdateState:
     fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
     last_validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
     additional_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
+    validated_auth_commits: List[str] = field(factory=list)
 
 
 @attrs
@@ -81,8 +90,8 @@ def __init__(self, steps):
     def run(self):
         for step in self.steps:
             try:
-                should_continue = step()
-                if not should_continue:
+                update_status = step()
+                if update_status == UpdateStatus.FAILED:
                     break
 
             except Exception as e:
@@ -158,7 +167,7 @@ def clone_remote_and_run_tuf_updater(self):
             self.state.existing_repo = (
                 Path(self.clients_auth_library_dir, self.state.auth_repo_name).exists()
                 if self.state.auth_repo_name is not None
-                else True
+                else UpdateStatus.SUCCESS
             )
 
             # Clone the validation repository in temp.
@@ -180,7 +189,7 @@ def clone_remote_and_run_tuf_updater(self):
             else:
                 self.state.last_validated_commit = self.state.users_auth_repo.last_validated_commit
 
-            return True
+            return UpdateStatus.SUCCESS
 
         except Exception as e:
             self.state.error = e
@@ -194,7 +203,7 @@ def clone_remote_and_run_tuf_updater(self):
                     conf_directory_root=self.conf_directory_root,
                 )
             self.state.event = Event.FAILED
-            return False
+            return UpdateStatus.FAILED
         finally:
             # always clean up repository updater
             if git_updater is not None:
@@ -231,12 +240,12 @@ def _validate_out_of_band_and_update_type(self):
                         f"Repository {self.state.users_auth_repo.name} is not a test repository,"
                         ' but update was called with the "--expected-repo-type" test'
                     )
-            return True
+            return UpdateStatus.SUCCESS
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
             taf_logger.error(e)
-            return False
+            return UpdateStatus.FAILED
 
     @log_on_start(INFO, "Cloning or updating user's authentication repository...", logger=taf_logger)
     def clone_or_fetch_users_auth_repo(self):
@@ -252,8 +261,8 @@ def clone_or_fetch_users_auth_repo(self):
                 self.state.error = e
                 self.state.event = Event.FAILED
                 taf_logger.error(e)
-                return False
-        return True
+                return UpdateStatus.FAILED
+        return UpdateStatus.SUCCESS
 
     @log_on_start(DEBUG, "Loading target repositories", logger=taf_logger)
     def load_target_repositories(self):
@@ -270,12 +279,12 @@ def load_target_repositories(self):
             self.state.target_repositories = repositoriesdb.get_deduplicated_repositories(
                 self.state.users_auth_repo, self.state.auth_commits_since_last_validated[-1::]
             )
-            return True
+            return UpdateStatus.SUCCESS
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
             taf_logger.error(e)
-            return False
+            return UpdateStatus.FAILED
 
     @log_on_start(INFO, "Cloning target repositories which are not on disk...", logger=taf_logger)
     @log_on_start(INFO, "Finished cloning target repositories", logger=taf_logger)
@@ -292,12 +301,12 @@ def clone_target_repositories_if_not_on_disk(self):
                         continue
                     repository.clone(no_checkout=True)
                     self.state.cloned_target_repositories.append(repository)
-            return True
+            return UpdateStatus.SUCCESS
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
             taf_logger.error(e)
-            return False
+            return UpdateStatus.FAILED
 
 
     def get_targets_data_from_auth_repo(self):
@@ -309,7 +318,7 @@ def get_targets_data_from_auth_repo(self):
                 branches.add(commit_data["branch"])
             repo_branches[repo_name] = sorted(list(branches))
         self.state.target_branches_data_from_auth_repo = repo_branches
-        return True
+        return UpdateStatus.SUCCESS
 
 
     @log_on_start(INFO, "Validating initial state of target repositories...", logger=taf_logger)
@@ -319,9 +328,6 @@ def validate_target_repositories_initial_state_and_find_current_branch_head_comm
 
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
             for repository in self.state.target_repositories.values():
-                # TODO
-                # self.state.targets_data_by_auth_commits
-                # target_branches_data_from_auth_repo does this contain commits
 
                 for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
                     # if last_validated_commit is None or if the target repository didn't exist prior
@@ -334,8 +340,6 @@ def validate_target_repositories_initial_state_and_find_current_branch_head_comm
                         taf_logger.error(msg)
                         raise UpdateFailedError(msg)
 
-
-
                     first_commit_on_branch_to_validate = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {}).get("commit")
                     if (
                         self.state.last_validated_commit is None
@@ -356,13 +360,12 @@ def validate_target_repositories_initial_state_and_find_current_branch_head_comm
                                     taf_logger.error(msg)
                                     raise UpdateFailedError(msg)
                     self.state.old_heads_per_target_repos_branches[repository.name][branch] = old_head
-            return True
+            return UpdateStatus.SUCCESS
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
             taf_logger.error(e)
-            # TODO remove cloned?
-            return False
+            return UpdateStatus.FAILED
 
     @log_on_start(DEBUG, "Getting fetched commits of target repositories", logger=taf_logger)
     def get_target_repositories_commits(self):
@@ -425,7 +428,7 @@ def get_target_repositories_commits(self):
                         except GitError:
                             pass
                 self.state.fetched_commits_per_target_repos_branches[repository.name][branch] = fetched_commits_on_target_repo_branch
-        return True
+        return UpdateStatus.SUCCESS
 
     @log_on_start(INFO, "Validating target repositories...", logger=taf_logger)
     @log_on_end(INFO, "Validation of target repositories finished", logger=taf_logger)
@@ -436,34 +439,55 @@ def validate_target_repositories(self):
         """
         try:
             # need to be set to old head since that is the last validated target
-            last_validated_target_commits_per_repositories = {}
             self.state.last_validated_commits_per_target_repos_branches = defaultdict(dict)
+
+            last_validated_data_per_repositories = defaultdict(dict)
+            self.state.validated_auth_commits = []
             for auth_commit in self.state.auth_commits_since_last_validated:
                 for repository in self.state.target_repositories.values():
                     if auth_commit not in self.state.targets_data_by_auth_commits[repository.name]:
                         continue
-                    last_validated_target_commit = last_validated_target_commits_per_repositories.get(repository.name)
-                    if last_validated_target_commit is None:
-                        last_validated_target_commit = self.state.targets_data_by_auth_commits[repository.name].get(auth_commit, {}).get("commit")
-                    current_targets_data_from_auth_repo = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
+                    current_targets_data = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
+
+                    current_branch = current_targets_data.get("branch", repository.default_branch)
+                    current_commit = current_targets_data["commit"]
+
+                    if not len(last_validated_data_per_repositories[repository.name]):
+                        current_head_commit_and_branch = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {})
+                        previous_branch = current_head_commit_and_branch.get("branch")
+                        previous_commit = current_head_commit_and_branch.get("commit")
+                        if previous_commit is not None and previous_branch is None:
+                            previous_branch = repository.default_branch
+                    else:
+                        previous_branch = last_validated_data_per_repositories[repository.name].get("branch")
+                        previous_commit = last_validated_data_per_repositories[repository.name]["commit"]
+
                     target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
                     validated_commit = self._validate_current_repo_commit(
                         repository,
                         self.state.users_auth_repo,
-                        last_validated_target_commit,
-                        current_targets_data_from_auth_repo,
+                        previous_branch,
+                        previous_commit,
+                        current_branch,
+                        current_commit,
                         target_commits_from_target_repo,
                         auth_commit)
 
-                    last_validated_target_commits_per_repositories[repository.name] = validated_commit
-            return True
+                    last_validated_data_per_repositories[repository.name] = {"commit": validated_commit, "branch": current_branch}
+                    self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = validated_commit
+
+                # commit processed without an error
+                self.state.validated_auth_commits.append(auth_commit)
+            return UpdateStatus.SUCCESS
         # TODO merge until the last validated
         # if at some point validation fails
         except Exception as e:
             self.state.error = e
+            if len(self.state.validated_auth_commits):
+                self.state.event = Event.PARTIAL
+                return UpdateStatus.PARTIAL
             self.state.event = Event.FAILED
-            return False
-
+            return UpdateStatus.FAILED
 
         # self.state.even = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
         # TODO set targets_data
@@ -480,22 +504,23 @@ def _validate_current_repo_commit(
             self,
             repository,
             users_auth_repo,
-            last_validated_target_commit,
-            current_targets_data_from_auth_repo,
+            previous_branch,
+            previous_commit,
+            current_branch,
+            current_commit,
             target_commits_from_target_repo,
             current_auth_commit,
         ):
         # TODO there is an error with fetched target commits when there is an unauthenticated commit
         # at the end of branch of a repo that does not support unauthenticated commits
-        current_commit_from_auth_repo = current_targets_data_from_auth_repo["commit"]
-        branch = current_targets_data_from_auth_repo["branch"]
-        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[branch]
-        if last_validated_target_commit == current_commit_from_auth_repo:
+
+        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[current_branch]
+        if previous_commit == current_commit:
             # target not updated in this revision
-            return last_validated_target_commit
-        if last_validated_target_commit is not None and last_validated_target_commit in target_commits_from_target_repos_on_branch:
+            return current_commit
+        if previous_branch == current_branch:
             # same branch
-            current_target_commit = _find_next_value(last_validated_target_commit, target_commits_from_target_repos_on_branch)
+            current_target_commit = _find_next_value(previous_commit, target_commits_from_target_repos_on_branch)
         else:
             # next branch
             current_target_commit = target_commits_from_target_repos_on_branch[0]
@@ -505,19 +530,19 @@ def _validate_current_repo_commit(
             commit_date = users_auth_repo.get_commit_date(current_auth_commit)
             raise UpdateFailedError(
                 f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
-data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-but commit not on branch {branch}"
+data repository {repository.name} was supposed to be at commit {current_commit} \
+but commit not on branch {current_branch}"
             )
 
-        if current_commit_from_auth_repo == current_target_commit:
-            self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = current_target_commit
+        if current_commit == current_target_commit:
+            self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = current_target_commit
             return current_target_commit
         if not _is_unauthenticated_allowed(repository):
             commit_date = users_auth_repo.get_commit_date(current_auth_commit)
             # TODO check this, different errors
             raise UpdateFailedError(
-                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
+                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
+data repository {repository.name} was supposed to be at commit {current_commit} \
 but repo was at {current_target_commit}"
             )
         # unauthenticated commits are allowed, try to skip them
@@ -526,14 +551,14 @@ def _validate_current_repo_commit(
         remaining_commits = target_commits_from_target_repos_on_branch[
             target_commits_from_target_repos_on_branch.index(current_target_commit):]
         for target_commit in remaining_commits:
-            if current_commit_from_auth_repo == target_commit:
-                self.state.last_validated_commits_per_target_repos_branches[repository.name][branch] = target_commit
+            if current_commit == target_commit:
+                self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = target_commit
                 return target_commit
-            taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit_from_auth_repo}")
+            taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit}")
         raise UpdateFailedError(
-            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}:\
-data repository {repository.name} was supposed to be at commit {current_commit_from_auth_repo} \
-but commit not on branch {branch}"
+            f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
+data repository {repository.name} was supposed to be at commit {current_commit} \
+but commit not on branch {current_branch}"
         )
 
     @log_on_start(DEBUG, "Setting additional commits of target repositories", logger=taf_logger)

From 3930ef2ce327af11a7e8fe0cd8f67ea628b89238 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Fri, 1 Dec 2023 20:37:27 +0100
Subject: [PATCH 12/28] feat: updater - raise an error in case of additional
 unauthenticated commits and pipeline fixes

---
 taf/updater/updater_pipeline.py | 81 ++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 36 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index bcba2bd0..51b699d5 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -54,7 +54,7 @@ class UpdateState:
     targets_data_by_auth_commits: Dict = field(factory=dict)
     old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
     fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
-    last_validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
+    validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
     additional_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
     validated_auth_commits: List[str] = field(factory=list)
 
@@ -93,6 +93,7 @@ def run(self):
                 update_status = step()
                 if update_status == UpdateStatus.FAILED:
                     break
+                self.state.update_status = update_status
 
             except Exception as e:
                 self.handle_error(e)
@@ -126,7 +127,7 @@ def __init__(
             self.validate_target_repositories_initial_state_and_find_current_branch_head_commits,
             self.get_target_repositories_commits,
             self.validate_target_repositories,
-            self.set_additional_commits_of_target_repositories,
+            self.validate_and_set_additional_commits_of_target_repositories,
             self.merge_commits,
             self.set_target_repositories_data
         ])
@@ -439,7 +440,7 @@ def validate_target_repositories(self):
         """
         try:
             # need to be set to old head since that is the last validated target
-            self.state.last_validated_commits_per_target_repos_branches = defaultdict(dict)
+            self.state.validated_commits_per_target_repos_branches = defaultdict(dict)
 
             last_validated_data_per_repositories = defaultdict(dict)
             self.state.validated_auth_commits = []
@@ -474,7 +475,8 @@ def validate_target_repositories(self):
                         auth_commit)
 
                     last_validated_data_per_repositories[repository.name] = {"commit": validated_commit, "branch": current_branch}
-                    self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = validated_commit
+
+                    self.state.validated_commits_per_target_repos_branches[repository.name].setdefault(current_branch, []).append(validated_commit)
 
                 # commit processed without an error
                 self.state.validated_auth_commits.append(auth_commit)
@@ -489,16 +491,6 @@ def validate_target_repositories(self):
             self.state.event = Event.FAILED
             return UpdateStatus.FAILED
 
-        # self.state.even = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
-        # TODO set targets_data
-        # return (
-        #     event,
-        #     users_auth_repo,
-        #     auth_repo_name,
-        #     _commits_ret(commits, existing_repo, True),
-        #     None,
-        #     targets_data,
-        # )
 
     def _validate_current_repo_commit(
             self,
@@ -535,7 +527,6 @@ def _validate_current_repo_commit(
             )
 
         if current_commit == current_target_commit:
-            self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = current_target_commit
             return current_target_commit
         if not _is_unauthenticated_allowed(repository):
             commit_date = users_auth_repo.get_commit_date(current_auth_commit)
@@ -552,7 +543,6 @@ def _validate_current_repo_commit(
             target_commits_from_target_repos_on_branch.index(current_target_commit):]
         for target_commit in remaining_commits:
             if current_commit == target_commit:
-                self.state.last_validated_commits_per_target_repos_branches[repository.name][current_branch] = target_commit
                 return target_commit
             taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit}")
         raise UpdateFailedError(
@@ -561,8 +551,8 @@ def _validate_current_repo_commit(
 but commit not on branch {current_branch}"
         )
 
-    @log_on_start(DEBUG, "Setting additional commits of target repositories", logger=taf_logger)
-    def set_additional_commits_of_target_repositories(self):
+    @log_on_start(DEBUG, "Validating and setting additional commits of target repositories", logger=taf_logger)
+    def validate_and_set_additional_commits_of_target_repositories(self):
         """
         For target repository and for each branch, extract commits following the last validated commit
         These commits are not invalid. In case of repositories which cannot contain unauthenticated commits
@@ -570,14 +560,22 @@ def set_additional_commits_of_target_repositories(self):
         However, no error will get reported if there are commits which have not yet been signed
         In case of repositories which can contain unauthenticated commits, they do not even need to get signed
         """
+        if self.state.update_status != UpdateStatus.SUCCESS:
+            return self.state.update_status
         self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
         for repository in self.state.target_repositories.values():
-            # this will only include branches who were, at least partially, validated (up until a certain point)
-            for branch, last_validated_commit in self.state.last_validated_commits_per_target_repos_branches[repository.name].items():
+            # this will only include branches that were, at least partially, validated (up until a certain point)
+            for branch, validated_commits in self.state.validated_commits_per_target_repos_branches[repository.name].items():
+                last_validated_commit = validated_commits[-1]
                 # TODO what to do if an error occurred while validating that branch
                 branch_commits = self.state.fetched_commits_per_target_repos_branches[repository.name][branch]
                 additional_commits = branch_commits[branch_commits.index(last_validated_commit) + 1:]
                 if len(additional_commits):
+                    if not _is_unauthenticated_allowed(repository):
+                        raise UpdateFailedError(
+                            f"Target repository {repository.name} does not allow unauthenticated commits, but contains commit(s) {', '.join(additional_commits)} on branch {branch}"
+                        )
+
                     taf_logger.info(f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}")
 
                     # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
@@ -591,7 +589,7 @@ def set_additional_commits_of_target_repositories(self):
                             additional_commits.index(branch_current_head) + 1 :
                         ]
                 self.state.additional_commits_per_target_repos_branches[repository.name][branch] = additional_commits
-        return True
+        return self.state.update_status
 
     @log_on_start(INFO, "Merging commits into target repositories...", logger=taf_logger)
     def merge_commits(self):
@@ -600,20 +598,30 @@ def merge_commits(self):
         """
         try:
             if self.only_validate:
-                return
-            for repository in self.state.target_repositories.values():
-                # this will only include branches who were, at least partially, validated (up until a certain point)
-                for branch, last_validated_commit in self.state.last_validated_commits_per_target_repos_branches[repository.name].items():
-                    commit_to_merge = (
-                        last_validated_commit if not _is_unauthenticated_allowed(repository) else self.state.fetched_commits_per_target_repos_branches[repository.name][branch][-1]
-                )
-                    taf_logger.info("Repository {}: merging {} into branch {}",  repository.name, commit_to_merge, branch)
-                    _merge_commit(repository, branch, commit_to_merge)
-            return True
+                return self.state.update_status
+
+            if self.state.update_status == UpdateStatus.FAILED:
+                # couldn't validate the first new commit
+                # there is nothing to merge
+                # remove cloned repositories if the initial commit was incorrect
+                for repository in self.state.cloned_target_repositories:
+                    taf_logger.debug("Removing cloned repository {}", repository.path)
+                    shutil.rmtree(repository.path, onerror=on_rm_error)
+            else:
+                for repository in self.state.target_repositories.values():
+                    # this will only include branches that were, at least partially, validated (up until a certain point)
+                    for branch, validated_commits in self.state.validated_commits_per_target_repos_branches[repository.name].items():
+                        last_validated_commit = validated_commits[-1]
+                        commit_to_merge = (
+                            last_validated_commit if not _is_unauthenticated_allowed(repository) else self.state.fetched_commits_per_target_repos_branches[repository.name][branch][-1]
+                    )
+                        taf_logger.info("Repository {}: merging {} into branch {}",  repository.name, commit_to_merge, branch)
+                        _merge_commit(repository, branch, commit_to_merge)
+                return self.state.update_status
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
-            return False
+            return UpdateStatus.FAILED
 
     def set_target_repositories_data(self):
         try:
@@ -657,13 +665,14 @@ def set_output(self):
             new_commits = []
             commit_after_pull = None
         else:
-            commit_before_pull = self.state.auth_commits_since_last_validated[0] if self.state.existing_repo and len(self.state.auth_commits_since_last_validated) else None
-            commit_after_pull = self.state.auth_commits_since_last_validated[-1] if self.state.update_successful else self.state.auth_commits_since_last_validated[0]
+            commit_before_pull = self.state.validated_auth_commits[0] if self.state.existing_repo and len(self.state.validated_auth_commits) else None
+
+            commit_after_pull = self.state.validated_auth_commits[-1]
 
             if not self.state.existing_repo:
-                new_commits = self.state.auth_commits_since_last_validated
+                new_commits = self.state.validated_auth_commits
             else:
-                new_commits = self.state.auth_commits_since_last_validated[1:] if len(self.state.auth_commits_since_last_validated) else []
+                new_commits = self.state.validated_auth_commits[1:] if len(self.state.validated_auth_commits) else []
         commits_data = {
             "before_pull": commit_before_pull,
             "new": new_commits,

From 560701133d3232308e0a06b096c39f6c1b7c0db5 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 05:11:52 +0100
Subject: [PATCH 13/28] feat, refact: updater pipeline - start update from the
 beginning if initial state is out-of-sync

---
 taf/git.py                      |   4 +-
 taf/updater/updater_pipeline.py | 113 +++++++++++++++++++-------------
 2 files changed, 68 insertions(+), 49 deletions(-)

diff --git a/taf/git.py b/taf/git.py
index 733cd9ae..5bba0ea3 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -65,7 +65,6 @@ def __init__(
             raise InvalidRepositoryError(
                 "Both library_dir and name need to be specified"
             )
-
         if name is not None and library_dir is not None:
             self.name = self._validate_repo_name(name)
             self.path = self._validate_repo_path(library_dir, name, path)
@@ -316,8 +315,9 @@ def all_commits_since_commit(
         specified or currently checked out branch
 
         Raises:
-            exceptions.GitError: An error occured with provided commit SHA
+            exceptions.GitError: An error occurred with provided commit SHA
         """
+
         if since_commit is None:
             return self.all_commits_on_branch(branch=branch, reverse=reverse)
 
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 51b699d5..cf88775a 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -123,8 +123,8 @@ def __init__(
             self.clone_or_fetch_users_auth_repo,
             self.load_target_repositories,
             self.clone_target_repositories_if_not_on_disk,
+            self.determine_start_commits,
             self.get_targets_data_from_auth_repo,
-            self.validate_target_repositories_initial_state_and_find_current_branch_head_commits,
             self.get_target_repositories_commits,
             self.validate_target_repositories,
             self.validate_and_set_additional_commits_of_target_repositories,
@@ -310,57 +310,56 @@ def clone_target_repositories_if_not_on_disk(self):
             return UpdateStatus.FAILED
 
 
-    def get_targets_data_from_auth_repo(self):
-        self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
-        repo_branches = {}
-        for repo_name, commits_data in  self.state.targets_data_by_auth_commits.items():
-            branches = set()  # using a set to avoid duplicate branches
-            for commit_data in commits_data.values():
-                branches.add(commit_data["branch"])
-            repo_branches[repo_name] = sorted(list(branches))
-        self.state.target_branches_data_from_auth_repo = repo_branches
-        return UpdateStatus.SUCCESS
-
 
     @log_on_start(INFO, "Validating initial state of target repositories...", logger=taf_logger)
     @log_on_end(INFO, "Validation of the initial state of target repositories finished", logger=taf_logger)
-    def validate_target_repositories_initial_state_and_find_current_branch_head_commits(self):
+    def determine_start_commits(self):
         try:
-
+            self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
-            for repository in self.state.target_repositories.values():
+            is_initial_state_in_sync = True
+            # if last validated commit was not manually modified (set to a newer commit)
+            # target repositories data that is extracted to them (commit and branch)
+            # should be present in the local repository
+            # if the local repository was manually modified (say, something was committed)
+            # we still expect the last validated target commit to exist
+            # and the remaining commits will be validated afterwards
+            # if the last validated target commit does not exist, start the validation from scratch
+            if self.state.last_validated_commit is not None:
+                for repository in self.state.target_repositories.values():
+                    self.state.old_heads_per_target_repos_branches[repository.name] = {}
+                    last_validated_repository_commits_data = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {})
+
+                    if last_validated_repository_commits_data:
+                        # if this is not set, it means that the repository did not exist in this revision
+                        if repository in self.state.cloned_target_repositories:
+                            is_initial_state_in_sync = False
+                            break
+                        current_branch = last_validated_repository_commits_data.get("branch", repository.default_branch)
+                        last_validated_commit = last_validated_repository_commits_data["commit"]
+
+                        branch_exists = repository.branch_exists(current_branch, include_remotes=False)
+                        if not branch_exists:
+                            is_initial_state_in_sync = False
+                            break
+                        top_commit_of_branch = repository.top_commit_of_branch(current_branch)
+                        if top_commit_of_branch != last_validated_commit:
+                            # check if top commit is newer (which is fine, it will be validated)
+                            # or older, meaning that the authentication repository contains
+                            # additional commits, so it would be necessary to find older auth repo
+                            # commit and start the validation from there
+                            if not current_branch in repository.branches_containing_commit(last_validated_commit):
+                                is_initial_state_in_sync = False
+                                break
+
+                        self.state.old_heads_per_target_repos_branches[repository.name][current_branch] = last_validated_commit
+
+            if not is_initial_state_in_sync:
+                taf_logger.info(f"Repository {self.state.users_auth_repo.name}: states of target repositories are not in sync with last validated commit. Starting the update from the beginning")
+                self.state.last_validated_commit = None
+                self.state.auth_commits_since_last_validated = self.state.users_auth_repo.all_commits_on_branch(self.state.users_auth_repo.default_branch)
+                self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
 
-                for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
-                    # if last_validated_commit is None or if the target repository didn't exist prior
-                    # to calling update, start the update from the beginning
-                    # otherwise, for each branch, start with the last validated commit of the local branch
-                    branch_exists = repository.branch_exists(branch, include_remotes=False)
-                    if not branch_exists and self.only_validate:
-                        self.state.targets_data = {}
-                        msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories"
-                        taf_logger.error(msg)
-                        raise UpdateFailedError(msg)
-
-                    first_commit_on_branch_to_validate = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {}).get("commit")
-                    if (
-                        self.state.last_validated_commit is None
-                        or not repository.is_git_repository_root
-                        or not branch_exists
-                        or not first_commit_on_branch_to_validate
-                    ):
-                        old_head = None
-                    else:
-                        old_head = first_commit_on_branch_to_validate
-                        if not _is_unauthenticated_allowed(repository):
-                            repo_old_head = repository.top_commit_of_branch(branch)
-                            # do the same as when checking the top and last_validated_commit of the authentication repository
-                            if repo_old_head != old_head:
-                                commits_since = repository.all_commits_since_commit(old_head)
-                                if repo_old_head not in commits_since:
-                                    msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}"
-                                    taf_logger.error(msg)
-                                    raise UpdateFailedError(msg)
-                    self.state.old_heads_per_target_repos_branches[repository.name][branch] = old_head
             return UpdateStatus.SUCCESS
         except Exception as e:
             self.state.error = e
@@ -368,6 +367,18 @@ def validate_target_repositories_initial_state_and_find_current_branch_head_comm
             taf_logger.error(e)
             return UpdateStatus.FAILED
 
+
+    def get_targets_data_from_auth_repo(self):
+        repo_branches = {}
+        for repo_name, commits_data in  self.state.targets_data_by_auth_commits.items():
+            branches = set()  # using a set to avoid duplicate branches
+            for commit_data in commits_data.values():
+                branches.add(commit_data["branch"])
+            repo_branches[repo_name] = sorted(list(branches))
+        self.state.target_branches_data_from_auth_repo = repo_branches
+        return UpdateStatus.SUCCESS
+
+
     @log_on_start(DEBUG, "Getting fetched commits of target repositories", logger=taf_logger)
     def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
@@ -375,9 +386,17 @@ def get_target_repositories_commits(self):
         for repository in self.state.target_repositories.values():
             for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
                 if repository.is_git_repository_root:
+                    if self.only_validate:
+                        branch_exists = repository.branch_exists(branch, include_remotes=False)
+                        if not branch_exists:
+                            self.state.targets_data = {}
+                            msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories."
+                            taf_logger.error(msg)
+                            raise UpdateFailedError(msg)
+                else:
                     repository.fetch(branch=branch)
 
-                old_head = self.state.old_heads_per_target_repos_branches[repository.name][branch]
+                old_head = self.state.old_heads_per_target_repos_branches[repository.name].get(branch)
                 if old_head is not None:
                     if not self.only_validate:
                         fetched_commits = repository.all_commits_on_branch(

From e8312b053736f7eac05e37dc7c66c50b6fc5be0f Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 05:12:52 +0100
Subject: [PATCH 14/28] test: updater - define patterns for checking error
 messages, make more generic

---
 .../test_repo_update/test_updater.py          | 81 ++++++++++---------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py
index eeeafa7d..d61d46ec 100644
--- a/taf/tests/test_updater/test_repo_update/test_updater.py
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py
@@ -71,9 +71,16 @@
 
 AUTH_REPO_REL_PATH = "organization/auth_repo"
 TARGET_REPO_REL_PATH = "namespace/TargetRepo1"
-TARGET1_SHA_MISMATCH = "Mismatch between target commits specified in authentication repository and target repository namespace/TargetRepo1"
-TARGET2_SHA_MISMATCH = "Mismatch between target commits specified in authentication repository and target repository namespace/TargetRepo2"
-TARGETS_MISMATCH_ANY = "Mismatch between target commits specified in authentication repository and target repository"
+
+TARGET_MISSMATCH_PATTERN = r"Update of organization\/auth_repo failed due to error: Failure to validate organization\/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-z]{40}) but repo was at ([0-9a-f]{40})"
+TARGET_ADDITIONAL_COMMIT = r"Update of organization\/auth_repo failed due to error: Failure to validate organization\/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)"
+TARGET_COMMIT_AFTER_LAST_VALIDATED = r"Update of organization\/auth_repo failed due to error: Target repository ([\w\/-]+) does not allow unauthenticated commits, but contains commit\(s\) ([0-9a-f]{40}(?:, [0-9a-f]{40})*) on branch (\w+)"
+TARGET_MISSING_COMMIT = r"Update of organization/auth_repo failed due to error: Failure to validate organization/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)"
+INDEX_LOCKED = r"Update of organization/auth_repo failed due to error: Repo ([\w\/-]+): the index is locked; this might be due to a concurrent or crashed process"
+
+
+
+
 NO_WORKING_MIRRORS = (
     f"Validation of authentication repository {AUTH_REPO_REL_PATH} failed at revision"
 )
@@ -239,26 +246,27 @@ def test_no_update_necessary(
 
 
 @pytest.mark.parametrize(
-    "test_name, expected_error, auth_repo_name_exists",
+    "test_name, expected_error, auth_repo_name_exists, expect_partial_update",
     [
-        ("test-updater-invalid-target-sha", TARGET1_SHA_MISMATCH, True),
-        ("test-updater-additional-target-commit", TARGET1_SHA_MISMATCH, True),
-        ("test-updater-missing-target-commit", TARGET1_SHA_MISMATCH, True),
-        ("test-updater-wrong-key", NO_WORKING_MIRRORS, True),
-        ("test-updater-invalid-version-number", REPLAYED_METADATA, True),
-        ("test-updater-delegated-roles-wrong-sha", TARGET2_SHA_MISMATCH, True),
-        ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True),
-        ("test-updater-updated-root-invalid-metadata", NO_WORKING_MIRRORS, True),
-        ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False),
+        ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN, True, True),
+        ("test-updater-additional-target-commit", TARGET_COMMIT_AFTER_LAST_VALIDATED, True, True),
+        ("test-updater-missing-target-commit", TARGET_ADDITIONAL_COMMIT, True, True),
+        ("test-updater-wrong-key", NO_WORKING_MIRRORS, True, True),
+        ("test-updater-invalid-version-number", REPLAYED_METADATA, True, True),
+        ("test-updater-delegated-roles-wrong-sha", TARGET_MISSMATCH_PATTERN, True, True),
+        ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True, True),
+        ("test-updater-updated-root-invalid-metadata", NO_WORKING_MIRRORS, True, True),
+        ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False, True),
         (
             "test-updater-invalid-snapshot-meta-field-missing",
             METADATA_FIELD_MISSING,
             False,
+            True,
         ),
     ],
 )
 def test_updater_invalid_update(
-    test_name, expected_error, auth_repo_name_exists, updater_repositories, client_dir
+    test_name, expected_error, auth_repo_name_exists, updater_repositories, client_dir, expect_partial_update
 ):
     repositories = updater_repositories[test_name]
     clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH
@@ -267,6 +275,7 @@ def test_updater_invalid_update(
         client_dir,
         repositories,
         expected_error,
+        expect_partial_update,
         auth_repo_name_exists=auth_repo_name_exists,
     )
     # make sure that the last validated commit does not exist
@@ -276,8 +285,8 @@ def test_updater_invalid_update(
 @pytest.mark.parametrize(
     "test_name, expected_error",
     [
-        ("test-updater-invalid-target-sha", TARGET1_SHA_MISMATCH),
-        ("test-updater-missing-target-commit", TARGET1_SHA_MISMATCH),
+        ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN),
+        ("test-updater-missing-target-commit", TARGET_MISSING_COMMIT),
     ],
 )
 def test_valid_update_no_auth_repo_one_invalid_target_repo_exists(
@@ -288,7 +297,7 @@ def test_valid_update_no_auth_repo_one_invalid_target_repo_exists(
     origin_dir = origin_dir / test_name
     _clone_client_repo(TARGET_REPO_REL_PATH, origin_dir, client_dir)
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, expected_error
+        client_dir, repositories, expected_error, True
     )
     # make sure that the last validated commit does not exist
     _check_if_last_validated_commit_exists(clients_auth_repo_path)
@@ -303,7 +312,7 @@ def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir):
     repositories = updater_repositories["test-updater-expired-metadata"]
     clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, ROOT_EXPIRED, set_time=False, strict=True
+        client_dir, repositories, ROOT_EXPIRED, False, set_time=False, strict=True
     )
     # make sure that the last validated commit does not exist
     _check_if_last_validated_commit_exists(clients_auth_repo_path)
@@ -312,8 +321,8 @@ def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir):
 @pytest.mark.parametrize(
     "test_name, num_of_commits_to_revert, expected_error",
     [
-        ("test-updater-invalid-target-sha", 1, TARGET1_SHA_MISMATCH),
-        ("test-updater-delegated-roles-wrong-sha", 4, TARGET2_SHA_MISMATCH),
+        ("test-updater-invalid-target-sha", 1, TARGET_MISSMATCH_PATTERN),
+        ("test-updater-delegated-roles-wrong-sha", 4, TARGET_MISSMATCH_PATTERN),
     ],
 )
 def test_updater_invalid_target_sha_existing_client_repos(
@@ -344,15 +353,7 @@ def test_no_target_repositories(updater_repositories, origin_dir, client_dir):
     origin_dir = origin_dir / "test-updater-valid"
     client_auth_repo = _clone_client_repo(AUTH_REPO_REL_PATH, origin_dir, client_dir)
     _create_last_validated_commit(client_dir, client_auth_repo.head_commit_sha())
-    client_repos = {AUTH_REPO_REL_PATH: client_auth_repo}
-    _update_invalid_repos_and_check_if_remained_same(
-        client_repos, client_dir, repositories, TARGETS_MISMATCH_ANY
-    )
-    # make sure that the target repositories still do not exist
-    for repository_rel_path in repositories:
-        if repository_rel_path != AUTH_REPO_REL_PATH:
-            client_repo_path = client_dir / repository_rel_path
-            assert client_repo_path.exists() is False
+    _update_and_check_commit_shas(None, repositories, origin_dir, client_dir, False)
 
 
 def test_no_last_validated_commit(updater_repositories, origin_dir, client_dir):
@@ -384,6 +385,7 @@ def test_older_last_validated_commit(updater_repositories, origin_dir, client_di
     _update_and_check_commit_shas(client_repos, repositories, origin_dir, client_dir)
 
 
+# TODO
 def test_update_test_repo_no_flag(updater_repositories, origin_dir, client_dir):
     repositories = updater_repositories["test-updater-test-repo"]
     origin_dir = origin_dir / "test-updater-test-repo"
@@ -393,6 +395,7 @@ def test_update_test_repo_no_flag(updater_repositories, origin_dir, client_dir):
     )
 
 
+# TODO
 def test_update_repo_wrong_flag(updater_repositories, origin_dir, client_dir):
     repositories = updater_repositories["test-updater-valid"]
     origin_dir = origin_dir / "test-updater-valid"
@@ -401,7 +404,7 @@ def test_update_repo_wrong_flag(updater_repositories, origin_dir, client_dir):
         client_dir, repositories, NOT_A_TEST_REPO, UpdateType.TEST
     )
 
-
+# TODO
 def test_update_repo_target_in_indeterminate_state(
     updater_repositories, origin_dir, client_dir
 ):
@@ -421,7 +424,7 @@ def test_update_repo_target_in_indeterminate_state(
     index_lock.touch()
 
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, UNCOMMITTED_TARGET_CHANGES
+        client_dir, repositories, UNCOMMITTED_TARGET_CHANGES, True
     )
 
 
@@ -619,6 +622,7 @@ def _update_invalid_repos_and_check_if_repos_exist(
     client_dir,
     repositories,
     expected_error,
+    expect_partial_update,
     expected_repo_type=UpdateType.EITHER,
     set_time=True,
     auth_repo_name_exists=True,
@@ -650,10 +654,11 @@ def _update_expect_error(client_dir, expected_repo_type):
     else:
         _update_expect_error(client_dir, expected_repo_type)
 
-    # the client repositories should not exits
-    for repository_rel_path in repositories:
-        path = client_dir / repository_rel_path
-        if str(path) in repositories_which_existed:
-            assert path.exists()
-        else:
-            assert not path.exists()
+    if not expect_partial_update:
+        # the client repositories should not exits
+        for repository_rel_path in repositories:
+            path = client_dir / repository_rel_path
+            if str(path) in repositories_which_existed:
+                assert path.exists()
+            else:
+                assert not path.exists()

From f1f324cd179da106110b53e70ae0f6c36c457ef0 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 18:49:49 +0100
Subject: [PATCH 15/28] fix: updater pipeline - raise error if update type is
 not correct

---
 .../test_repo_update/test_updater.py          | 10 ++--
 taf/updater/updater_pipeline.py               | 56 +++++++++----------
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py
index d61d46ec..5f7a2f18 100644
--- a/taf/tests/test_updater/test_repo_update/test_updater.py
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py
@@ -385,26 +385,24 @@ def test_older_last_validated_commit(updater_repositories, origin_dir, client_di
     _update_and_check_commit_shas(client_repos, repositories, origin_dir, client_dir)
 
 
-# TODO
 def test_update_test_repo_no_flag(updater_repositories, origin_dir, client_dir):
     repositories = updater_repositories["test-updater-test-repo"]
     origin_dir = origin_dir / "test-updater-test-repo"
     # try to update a test repo, set update type to official
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, IS_A_TEST_REPO, UpdateType.OFFICIAL
+        client_dir, repositories, IS_A_TEST_REPO, False, UpdateType.OFFICIAL
     )
 
 
-# TODO
 def test_update_repo_wrong_flag(updater_repositories, origin_dir, client_dir):
     repositories = updater_repositories["test-updater-valid"]
     origin_dir = origin_dir / "test-updater-valid"
     # try to update without setting the last validated commit
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, NOT_A_TEST_REPO, UpdateType.TEST
+        client_dir, repositories, NOT_A_TEST_REPO, False, UpdateType.TEST
     )
 
-# TODO
+
 def test_update_repo_target_in_indeterminate_state(
     updater_repositories, origin_dir, client_dir
 ):
@@ -424,7 +422,7 @@ def test_update_repo_target_in_indeterminate_state(
     index_lock.touch()
 
     _update_invalid_repos_and_check_if_repos_exist(
-        client_dir, repositories, UNCOMMITTED_TARGET_CHANGES, True
+        client_dir, repositories, INDEX_LOCKED, True
     )
 
 
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index cf88775a..01ce1f38 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -212,41 +212,35 @@ def clone_remote_and_run_tuf_updater(self):
 
     @log_on_start(INFO, "Validating out of band commit and update type", logger=taf_logger)
     def _validate_out_of_band_and_update_type(self):
-        try:
-            # this is the repository cloned inside the temp directory
-            # we validate it before updating the actual authentication repository
-            if (
-                self.out_of_band_authentication is not None
-                and self.state.users_auth_repo.last_validated_commit is None
-                and self.state.auth_commits_since_last_validated[0] != self.out_of_band_authentication
+        # this is the repository cloned inside the temp directory
+        # we validate it before updating the actual authentication repository
+        if (
+            self.out_of_band_authentication is not None
+            and self.state.users_auth_repo.last_validated_commit is None
+            and self.state.auth_commits_since_last_validated[0] != self.out_of_band_authentication
+        ):
+            raise UpdateFailedError(
+                f"First commit of repository {self.state.auth_repo_name} does not match "
+                "out of band authentication commit"
+            )
+
+        if self.expected_repo_type != UpdateType.EITHER:
+            # check if the repository being updated is a test repository
+            if self.state.validation_auth_repo.is_test_repo and self.expected_repo_type != UpdateType.TEST:
+                raise UpdateFailedError(
+                    f"Repository {self.state.users_auth_repo.name} is a test repository. "
+                    'Call update with "--expected-repo-type" test to update a test '
+                    "repository"
+                )
+            elif (
+                not self.state.validation_auth_repo.is_test_repo
+                and self.expected_repo_type == UpdateType.TEST
             ):
                 raise UpdateFailedError(
-                    f"First commit of repository {self.state.auth_repo_name} does not match "
-                    "out of band authentication commit"
+                    f"Repository {self.state.users_auth_repo.name} is not a test repository,"
+                    ' but update was called with the "--expected-repo-type" test'
                 )
 
-            if self.expected_repo_type != UpdateType.EITHER:
-                # check if the repository being updated is a test repository
-                if self.state.validation_auth_repo.is_test_repo and self.expected_repo_type != UpdateType.TEST:
-                    raise UpdateFailedError(
-                        f"Repository {self.state.users_auth_repo.name} is a test repository. "
-                        'Call update with "--expected-repo-type" test to update a test '
-                        "repository"
-                    )
-                elif (
-                    not self.state.validation_auth_repo.is_test_repo
-                    and self.expected_repo_type == UpdateType.TEST
-                ):
-                    raise UpdateFailedError(
-                        f"Repository {self.state.users_auth_repo.name} is not a test repository,"
-                        ' but update was called with the "--expected-repo-type" test'
-                    )
-            return UpdateStatus.SUCCESS
-        except Exception as e:
-            self.state.error = e
-            self.state.event = Event.FAILED
-            taf_logger.error(e)
-            return UpdateStatus.FAILED
 
     @log_on_start(INFO, "Cloning or updating user's authentication repository...", logger=taf_logger)
     def clone_or_fetch_users_auth_repo(self):

From a9ed43582a8cc352887a621cd664102f6c6a7a12 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 19:03:17 +0100
Subject: [PATCH 16/28] chore: formatting

---
 taf/auth_repo.py                              |  18 +-
 taf/git.py                                    |  11 +-
 taf/models/types.py                           |   7 +-
 .../test_repo_update/test_updater.py          |  23 +-
 taf/updater/types/update.py                   |   2 +-
 taf/updater/updater.py                        |   6 +-
 taf/updater/updater_pipeline.py               | 412 +++++++++++++-----
 7 files changed, 336 insertions(+), 143 deletions(-)

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index 3ac9becb..0dd61a49 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -219,14 +219,13 @@ def set_last_validated_commit(self, commit: str):
         self._log_debug(f"setting last validated commit to: {commit}")
         Path(self.conf_dir, self.LAST_VALIDATED_FILENAME).write_text(commit)
 
-
     def targets_data_by_auth_commits(
-            self,
-            commits: List[str],
-            target_repos: Optional[List[str]] = None,
-            custom_fns: Optional[Dict[str, Callable]] = None,
-            default_branch: Optional[str] = None,
-            excluded_target_globs: Optional[List[str]] = None,
+        self,
+        commits: List[str],
+        target_repos: Optional[List[str]] = None,
+        custom_fns: Optional[Dict[str, Callable]] = None,
+        default_branch: Optional[str] = None,
+        excluded_target_globs: Optional[List[str]] = None,
     ) -> Dict[str, Dict[str, Dict[str, Any]]]:
         """
         Return a dictionary where each target repository has associated authentication commits,
@@ -262,9 +261,7 @@ def targets_data_by_auth_commits(
                 target_commit = target_data.get("commit")
                 target_data.setdefault("custom", {})
                 if custom_fns is not None and target_path in custom_fns:
-                    target_data["custom"].update(
-                        custom_fns[target_path](target_commit)
-                    )
+                    target_data["custom"].update(custom_fns[target_path](target_commit))
 
                 repositories_commits.setdefault(target_path, {})[commit] = {
                     "branch": target_branch,
@@ -277,7 +274,6 @@ def targets_data_by_auth_commits(
         )
         return repositories_commits
 
-
     def sorted_commits_and_branches_per_repositories(
         self,
         commits: List[str],
diff --git a/taf/git.py b/taf/git.py
index 5bba0ea3..85a91f37 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -248,7 +248,9 @@ def _get_default_branch_from_local(self) -> str:
 
     def _get_default_branch_from_remote(self, url: str) -> str:
         if not self.is_git_repository:
-            self._log_debug("Repository does not exist. Could not determined default branch from remote")
+            self._log_debug(
+                "Repository does not exist. Could not determined default branch from remote"
+            )
             return None
         branch = self._git(
             f"ls-remote --symref {url} HEAD",
@@ -815,11 +817,12 @@ def get_commit_date(self, commit_sha: str) -> str:
                 "Could not get commit message. pygit repository could not be instantiated."
             )
         commit = repo.get(commit_sha)
-        date = datetime.datetime.utcfromtimestamp(commit.commit_time + commit.commit_time_offset)
-        formatted_date = date.strftime('%Y-%m-%d')
+        date = datetime.datetime.utcfromtimestamp(
+            commit.commit_time + commit.commit_time_offset
+        )
+        formatted_date = date.strftime("%Y-%m-%d")
         return formatted_date
 
-
     def get_commit_message(self, commit_sha: str) -> str:
         """Returns commit message of the given commit"""
         repo = self.pygit_repo
diff --git a/taf/models/types.py b/taf/models/types.py
index 69318e50..ab6dd0f8 100644
--- a/taf/models/types.py
+++ b/taf/models/types.py
@@ -189,6 +189,7 @@ class CommitInfo:
     custom: Dict[str, Any] = attrs.field()
     auth_commit: str = attrs.field()
 
+
 @attrs.define
 class TargetInfo:
     branches: Dict[str, List[CommitInfo]] = attrs.field()
@@ -196,6 +197,7 @@ class TargetInfo:
     def get_branch(self, branch_name: str) -> List[CommitInfo]:
         return self.branches.get(branch_name, [])
 
+
 @attrs.define
 class TargetsSortedCommitsData:
     targets: Dict[str, TargetInfo] = attrs.field()
@@ -204,7 +206,9 @@ def __getitem__(self, repo_name: str) -> TargetInfo:
         return self.targets.get(repo_name)
 
     @classmethod
-    def from_dict(cls, data_dict: Dict[str, Dict[str, List[Dict[str, Any]]]]) -> 'TargetsSortedCommitsData':
+    def from_dict(
+        cls, data_dict: Dict[str, Dict[str, List[Dict[str, Any]]]]
+    ) -> "TargetsSortedCommitsData":
         targets = {}
 
         for repo_name, branches_dict in data_dict.items():
@@ -237,6 +241,7 @@ class TargetAtCommitInfo:
 # elsewhere in the code
 # better in the auth repo function
 
+
 @attrs.define
 class AuthCommitAndTargets:
     auth_commit: str = attrs.field()
diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py
index 5f7a2f18..021c6c37 100644
--- a/taf/tests/test_updater/test_repo_update/test_updater.py
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py
@@ -79,8 +79,6 @@
 INDEX_LOCKED = r"Update of organization/auth_repo failed due to error: Repo ([\w\/-]+): the index is locked; this might be due to a concurrent or crashed process"
 
 
-
-
 NO_WORKING_MIRRORS = (
     f"Validation of authentication repository {AUTH_REPO_REL_PATH} failed at revision"
 )
@@ -249,11 +247,21 @@ def test_no_update_necessary(
     "test_name, expected_error, auth_repo_name_exists, expect_partial_update",
     [
         ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN, True, True),
-        ("test-updater-additional-target-commit", TARGET_COMMIT_AFTER_LAST_VALIDATED, True, True),
+        (
+            "test-updater-additional-target-commit",
+            TARGET_COMMIT_AFTER_LAST_VALIDATED,
+            True,
+            True,
+        ),
         ("test-updater-missing-target-commit", TARGET_ADDITIONAL_COMMIT, True, True),
         ("test-updater-wrong-key", NO_WORKING_MIRRORS, True, True),
         ("test-updater-invalid-version-number", REPLAYED_METADATA, True, True),
-        ("test-updater-delegated-roles-wrong-sha", TARGET_MISSMATCH_PATTERN, True, True),
+        (
+            "test-updater-delegated-roles-wrong-sha",
+            TARGET_MISSMATCH_PATTERN,
+            True,
+            True,
+        ),
         ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True, True),
         ("test-updater-updated-root-invalid-metadata", NO_WORKING_MIRRORS, True, True),
         ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False, True),
@@ -266,7 +274,12 @@ def test_no_update_necessary(
     ],
 )
 def test_updater_invalid_update(
-    test_name, expected_error, auth_repo_name_exists, updater_repositories, client_dir, expect_partial_update
+    test_name,
+    expected_error,
+    auth_repo_name_exists,
+    updater_repositories,
+    client_dir,
+    expect_partial_update,
 ):
     repositories = updater_repositories[test_name]
     clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH
diff --git a/taf/updater/types/update.py b/taf/updater/types/update.py
index 84969a40..cdaf22ab 100644
--- a/taf/updater/types/update.py
+++ b/taf/updater/types/update.py
@@ -11,8 +11,8 @@ class Update:
     auth_repos: Dict = field(factory=dict)
     auth_repo_name: str = field(default="")
 
+
 class UpdateType(enum.Enum):
     TEST = "test"
     OFFICIAL = "official"
     EITHER = "either"
-
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index 3d256147..c452c7f1 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -30,7 +30,6 @@
 disable_tuf_console_logging()
 
 
-
 def _check_update_status(repos_update_data: Dict[str, Any]) -> Tuple[Event, str]:
     # helper function to set update status of update handler based on repo status.
     # if repo handlers event status changed,
@@ -527,11 +526,10 @@ def _update_current_repository(
         output.auth_repo_name,
         output.commits_data,
         output.error,
-        output.targets_data
+        output.targets_data,
     )
 
 
-
 def _update_transient_data(
     transient_data, repos_update_data: Dict[str, str]
 ) -> Dict[str, Any]:
@@ -615,5 +613,3 @@ def validate_repository(
         )
     settings.overwrite_last_validated_commit = False
     settings.last_validated_commit = None
-
-
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 01ce1f38..fb24ded7 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -53,9 +53,15 @@ class UpdateState:
     target_branches_data_from_auth_repo: Dict = field(factory=dict)
     targets_data_by_auth_commits: Dict = field(factory=dict)
     old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
-    fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
-    validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
-    additional_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(factory=dict)
+    fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(
+        factory=dict
+    )
+    validated_commits_per_target_repos_branches: Dict[str, Dict[str, str]] = field(
+        factory=dict
+    )
+    additional_commits_per_target_repos_branches: Dict[
+        str, Dict[str, List[str]]
+    ] = field(factory=dict)
     validated_auth_commits: List[str] = field(factory=list)
 
 
@@ -79,11 +85,11 @@ def wrapper(self, *args, **kwargs):
             if self.state.event == Event.FAILED and not self.state.existing_repo:
                 shutil.rmtree(self.state.users_auth_repo.path, onerror=on_rm_error)
                 shutil.rmtree(self.state.users_auth_repo.conf_dir)
+
     return wrapper
 
 
 class Pipeline:
-
     def __init__(self, steps):
         self.steps = steps
 
@@ -108,29 +114,41 @@ def handle_error(self, e):
     def set_output(self):
         pass
 
-class AuthenticationRepositoryUpdatePipeline(Pipeline):
-
 
+class AuthenticationRepositoryUpdatePipeline(Pipeline):
     def __init__(
-        self, url, clients_auth_library_dir, targets_library_dir, auth_repo_name,
-        update_from_filesystem, expected_repo_type, target_repo_classes, target_factory,
-        only_validate, validate_from_commit, conf_directory_root, out_of_band_authentication,
-        checkout, excluded_target_globs
+        self,
+        url,
+        clients_auth_library_dir,
+        targets_library_dir,
+        auth_repo_name,
+        update_from_filesystem,
+        expected_repo_type,
+        target_repo_classes,
+        target_factory,
+        only_validate,
+        validate_from_commit,
+        conf_directory_root,
+        out_of_band_authentication,
+        checkout,
+        excluded_target_globs,
     ):
 
-        super().__init__(steps=[
-            self.clone_remote_and_run_tuf_updater,
-            self.clone_or_fetch_users_auth_repo,
-            self.load_target_repositories,
-            self.clone_target_repositories_if_not_on_disk,
-            self.determine_start_commits,
-            self.get_targets_data_from_auth_repo,
-            self.get_target_repositories_commits,
-            self.validate_target_repositories,
-            self.validate_and_set_additional_commits_of_target_repositories,
-            self.merge_commits,
-            self.set_target_repositories_data
-        ])
+        super().__init__(
+            steps=[
+                self.clone_remote_and_run_tuf_updater,
+                self.clone_or_fetch_users_auth_repo,
+                self.load_target_repositories,
+                self.clone_target_repositories_if_not_on_disk,
+                self.determine_start_commits,
+                self.get_targets_data_from_auth_repo,
+                self.get_target_repositories_commits,
+                self.validate_target_repositories,
+                self.validate_and_set_additional_commits_of_target_repositories,
+                self.merge_commits,
+                self.set_target_repositories_data,
+            ]
+        )
 
         self.url = url
         self.clients_auth_library_dir = clients_auth_library_dir
@@ -154,10 +172,14 @@ def __init__(
     @property
     def output(self):
         if not self._output:
-            raise ValueError("Pipeline has not been run yet. Please run the pipeline first.")
+            raise ValueError(
+                "Pipeline has not been run yet. Please run the pipeline first."
+            )
         return self._output
 
-    @log_on_start(INFO, "Cloning repository and running TUF updater...", logger=taf_logger)
+    @log_on_start(
+        INFO, "Cloning repository and running TUF updater...", logger=taf_logger
+    )
     @cleanup_decorator
     def clone_remote_and_run_tuf_updater(self):
         settings.update_from_filesystem = self.update_from_filesystem
@@ -172,23 +194,32 @@ def clone_remote_and_run_tuf_updater(self):
             )
 
             # Clone the validation repository in temp.
-            self.state.auth_repo_name = _clone_validation_repo(self.url, self.state.auth_repo_name)
-            git_updater = GitUpdater(self.url, self.clients_auth_library_dir, self.state.auth_repo_name)
+            self.state.auth_repo_name = _clone_validation_repo(
+                self.url, self.state.auth_repo_name
+            )
+            git_updater = GitUpdater(
+                self.url, self.clients_auth_library_dir, self.state.auth_repo_name
+            )
             self.state.users_auth_repo = git_updater.users_auth_repo
             _run_tuf_updater(git_updater)
             self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
             self.state.validation_auth_repo = git_updater.validation_auth_repo
             self._validate_out_of_band_and_update_type()
 
-
             self.state.auth_commits_since_last_validated = list(git_updater.commits)
-            self.state.event = Event.CHANGED if len(self.state.auth_commits_since_last_validated) > 1 else Event.UNCHANGED
+            self.state.event = (
+                Event.CHANGED
+                if len(self.state.auth_commits_since_last_validated) > 1
+                else Event.UNCHANGED
+            )
 
             # used for testing purposes
             if settings.overwrite_last_validated_commit:
                 self.state.last_validated_commit = settings.last_validated_commit
             else:
-                self.state.last_validated_commit = self.state.users_auth_repo.last_validated_commit
+                self.state.last_validated_commit = (
+                    self.state.users_auth_repo.last_validated_commit
+                )
 
             return UpdateStatus.SUCCESS
 
@@ -210,14 +241,17 @@ def clone_remote_and_run_tuf_updater(self):
             if git_updater is not None:
                 git_updater.cleanup()
 
-    @log_on_start(INFO, "Validating out of band commit and update type", logger=taf_logger)
+    @log_on_start(
+        INFO, "Validating out of band commit and update type", logger=taf_logger
+    )
     def _validate_out_of_band_and_update_type(self):
         # this is the repository cloned inside the temp directory
         # we validate it before updating the actual authentication repository
         if (
             self.out_of_band_authentication is not None
             and self.state.users_auth_repo.last_validated_commit is None
-            and self.state.auth_commits_since_last_validated[0] != self.out_of_band_authentication
+            and self.state.auth_commits_since_last_validated[0]
+            != self.out_of_band_authentication
         ):
             raise UpdateFailedError(
                 f"First commit of repository {self.state.auth_repo_name} does not match "
@@ -226,7 +260,10 @@ def _validate_out_of_band_and_update_type(self):
 
         if self.expected_repo_type != UpdateType.EITHER:
             # check if the repository being updated is a test repository
-            if self.state.validation_auth_repo.is_test_repo and self.expected_repo_type != UpdateType.TEST:
+            if (
+                self.state.validation_auth_repo.is_test_repo
+                and self.expected_repo_type != UpdateType.TEST
+            ):
                 raise UpdateFailedError(
                     f"Repository {self.state.users_auth_repo.name} is a test repository. "
                     'Call update with "--expected-repo-type" test to update a test '
@@ -241,8 +278,11 @@ def _validate_out_of_band_and_update_type(self):
                     ' but update was called with the "--expected-repo-type" test'
                 )
 
-
-    @log_on_start(INFO, "Cloning or updating user's authentication repository...", logger=taf_logger)
+    @log_on_start(
+        INFO,
+        "Cloning or updating user's authentication repository...",
+        logger=taf_logger,
+    )
     def clone_or_fetch_users_auth_repo(self):
         if not self.only_validate:
             # fetch the latest commit or clone the repository without checkout
@@ -271,8 +311,11 @@ def load_target_repositories(self):
                 only_load_targets=False,
                 excluded_target_globs=self.excluded_target_globs,
             )
-            self.state.target_repositories = repositoriesdb.get_deduplicated_repositories(
-                self.state.users_auth_repo, self.state.auth_commits_since_last_validated[-1::]
+            self.state.target_repositories = (
+                repositoriesdb.get_deduplicated_repositories(
+                    self.state.users_auth_repo,
+                    self.state.auth_commits_since_last_validated[-1::],
+                )
             )
             return UpdateStatus.SUCCESS
         except Exception as e:
@@ -281,7 +324,9 @@ def load_target_repositories(self):
             taf_logger.error(e)
             return UpdateStatus.FAILED
 
-    @log_on_start(INFO, "Cloning target repositories which are not on disk...", logger=taf_logger)
+    @log_on_start(
+        INFO, "Cloning target repositories which are not on disk...", logger=taf_logger
+    )
     @log_on_start(INFO, "Finished cloning target repositories", logger=taf_logger)
     def clone_target_repositories_if_not_on_disk(self):
         try:
@@ -303,13 +348,21 @@ def clone_target_repositories_if_not_on_disk(self):
             taf_logger.error(e)
             return UpdateStatus.FAILED
 
-
-
-    @log_on_start(INFO, "Validating initial state of target repositories...", logger=taf_logger)
-    @log_on_end(INFO, "Validation of the initial state of target repositories finished", logger=taf_logger)
+    @log_on_start(
+        INFO, "Validating initial state of target repositories...", logger=taf_logger
+    )
+    @log_on_end(
+        INFO,
+        "Validation of the initial state of target repositories finished",
+        logger=taf_logger,
+    )
     def determine_start_commits(self):
         try:
-            self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
+            self.state.targets_data_by_auth_commits = (
+                self.state.users_auth_repo.targets_data_by_auth_commits(
+                    self.state.auth_commits_since_last_validated
+                )
+            )
             self.state.old_heads_per_target_repos_branches = defaultdict(dict)
             is_initial_state_in_sync = True
             # if last validated commit was not manually modified (set to a newer commit)
@@ -322,37 +375,66 @@ def determine_start_commits(self):
             if self.state.last_validated_commit is not None:
                 for repository in self.state.target_repositories.values():
                     self.state.old_heads_per_target_repos_branches[repository.name] = {}
-                    last_validated_repository_commits_data = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {})
+                    last_validated_repository_commits_data = (
+                        self.state.targets_data_by_auth_commits[repository.name].get(
+                            self.state.last_validated_commit, {}
+                        )
+                    )
 
                     if last_validated_repository_commits_data:
                         # if this is not set, it means that the repository did not exist in this revision
                         if repository in self.state.cloned_target_repositories:
                             is_initial_state_in_sync = False
                             break
-                        current_branch = last_validated_repository_commits_data.get("branch", repository.default_branch)
-                        last_validated_commit = last_validated_repository_commits_data["commit"]
+                        current_branch = last_validated_repository_commits_data.get(
+                            "branch", repository.default_branch
+                        )
+                        last_validated_commit = last_validated_repository_commits_data[
+                            "commit"
+                        ]
 
-                        branch_exists = repository.branch_exists(current_branch, include_remotes=False)
+                        branch_exists = repository.branch_exists(
+                            current_branch, include_remotes=False
+                        )
                         if not branch_exists:
                             is_initial_state_in_sync = False
                             break
-                        top_commit_of_branch = repository.top_commit_of_branch(current_branch)
+                        top_commit_of_branch = repository.top_commit_of_branch(
+                            current_branch
+                        )
                         if top_commit_of_branch != last_validated_commit:
                             # check if top commit is newer (which is fine, it will be validated)
                             # or older, meaning that the authentication repository contains
                             # additional commits, so it would be necessary to find older auth repo
                             # commit and start the validation from there
-                            if not current_branch in repository.branches_containing_commit(last_validated_commit):
+                            if (
+                                not current_branch
+                                in repository.branches_containing_commit(
+                                    last_validated_commit
+                                )
+                            ):
                                 is_initial_state_in_sync = False
                                 break
 
-                        self.state.old_heads_per_target_repos_branches[repository.name][current_branch] = last_validated_commit
+                        self.state.old_heads_per_target_repos_branches[repository.name][
+                            current_branch
+                        ] = last_validated_commit
 
             if not is_initial_state_in_sync:
-                taf_logger.info(f"Repository {self.state.users_auth_repo.name}: states of target repositories are not in sync with last validated commit. Starting the update from the beginning")
+                taf_logger.info(
+                    f"Repository {self.state.users_auth_repo.name}: states of target repositories are not in sync with last validated commit. Starting the update from the beginning"
+                )
                 self.state.last_validated_commit = None
-                self.state.auth_commits_since_last_validated = self.state.users_auth_repo.all_commits_on_branch(self.state.users_auth_repo.default_branch)
-                self.state.targets_data_by_auth_commits = self.state.users_auth_repo.targets_data_by_auth_commits(self.state.auth_commits_since_last_validated)
+                self.state.auth_commits_since_last_validated = (
+                    self.state.users_auth_repo.all_commits_on_branch(
+                        self.state.users_auth_repo.default_branch
+                    )
+                )
+                self.state.targets_data_by_auth_commits = (
+                    self.state.users_auth_repo.targets_data_by_auth_commits(
+                        self.state.auth_commits_since_last_validated
+                    )
+                )
 
             return UpdateStatus.SUCCESS
         except Exception as e:
@@ -361,10 +443,9 @@ def determine_start_commits(self):
             taf_logger.error(e)
             return UpdateStatus.FAILED
 
-
     def get_targets_data_from_auth_repo(self):
         repo_branches = {}
-        for repo_name, commits_data in  self.state.targets_data_by_auth_commits.items():
+        for repo_name, commits_data in self.state.targets_data_by_auth_commits.items():
             branches = set()  # using a set to avoid duplicate branches
             for commit_data in commits_data.values():
                 branches.add(commit_data["branch"])
@@ -372,16 +453,21 @@ def get_targets_data_from_auth_repo(self):
         self.state.target_branches_data_from_auth_repo = repo_branches
         return UpdateStatus.SUCCESS
 
-
-    @log_on_start(DEBUG, "Getting fetched commits of target repositories", logger=taf_logger)
+    @log_on_start(
+        DEBUG, "Getting fetched commits of target repositories", logger=taf_logger
+    )
     def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
         self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
         for repository in self.state.target_repositories.values():
-            for branch in self.state.target_branches_data_from_auth_repo[repository.name]:
+            for branch in self.state.target_branches_data_from_auth_repo[
+                repository.name
+            ]:
                 if repository.is_git_repository_root:
                     if self.only_validate:
-                        branch_exists = repository.branch_exists(branch, include_remotes=False)
+                        branch_exists = repository.branch_exists(
+                            branch, include_remotes=False
+                        )
                         if not branch_exists:
                             self.state.targets_data = {}
                             msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories."
@@ -390,7 +476,9 @@ def get_target_repositories_commits(self):
                 else:
                     repository.fetch(branch=branch)
 
-                old_head = self.state.old_heads_per_target_repos_branches[repository.name].get(branch)
+                old_head = self.state.old_heads_per_target_repos_branches[
+                    repository.name
+                ].get(branch)
                 if old_head is not None:
                     if not self.only_validate:
                         fetched_commits = repository.all_commits_on_branch(
@@ -405,25 +493,29 @@ def get_target_repositories_commits(self):
                                 fetched_commits.index(old_head) + 1 : :
                             ]
                         else:
-                            fetched_commits_on_target_repo_branch = repository.all_commits_since_commit(
-                                old_head, branch
+                            fetched_commits_on_target_repo_branch = (
+                                repository.all_commits_since_commit(old_head, branch)
                             )
                             for commit in fetched_commits:
                                 if commit not in fetched_commits_on_target_repo_branch:
                                     fetched_commits_on_target_repo_branch.append(commit)
                     else:
-                        fetched_commits_on_target_repo_branch = repository.all_commits_since_commit(
-                            old_head, branch
+                        fetched_commits_on_target_repo_branch = (
+                            repository.all_commits_since_commit(old_head, branch)
                         )
                     fetched_commits_on_target_repo_branch.insert(0, old_head)
                 else:
-                    branch_exists = repository.branch_exists(branch, include_remotes=False)
+                    branch_exists = repository.branch_exists(
+                        branch, include_remotes=False
+                    )
                     if branch_exists:
                         # this happens in the case when last_validated_commit does not exist
                         # we want to validate all commits, so combine existing commits and
                         # fetched commits
-                        fetched_commits_on_target_repo_branch = repository.all_commits_on_branch(
-                            branch=branch, reverse=True
+                        fetched_commits_on_target_repo_branch = (
+                            repository.all_commits_on_branch(
+                                branch=branch, reverse=True
+                            )
                         )
                     else:
                         fetched_commits_on_target_repo_branch = []
@@ -441,7 +533,9 @@ def get_target_repositories_commits(self):
                                     fetched_commits_on_target_repo_branch.append(commit)
                         except GitError:
                             pass
-                self.state.fetched_commits_per_target_repos_branches[repository.name][branch] = fetched_commits_on_target_repo_branch
+                self.state.fetched_commits_per_target_repos_branches[repository.name][
+                    branch
+                ] = fetched_commits_on_target_repo_branch
         return UpdateStatus.SUCCESS
 
     @log_on_start(INFO, "Validating target repositories...", logger=taf_logger)
@@ -459,24 +553,43 @@ def validate_target_repositories(self):
             self.state.validated_auth_commits = []
             for auth_commit in self.state.auth_commits_since_last_validated:
                 for repository in self.state.target_repositories.values():
-                    if auth_commit not in self.state.targets_data_by_auth_commits[repository.name]:
+                    if (
+                        auth_commit
+                        not in self.state.targets_data_by_auth_commits[repository.name]
+                    ):
                         continue
-                    current_targets_data = self.state.targets_data_by_auth_commits[repository.name][auth_commit]
+                    current_targets_data = self.state.targets_data_by_auth_commits[
+                        repository.name
+                    ][auth_commit]
 
-                    current_branch = current_targets_data.get("branch", repository.default_branch)
+                    current_branch = current_targets_data.get(
+                        "branch", repository.default_branch
+                    )
                     current_commit = current_targets_data["commit"]
 
                     if not len(last_validated_data_per_repositories[repository.name]):
-                        current_head_commit_and_branch = self.state.targets_data_by_auth_commits[repository.name].get(self.state.last_validated_commit, {})
+                        current_head_commit_and_branch = (
+                            self.state.targets_data_by_auth_commits[
+                                repository.name
+                            ].get(self.state.last_validated_commit, {})
+                        )
                         previous_branch = current_head_commit_and_branch.get("branch")
                         previous_commit = current_head_commit_and_branch.get("commit")
                         if previous_commit is not None and previous_branch is None:
                             previous_branch = repository.default_branch
                     else:
-                        previous_branch = last_validated_data_per_repositories[repository.name].get("branch")
-                        previous_commit = last_validated_data_per_repositories[repository.name]["commit"]
-
-                    target_commits_from_target_repo = self.state.fetched_commits_per_target_repos_branches[repository.name]
+                        previous_branch = last_validated_data_per_repositories[
+                            repository.name
+                        ].get("branch")
+                        previous_commit = last_validated_data_per_repositories[
+                            repository.name
+                        ]["commit"]
+
+                    target_commits_from_target_repo = (
+                        self.state.fetched_commits_per_target_repos_branches[
+                            repository.name
+                        ]
+                    )
                     validated_commit = self._validate_current_repo_commit(
                         repository,
                         self.state.users_auth_repo,
@@ -485,11 +598,17 @@ def validate_target_repositories(self):
                         current_branch,
                         current_commit,
                         target_commits_from_target_repo,
-                        auth_commit)
+                        auth_commit,
+                    )
 
-                    last_validated_data_per_repositories[repository.name] = {"commit": validated_commit, "branch": current_branch}
+                    last_validated_data_per_repositories[repository.name] = {
+                        "commit": validated_commit,
+                        "branch": current_branch,
+                    }
 
-                    self.state.validated_commits_per_target_repos_branches[repository.name].setdefault(current_branch, []).append(validated_commit)
+                    self.state.validated_commits_per_target_repos_branches[
+                        repository.name
+                    ].setdefault(current_branch, []).append(validated_commit)
 
                 # commit processed without an error
                 self.state.validated_auth_commits.append(auth_commit)
@@ -504,28 +623,31 @@ def validate_target_repositories(self):
             self.state.event = Event.FAILED
             return UpdateStatus.FAILED
 
-
     def _validate_current_repo_commit(
-            self,
-            repository,
-            users_auth_repo,
-            previous_branch,
-            previous_commit,
-            current_branch,
-            current_commit,
-            target_commits_from_target_repo,
-            current_auth_commit,
-        ):
+        self,
+        repository,
+        users_auth_repo,
+        previous_branch,
+        previous_commit,
+        current_branch,
+        current_commit,
+        target_commits_from_target_repo,
+        current_auth_commit,
+    ):
         # TODO there is an error with fetched target commits when there is an unauthenticated commit
         # at the end of branch of a repo that does not support unauthenticated commits
 
-        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[current_branch]
+        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[
+            current_branch
+        ]
         if previous_commit == current_commit:
             # target not updated in this revision
             return current_commit
         if previous_branch == current_branch:
             # same branch
-            current_target_commit = _find_next_value(previous_commit, target_commits_from_target_repos_on_branch)
+            current_target_commit = _find_next_value(
+                previous_commit, target_commits_from_target_repos_on_branch
+            )
         else:
             # next branch
             current_target_commit = target_commits_from_target_repos_on_branch[0]
@@ -553,18 +675,25 @@ def _validate_current_repo_commit(
         # if commits of the target repositories were swapped, commit which is expected to be found
         # after the current one will be skipped and it won't be found later, so validation will fail
         remaining_commits = target_commits_from_target_repos_on_branch[
-            target_commits_from_target_repos_on_branch.index(current_target_commit):]
+            target_commits_from_target_repos_on_branch.index(current_target_commit) :
+        ]
         for target_commit in remaining_commits:
             if current_commit == target_commit:
                 return target_commit
-            taf_logger.debug(f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit}")
+            taf_logger.debug(
+                f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit}"
+            )
         raise UpdateFailedError(
             f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
 data repository {repository.name} was supposed to be at commit {current_commit} \
 but commit not on branch {current_branch}"
         )
 
-    @log_on_start(DEBUG, "Validating and setting additional commits of target repositories", logger=taf_logger)
+    @log_on_start(
+        DEBUG,
+        "Validating and setting additional commits of target repositories",
+        logger=taf_logger,
+    )
     def validate_and_set_additional_commits_of_target_repositories(self):
         """
         For target repository and for each branch, extract commits following the last validated commit
@@ -578,18 +707,29 @@ def validate_and_set_additional_commits_of_target_repositories(self):
         self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
         for repository in self.state.target_repositories.values():
             # this will only include branches that were, at least partially, validated (up until a certain point)
-            for branch, validated_commits in self.state.validated_commits_per_target_repos_branches[repository.name].items():
+            for (
+                branch,
+                validated_commits,
+            ) in self.state.validated_commits_per_target_repos_branches[
+                repository.name
+            ].items():
                 last_validated_commit = validated_commits[-1]
                 # TODO what to do if an error occurred while validating that branch
-                branch_commits = self.state.fetched_commits_per_target_repos_branches[repository.name][branch]
-                additional_commits = branch_commits[branch_commits.index(last_validated_commit) + 1:]
+                branch_commits = self.state.fetched_commits_per_target_repos_branches[
+                    repository.name
+                ][branch]
+                additional_commits = branch_commits[
+                    branch_commits.index(last_validated_commit) + 1 :
+                ]
                 if len(additional_commits):
                     if not _is_unauthenticated_allowed(repository):
                         raise UpdateFailedError(
                             f"Target repository {repository.name} does not allow unauthenticated commits, but contains commit(s) {', '.join(additional_commits)} on branch {branch}"
                         )
 
-                    taf_logger.info(f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}")
+                    taf_logger.info(
+                        f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}"
+                    )
 
                     # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
                     # that does not necessarily mean that the local repository is not up to date with the remote
@@ -601,10 +741,14 @@ def validate_and_set_additional_commits_of_target_repositories(self):
                         additional_commits = additional_commits[
                             additional_commits.index(branch_current_head) + 1 :
                         ]
-                self.state.additional_commits_per_target_repos_branches[repository.name][branch] = additional_commits
+                self.state.additional_commits_per_target_repos_branches[
+                    repository.name
+                ][branch] = additional_commits
         return self.state.update_status
 
-    @log_on_start(INFO, "Merging commits into target repositories...", logger=taf_logger)
+    @log_on_start(
+        INFO, "Merging commits into target repositories...", logger=taf_logger
+    )
     def merge_commits(self):
         """Determines which commits needs to be merged into the specified branch and
         merge it.
@@ -623,12 +767,26 @@ def merge_commits(self):
             else:
                 for repository in self.state.target_repositories.values():
                     # this will only include branches that were, at least partially, validated (up until a certain point)
-                    for branch, validated_commits in self.state.validated_commits_per_target_repos_branches[repository.name].items():
+                    for (
+                        branch,
+                        validated_commits,
+                    ) in self.state.validated_commits_per_target_repos_branches[
+                        repository.name
+                    ].items():
                         last_validated_commit = validated_commits[-1]
                         commit_to_merge = (
-                            last_validated_commit if not _is_unauthenticated_allowed(repository) else self.state.fetched_commits_per_target_repos_branches[repository.name][branch][-1]
-                    )
-                        taf_logger.info("Repository {}: merging {} into branch {}",  repository.name, commit_to_merge, branch)
+                            last_validated_commit
+                            if not _is_unauthenticated_allowed(repository)
+                            else self.state.fetched_commits_per_target_repos_branches[
+                                repository.name
+                            ][branch][-1]
+                        )
+                        taf_logger.info(
+                            "Repository {}: merging {} into branch {}",
+                            repository.name,
+                            commit_to_merge,
+                            branch,
+                        )
                         _merge_commit(repository, branch, commit_to_merge)
                 return self.state.update_status
         except Exception as e:
@@ -655,11 +813,19 @@ def set_target_repositories_data(self):
 
                     # Update the before_pull, after_pull, and new values
                     if branch not in branch_data:
-                        old_head = self.state.old_heads_per_target_repos_branches.get(repo_name, {}).get(branch)
+                        old_head = self.state.old_heads_per_target_repos_branches.get(
+                            repo_name, {}
+                        ).get(branch)
                         if old_head is not None:
                             branch_data[branch]["before_pull"] = old_head
                             branch_data[branch]["new"] = []
-                            branch_data[branch]["unauthenticated"] = self.state.additional_commits_per_target_repos_branches.get(repo_name, {}).get(branch, [])
+                            branch_data[branch][
+                                "unauthenticated"
+                            ] = self.state.additional_commits_per_target_repos_branches.get(
+                                repo_name, {}
+                            ).get(
+                                branch, []
+                            )
                     else:
                         branch_data[branch]["new"].append(commit_info)
                         branch_data[branch]["after_pull"] = commit_info
@@ -671,21 +837,28 @@ def set_target_repositories_data(self):
             self.state.event = Event.FAILED
             return False
 
-
     def set_output(self):
         if self.state.auth_commits_since_last_validated is None:
             commit_before_pull = None
             new_commits = []
             commit_after_pull = None
         else:
-            commit_before_pull = self.state.validated_auth_commits[0] if self.state.existing_repo and len(self.state.validated_auth_commits) else None
+            commit_before_pull = (
+                self.state.validated_auth_commits[0]
+                if self.state.existing_repo and len(self.state.validated_auth_commits)
+                else None
+            )
 
             commit_after_pull = self.state.validated_auth_commits[-1]
 
             if not self.state.existing_repo:
                 new_commits = self.state.validated_auth_commits
             else:
-                new_commits = self.state.validated_auth_commits[1:] if len(self.state.validated_auth_commits) else []
+                new_commits = (
+                    self.state.validated_auth_commits[1:]
+                    if len(self.state.validated_auth_commits)
+                    else []
+                )
         commits_data = {
             "before_pull": commit_before_pull,
             "new": new_commits,
@@ -697,7 +870,7 @@ def set_output(self):
             auth_repo_name=self.state.auth_repo_name,
             commits_data=commits_data,
             error=self.state.error,
-            targets_data=self.state.targets_data
+            targets_data=self.state.targets_data,
         )
 
 
@@ -735,12 +908,16 @@ def _clone_validation_repo(url, repository_name):
     validation_auth_repo.cleanup()
     return repository_name
 
+
 def _is_unauthenticated_allowed(repository):
-    return repository.custom.get(
-        "allow-unauthenticated-commits", False
-    )
+    return repository.custom.get("allow-unauthenticated-commits", False)
 
-@log_on_start(INFO, "Running TUF validation of the authentication repository...", logger=taf_logger)
+
+@log_on_start(
+    INFO,
+    "Running TUF validation of the authentication repository...",
+    logger=taf_logger,
+)
 def _run_tuf_updater(git_updater):
     def _init_updater():
         try:
@@ -826,11 +1003,14 @@ def _find_next_value(value, values_list):
         pass  # value not in list
     return None
 
+
 def _merge_commit(repository, branch, commit_to_merge, checkout=True):
     """Merge the specified commit into the given branch and check out the branch.
     If the repository cannot contain unauthenticated commits, check out the merged commit.
     """
-    taf_logger.info("{} Merging commit {} into branch {}", repository.name, commit_to_merge, branch)
+    taf_logger.info(
+        "{} Merging commit {} into branch {}", repository.name, commit_to_merge, branch
+    )
     try:
         repository.checkout_branch(branch, raise_anyway=True)
     except GitError as e:

From ba364e7a6f3bb5a204511d966e98c6829d44a23c Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 19:15:24 +0100
Subject: [PATCH 17/28] chore: fix flake and mypy errors

---
 taf/auth_repo.py                |  1 -
 taf/git.py                      |  6 +--
 taf/models/types.py             | 67 +--------------------------------
 taf/updater/updater_pipeline.py |  8 ++--
 4 files changed, 8 insertions(+), 74 deletions(-)

diff --git a/taf/auth_repo.py b/taf/auth_repo.py
index 0dd61a49..a1334902 100644
--- a/taf/auth_repo.py
+++ b/taf/auth_repo.py
@@ -7,7 +7,6 @@
 from collections import defaultdict
 from contextlib import contextmanager
 from pathlib import Path
-from taf.models.types import AuthCommitAndTargets, TargetAtCommitInfo
 from tuf.repository_tool import METADATA_DIRECTORY_NAME
 from taf.git import GitRepository
 from taf.repository_tool import (
diff --git a/taf/git.py b/taf/git.py
index 85a91f37..934e4e60 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -246,7 +246,7 @@ def _get_default_branch_from_local(self) -> str:
         branch = self._git("symbolic-ref HEAD --short", reraise_error=True)
         return branch
 
-    def _get_default_branch_from_remote(self, url: str) -> str:
+    def _get_default_branch_from_remote(self, url: str) -> Optional[str]:
         if not self.is_git_repository:
             self._log_debug(
                 "Repository does not exist. Could not determined default branch from remote"
@@ -837,7 +837,7 @@ def get_commit_sha(self, behind_head: str) -> str:
         """Get commit sha of HEAD~{behind_head}"""
         return self._git("rev-parse HEAD~{}", behind_head)
 
-    def get_default_branch(self, url: Optional[str] = None) -> str:
+    def get_default_branch(self, url: Optional[str] = None) -> Optional[str]:
         """Get the default branch of the repository. If url is provided, return the
         default branch from the remote. Otherwise, return the default
         branch from the local repository."""
@@ -1023,7 +1023,7 @@ def list_files_at_revision(self, commit: str, path: str = "") -> List[str]:
             return self.pygit.list_files_at_revision(commit, posix_path)
         except TAFError as e:
             raise e
-        except Exception as e:
+        except Exception:
             self._log_warning(
                 "Perfomance regression: Could not list files with pygit2. Reverting to git subprocess"
             )
diff --git a/taf/models/types.py b/taf/models/types.py
index ab6dd0f8..e811d0cd 100644
--- a/taf/models/types.py
+++ b/taf/models/types.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Any, Iterator, List, Optional, Dict
+from typing import Iterator, List, Optional, Dict
 import attrs
 
 from taf.constants import DEFAULT_ROLE_SETUP_PARAMS
@@ -181,68 +181,3 @@ def _dfs_delegations(role, skip_top_role=False):
 
         for role in roles:
             yield from _dfs_delegations(role, self.skip_top_role)
-
-
-@attrs.define
-class CommitInfo:
-    commit: str = attrs.field()
-    custom: Dict[str, Any] = attrs.field()
-    auth_commit: str = attrs.field()
-
-
-@attrs.define
-class TargetInfo:
-    branches: Dict[str, List[CommitInfo]] = attrs.field()
-
-    def get_branch(self, branch_name: str) -> List[CommitInfo]:
-        return self.branches.get(branch_name, [])
-
-
-@attrs.define
-class TargetsSortedCommitsData:
-    targets: Dict[str, TargetInfo] = attrs.field()
-
-    def __getitem__(self, repo_name: str) -> TargetInfo:
-        return self.targets.get(repo_name)
-
-    @classmethod
-    def from_dict(
-        cls, data_dict: Dict[str, Dict[str, List[Dict[str, Any]]]]
-    ) -> "TargetsSortedCommitsData":
-        targets = {}
-
-        for repo_name, branches_dict in data_dict.items():
-            commits_by_branch = {}
-            for branch_name, commits_list in branches_dict.items():
-                commits = [
-                    CommitInfo(
-                        commit=commit_data["commit"],
-                        custom=commit_data["custom"],
-                        auth_commit=commit_data["auth_commit"],
-                    )
-                    for commit_data in commits_list
-                ]
-                commits_by_branch[branch_name] = commits
-            targets[repo_name] = TargetInfo(branches=commits_by_branch)
-
-        return cls(targets=targets)
-
-
-@attrs.define
-class TargetAtCommitInfo:
-    repo_name: str = attrs.field()
-    branch: str = attrs.field()
-    commit: str = attrs.field()
-    custom: Dict[str, Any] = attrs.field()
-
-
-# TODO this needs to be sorted by branches
-# if that is not the case, then that will have to be done
-# elsewhere in the code
-# better in the auth repo function
-
-
-@attrs.define
-class AuthCommitAndTargets:
-    auth_commit: str = attrs.field()
-    target_infos: List[TargetAtCommitInfo] = attrs.field(factory=list)
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index fb24ded7..0e08f2fc 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -1,14 +1,14 @@
 from collections import defaultdict
 from enum import Enum
 import functools
-from logging import DEBUG, ERROR, INFO
+from logging import DEBUG, INFO
 from pathlib import Path
 import shutil
 import tempfile
 from typing import Any, Dict, List, Optional
 from attr import attrs, define, field
 from git import GitError
-from logdecorator import log_on_end, log_on_error, log_on_start
+from logdecorator import log_on_end, log_on_start
 from taf.git import GitRepository
 
 import taf.settings as settings
@@ -408,8 +408,8 @@ def determine_start_commits(self):
                             # additional commits, so it would be necessary to find older auth repo
                             # commit and start the validation from there
                             if (
-                                not current_branch
-                                in repository.branches_containing_commit(
+                                current_branch
+                                not in repository.branches_containing_commit(
                                     last_validated_commit
                                 )
                             ):

From 1cbf90b971981c995ad6390f31a25396e67b7f18 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 5 Dec 2023 21:06:17 +0100
Subject: [PATCH 18/28] fix: fix updater when target files do not exist but
 repositroies.json does

---
 taf/tests/test_api/test_create_repository.py | 104 +++++++++----------
 taf/updater/updater_pipeline.py              |  22 +++-
 2 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/taf/tests/test_api/test_create_repository.py b/taf/tests/test_api/test_create_repository.py
index 326ff700..ed1a64ee 100644
--- a/taf/tests/test_api/test_create_repository.py
+++ b/taf/tests/test_api/test_create_repository.py
@@ -41,58 +41,58 @@ def _check_repo_initialization_successful(auth_repo: AuthenticationRepository):
     assert commits[0].message.strip() == git_commit_message("create-repo")
 
 
-def test_create_repository_when_no_delegations(
-    auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
-):
-    repo_path = str(auth_repo_path)
-    create_repository(
-        repo_path,
-        roles_key_infos=no_yubikeys_path,
-        keystore=api_keystore,
-        commit=True,
-    )
-
-    auth_repo = AuthenticationRepository(path=repo_path)
-    _check_repo_initialization_successful(auth_repo)
-    assert auth_repo.is_test_repo is False
-    validate_repository(repo_path)
-
-
-def test_create_repository_when_no_delegations_with_test_flag(
-    auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
-):
-    repo_path = str(auth_repo_path)
-    create_repository(
-        repo_path,
-        roles_key_infos=no_yubikeys_path,
-        keystore=api_keystore,
-        commit=True,
-        test=True,
-    )
-
-    auth_repo = AuthenticationRepository(path=repo_path)
-    _check_repo_initialization_successful(auth_repo)
-    assert auth_repo.is_test_repo is True
-    validate_repository(repo_path)
-
-
-def test_create_repository_when_delegations(
-    auth_repo_path: Path, with_delegations_no_yubikeys_path: str, api_keystore: str
-):
-    repo_path = str(auth_repo_path)
-    create_repository(
-        str(auth_repo_path),
-        roles_key_infos=with_delegations_no_yubikeys_path,
-        keystore=api_keystore,
-        commit=True,
-    )
-
-    auth_repo = AuthenticationRepository(path=auth_repo_path)
-    _check_repo_initialization_successful(auth_repo)
-    targets_roles = auth_repo.get_all_targets_roles()
-    for role in ("targets", "delegated_role", "inner_role"):
-        assert role in targets_roles
-    validate_repository(repo_path)
+# def test_create_repository_when_no_delegations(
+#     auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
+# ):
+#     repo_path = str(auth_repo_path)
+#     create_repository(
+#         repo_path,
+#         roles_key_infos=no_yubikeys_path,
+#         keystore=api_keystore,
+#         commit=True,
+#     )
+
+#     auth_repo = AuthenticationRepository(path=repo_path)
+#     _check_repo_initialization_successful(auth_repo)
+#     assert auth_repo.is_test_repo is False
+#     validate_repository(repo_path)
+
+
+# def test_create_repository_when_no_delegations_with_test_flag(
+#     auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
+# ):
+#     repo_path = str(auth_repo_path)
+#     create_repository(
+#         repo_path,
+#         roles_key_infos=no_yubikeys_path,
+#         keystore=api_keystore,
+#         commit=True,
+#         test=True,
+#     )
+
+#     auth_repo = AuthenticationRepository(path=repo_path)
+#     _check_repo_initialization_successful(auth_repo)
+#     assert auth_repo.is_test_repo is True
+#     validate_repository(repo_path)
+
+
+# def test_create_repository_when_delegations(
+#     auth_repo_path: Path, with_delegations_no_yubikeys_path: str, api_keystore: str
+# ):
+#     repo_path = str(auth_repo_path)
+#     create_repository(
+#         str(auth_repo_path),
+#         roles_key_infos=with_delegations_no_yubikeys_path,
+#         keystore=api_keystore,
+#         commit=True,
+#     )
+
+#     auth_repo = AuthenticationRepository(path=auth_repo_path)
+#     _check_repo_initialization_successful(auth_repo)
+#     targets_roles = auth_repo.get_all_targets_roles()
+#     for role in ("targets", "delegated_role", "inner_role"):
+#         assert role in targets_roles
+#     validate_repository(repo_path)
 
 
 def test_create_repository_when_add_repositories_json(
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 0e08f2fc..d8593576 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -92,10 +92,12 @@ def wrapper(self, *args, **kwargs):
 class Pipeline:
     def __init__(self, steps):
         self.steps = steps
+        self.current_step = None
 
     def run(self):
         for step in self.steps:
             try:
+                self.current_step = step
                 update_status = step()
                 if update_status == UpdateStatus.FAILED:
                     break
@@ -108,7 +110,11 @@ def run(self):
         self.set_output()
 
     def handle_error(self, e):
-        taf_logger.error("An error occurred while running the updater: {}", str(e))
+        taf_logger.error(
+            "An error occurred while running step {}: {}",
+            self.current_step.__name__,
+            str(e),
+        )
         raise e
 
     def set_output(self):
@@ -460,6 +466,9 @@ def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
         self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
         for repository in self.state.target_repositories.values():
+            if repository.name not in self.state.target_branches_data_from_auth_repo:
+                # exists in repositories.json, not target files
+                continue
             for branch in self.state.target_branches_data_from_auth_repo[
                 repository.name
             ]:
@@ -553,6 +562,8 @@ def validate_target_repositories(self):
             self.state.validated_auth_commits = []
             for auth_commit in self.state.auth_commits_since_last_validated:
                 for repository in self.state.target_repositories.values():
+                    if repository.name not in self.state.targets_data_by_auth_commits:
+                        continue
                     if (
                         auth_commit
                         not in self.state.targets_data_by_auth_commits[repository.name]
@@ -799,6 +810,8 @@ def set_target_repositories_data(self):
             targets_data = {}
             for repo_name, repo in self.state.target_repositories.items():
                 targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
+                if repo_name not in self.state.targets_data_by_auth_commits:
+                    continue
                 commits_data = self.state.targets_data_by_auth_commits[repo_name]
 
                 branch_data = defaultdict(dict)
@@ -835,7 +848,7 @@ def set_target_repositories_data(self):
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
-            return False
+            return UpdateStatus.FAILED
 
     def set_output(self):
         if self.state.auth_commits_since_last_validated is None:
@@ -849,7 +862,10 @@ def set_output(self):
                 else None
             )
 
-            commit_after_pull = self.state.validated_auth_commits[-1]
+            if len(self.state.validated_auth_commits):
+                commit_after_pull = self.state.validated_auth_commits[-1]
+            else:
+                commit_after_pull = None
 
             if not self.state.existing_repo:
                 new_commits = self.state.validated_auth_commits

From eaefc70f81369fa61f794033106a7f774ae22650 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 6 Dec 2023 00:59:48 +0100
Subject: [PATCH 19/28] fix: fix failing test, revert determine branch updates

---
 taf/git.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/taf/git.py b/taf/git.py
index 934e4e60..35b200f9 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -844,11 +844,7 @@ def get_default_branch(self, url: Optional[str] = None) -> Optional[str]:
         if url is not None:
             url = url.strip()
             return self._get_default_branch_from_remote(url)
-        # there is not point in trying to find the default branch from a local repository
-        # if the directory is not a git repository (e.g. not yet cloned)
-        if self.is_git_repository_root:
-            return self._get_default_branch_from_local()
-        return None
+        return self._get_default_branch_from_local()
 
     def get_json(
         self, commit: str, path: str, raw: Optional[bool] = False

From 9c6f978966f293b66b8b8a97703c1c052ad2b5c4 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 6 Dec 2023 21:18:39 +0100
Subject: [PATCH 20/28] feat: updater - set last validated commit after partial
 update

---
 taf/tests/test_api/test_create_repository.py  | 104 ++++++++---------
 .../test_repo_update/test_updater.py          |  50 ++++++---
 taf/updater/updater.py                        |  47 +++-----
 taf/updater/updater_pipeline.py               | 106 +++++++++++-------
 4 files changed, 170 insertions(+), 137 deletions(-)

diff --git a/taf/tests/test_api/test_create_repository.py b/taf/tests/test_api/test_create_repository.py
index ed1a64ee..326ff700 100644
--- a/taf/tests/test_api/test_create_repository.py
+++ b/taf/tests/test_api/test_create_repository.py
@@ -41,58 +41,58 @@ def _check_repo_initialization_successful(auth_repo: AuthenticationRepository):
     assert commits[0].message.strip() == git_commit_message("create-repo")
 
 
-# def test_create_repository_when_no_delegations(
-#     auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
-# ):
-#     repo_path = str(auth_repo_path)
-#     create_repository(
-#         repo_path,
-#         roles_key_infos=no_yubikeys_path,
-#         keystore=api_keystore,
-#         commit=True,
-#     )
-
-#     auth_repo = AuthenticationRepository(path=repo_path)
-#     _check_repo_initialization_successful(auth_repo)
-#     assert auth_repo.is_test_repo is False
-#     validate_repository(repo_path)
-
-
-# def test_create_repository_when_no_delegations_with_test_flag(
-#     auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
-# ):
-#     repo_path = str(auth_repo_path)
-#     create_repository(
-#         repo_path,
-#         roles_key_infos=no_yubikeys_path,
-#         keystore=api_keystore,
-#         commit=True,
-#         test=True,
-#     )
-
-#     auth_repo = AuthenticationRepository(path=repo_path)
-#     _check_repo_initialization_successful(auth_repo)
-#     assert auth_repo.is_test_repo is True
-#     validate_repository(repo_path)
-
-
-# def test_create_repository_when_delegations(
-#     auth_repo_path: Path, with_delegations_no_yubikeys_path: str, api_keystore: str
-# ):
-#     repo_path = str(auth_repo_path)
-#     create_repository(
-#         str(auth_repo_path),
-#         roles_key_infos=with_delegations_no_yubikeys_path,
-#         keystore=api_keystore,
-#         commit=True,
-#     )
-
-#     auth_repo = AuthenticationRepository(path=auth_repo_path)
-#     _check_repo_initialization_successful(auth_repo)
-#     targets_roles = auth_repo.get_all_targets_roles()
-#     for role in ("targets", "delegated_role", "inner_role"):
-#         assert role in targets_roles
-#     validate_repository(repo_path)
+def test_create_repository_when_no_delegations(
+    auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
+):
+    repo_path = str(auth_repo_path)
+    create_repository(
+        repo_path,
+        roles_key_infos=no_yubikeys_path,
+        keystore=api_keystore,
+        commit=True,
+    )
+
+    auth_repo = AuthenticationRepository(path=repo_path)
+    _check_repo_initialization_successful(auth_repo)
+    assert auth_repo.is_test_repo is False
+    validate_repository(repo_path)
+
+
+def test_create_repository_when_no_delegations_with_test_flag(
+    auth_repo_path: Path, no_yubikeys_path: str, api_keystore: str
+):
+    repo_path = str(auth_repo_path)
+    create_repository(
+        repo_path,
+        roles_key_infos=no_yubikeys_path,
+        keystore=api_keystore,
+        commit=True,
+        test=True,
+    )
+
+    auth_repo = AuthenticationRepository(path=repo_path)
+    _check_repo_initialization_successful(auth_repo)
+    assert auth_repo.is_test_repo is True
+    validate_repository(repo_path)
+
+
+def test_create_repository_when_delegations(
+    auth_repo_path: Path, with_delegations_no_yubikeys_path: str, api_keystore: str
+):
+    repo_path = str(auth_repo_path)
+    create_repository(
+        str(auth_repo_path),
+        roles_key_infos=with_delegations_no_yubikeys_path,
+        keystore=api_keystore,
+        commit=True,
+    )
+
+    auth_repo = AuthenticationRepository(path=auth_repo_path)
+    _check_repo_initialization_successful(auth_repo)
+    targets_roles = auth_repo.get_all_targets_roles()
+    for role in ("targets", "delegated_role", "inner_role"):
+        assert role in targets_roles
+    validate_repository(repo_path)
 
 
 def test_create_repository_when_add_repositories_json(
diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py
index 021c6c37..b2af8f7f 100644
--- a/taf/tests/test_updater/test_repo_update/test_updater.py
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py
@@ -244,32 +244,47 @@ def test_no_update_necessary(
 
 
 @pytest.mark.parametrize(
-    "test_name, expected_error, auth_repo_name_exists, expect_partial_update",
+    "test_name, expected_error, auth_repo_name_exists, expect_partial_update, should_last_validated_exist",
     [
-        ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN, True, True),
+        ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN, True, True, True),
         (
             "test-updater-additional-target-commit",
             TARGET_COMMIT_AFTER_LAST_VALIDATED,
             True,
             True,
+            True,
+        ),
+        (
+            "test-updater-missing-target-commit",
+            TARGET_ADDITIONAL_COMMIT,
+            True,
+            True,
+            True,
         ),
-        ("test-updater-missing-target-commit", TARGET_ADDITIONAL_COMMIT, True, True),
-        ("test-updater-wrong-key", NO_WORKING_MIRRORS, True, True),
-        ("test-updater-invalid-version-number", REPLAYED_METADATA, True, True),
+        ("test-updater-wrong-key", NO_WORKING_MIRRORS, True, True, False),
+        ("test-updater-invalid-version-number", REPLAYED_METADATA, True, True, False),
         (
             "test-updater-delegated-roles-wrong-sha",
             TARGET_MISSMATCH_PATTERN,
             True,
             True,
+            True,
+        ),
+        # ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True, True, False),
+        (
+            "test-updater-updated-root-invalid-metadata",
+            NO_WORKING_MIRRORS,
+            True,
+            True,
+            False,
         ),
-        ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True, True),
-        ("test-updater-updated-root-invalid-metadata", NO_WORKING_MIRRORS, True, True),
-        ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False, True),
+        ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False, True, False),
         (
             "test-updater-invalid-snapshot-meta-field-missing",
             METADATA_FIELD_MISSING,
             False,
             True,
+            False,
         ),
     ],
 )
@@ -280,6 +295,7 @@ def test_updater_invalid_update(
     updater_repositories,
     client_dir,
     expect_partial_update,
+    should_last_validated_exist,
 ):
     repositories = updater_repositories[test_name]
     clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH
@@ -292,7 +308,9 @@ def test_updater_invalid_update(
         auth_repo_name_exists=auth_repo_name_exists,
     )
     # make sure that the last validated commit does not exist
-    _check_if_last_validated_commit_exists(clients_auth_repo_path)
+    _check_if_last_validated_commit_exists(
+        clients_auth_repo_path, should_last_validated_exist
+    )
 
 
 @pytest.mark.parametrize(
@@ -313,7 +331,7 @@ def test_valid_update_no_auth_repo_one_invalid_target_repo_exists(
         client_dir, repositories, expected_error, True
     )
     # make sure that the last validated commit does not exist
-    _check_if_last_validated_commit_exists(clients_auth_repo_path)
+    _check_if_last_validated_commit_exists(clients_auth_repo_path, True)
 
 
 def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir):
@@ -328,7 +346,7 @@ def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir):
         client_dir, repositories, ROOT_EXPIRED, False, set_time=False, strict=True
     )
     # make sure that the last validated commit does not exist
-    _check_if_last_validated_commit_exists(clients_auth_repo_path)
+    _check_if_last_validated_commit_exists(clients_auth_repo_path, False)
 
 
 @pytest.mark.parametrize(
@@ -447,10 +465,16 @@ def _check_last_validated_commit(clients_auth_repo_path):
     assert head_sha == last_validated_commit
 
 
-def _check_if_last_validated_commit_exists(clients_auth_repo_path):
+def _check_if_last_validated_commit_exists(clients_auth_repo_path, should_exist):
     client_auth_repo = AuthenticationRepository(path=clients_auth_repo_path)
     last_validated_commit = client_auth_repo.last_validated_commit
-    assert last_validated_commit is None
+    if not should_exist:
+        assert last_validated_commit is None
+    else:
+        assert (
+            client_auth_repo.top_commit_of_branch(client_auth_repo.default_branch)
+            == last_validated_commit
+        )
 
 
 def _check_if_commits_match(
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index c452c7f1..719cebe3 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -5,7 +5,10 @@
 from logdecorator import log_on_error
 from taf.git import GitRepository
 from taf.updater.types.update import UpdateType
-from taf.updater.updater_pipeline import AuthenticationRepositoryUpdatePipeline
+from taf.updater.updater_pipeline import (
+    AuthenticationRepositoryUpdatePipeline,
+    _merge_commit,
+)
 
 from pathlib import Path
 from taf.log import taf_logger, disable_tuf_console_logging
@@ -449,10 +452,19 @@ def _update_named_repository(
         # use last validated commit - if the repository contains it
 
         # all repositories that can be updated will be updated
-        if not only_validate and len(commits) and update_status == Event.CHANGED:
+        if (
+            not only_validate
+            and len(commits)
+            and (update_status == Event.CHANGED or update_status == Event.PARTIAL)
+        ):
+            # when performing breadth-first update, validation might fail at some point
+            # but we want to update all repository up to it
+            # so set last validated commit to this last valid commit
             last_commit = commits[-1]
             # if there were no errors, merge the last validated authentication repository commit
-            _merge_commit(auth_repo, auth_repo.default_branch, last_commit, checkout)
+            _merge_commit(
+                auth_repo, auth_repo.default_branch, last_commit, checkout, True
+            )
             # update the last validated commit
             if not excluded_target_globs:
                 auth_repo.set_last_validated_commit(last_commit)
@@ -540,35 +552,6 @@ def _update_transient_data(
     return update_transient_data
 
 
-def _merge_commit(repository, branch, commit_to_merge, checkout=True):
-    """Merge the specified commit into the given branch and check out the branch.
-    If the repository cannot contain unauthenticated commits, check out the merged commit.
-    """
-    taf_logger.info("Merging commit {} into {}", commit_to_merge, repository.name)
-    try:
-        repository.checkout_branch(branch, raise_anyway=True)
-    except GitError as e:
-        # two scenarios:
-        # current git repository is in an inconsistent state:
-        # - .git/index.lock exists (git partial update got applied)
-        # should get addressed in https://github.com/openlawlibrary/taf/issues/210
-        # current git repository has uncommitted changes:
-        # we do not want taf to lose any repo data, so we do not reset the repository.
-        # for now, raise an update error and let the user manually reset the repository
-        taf_logger.error(
-            "Could not checkout branch {} during commit merge. Error {}", branch, e
-        )
-        raise UpdateFailedError(
-            f"Repository {repository.name} should contain only committed changes. \n"
-            + f"Please update the repository at {repository.path} manually and try again."
-        )
-
-    repository.merge_commit(commit_to_merge)
-    if checkout:
-        taf_logger.info("{}: checking out branch {}", repository.name, branch)
-        repository.checkout_branch(branch)
-
-
 @timed_run("Validating repository")
 def validate_repository(
     clients_auth_path,
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index d8593576..c5d8bcc9 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -716,46 +716,57 @@ def validate_and_set_additional_commits_of_target_repositories(self):
         if self.state.update_status != UpdateStatus.SUCCESS:
             return self.state.update_status
         self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
-        for repository in self.state.target_repositories.values():
-            # this will only include branches that were, at least partially, validated (up until a certain point)
-            for (
-                branch,
-                validated_commits,
-            ) in self.state.validated_commits_per_target_repos_branches[
-                repository.name
-            ].items():
-                last_validated_commit = validated_commits[-1]
-                # TODO what to do if an error occurred while validating that branch
-                branch_commits = self.state.fetched_commits_per_target_repos_branches[
+        try:
+            for repository in self.state.target_repositories.values():
+                # this will only include branches that were, at least partially, validated (up until a certain point)
+                for (
+                    branch,
+                    validated_commits,
+                ) in self.state.validated_commits_per_target_repos_branches[
                     repository.name
-                ][branch]
-                additional_commits = branch_commits[
-                    branch_commits.index(last_validated_commit) + 1 :
-                ]
-                if len(additional_commits):
-                    if not _is_unauthenticated_allowed(repository):
-                        raise UpdateFailedError(
-                            f"Target repository {repository.name} does not allow unauthenticated commits, but contains commit(s) {', '.join(additional_commits)} on branch {branch}"
-                        )
-
-                    taf_logger.info(
-                        f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}"
+                ].items():
+                    last_validated_commit = validated_commits[-1]
+                    # TODO what to do if an error occurred while validating that branch
+                    branch_commits = (
+                        self.state.fetched_commits_per_target_repos_branches[
+                            repository.name
+                        ][branch]
                     )
+                    additional_commits = branch_commits[
+                        branch_commits.index(last_validated_commit) + 1 :
+                    ]
+                    if len(additional_commits):
+                        if not _is_unauthenticated_allowed(repository):
+                            raise UpdateFailedError(
+                                f"Target repository {repository.name} does not allow unauthenticated commits, but contains commit(s) {', '.join(additional_commits)} on branch {branch}"
+                            )
 
-                    # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
-                    # that does not necessarily mean that the local repository is not up to date with the remote
-                    # pull could've been run manually
-                    # check where the current local head is
-                    # TODO I don't remember why this was done!
-                    branch_current_head = repository.top_commit_of_branch(branch)
-                    if branch_current_head in additional_commits:
-                        additional_commits = additional_commits[
-                            additional_commits.index(branch_current_head) + 1 :
-                        ]
-                self.state.additional_commits_per_target_repos_branches[
-                    repository.name
-                ][branch] = additional_commits
-        return self.state.update_status
+                        taf_logger.info(
+                            f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}"
+                        )
+
+                        # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
+                        # that does not necessarily mean that the local repository is not up to date with the remote
+                        # pull could've been run manually
+                        # check where the current local head is
+                        # TODO I don't remember why this was done!
+                        branch_current_head = repository.top_commit_of_branch(branch)
+                        if branch_current_head in additional_commits:
+                            additional_commits = additional_commits[
+                                additional_commits.index(branch_current_head) + 1 :
+                            ]
+                    self.state.additional_commits_per_target_repos_branches[
+                        repository.name
+                    ][branch] = additional_commits
+            return self.state.update_status
+        except UpdateFailedError as e:
+            self.state.error = e
+            self.state.event = Event.PARTIAL
+            return UpdateStatus.PARTIAL
+        except Exception as e:
+            self.state.error = e
+            self.state.event = Event.FAILED
+            return UpdateStatus.FAILED
 
     @log_on_start(
         INFO, "Merging commits into target repositories...", logger=taf_logger
@@ -817,7 +828,7 @@ def set_target_repositories_data(self):
                 branch_data = defaultdict(dict)
 
                 # Iterate through auth_commits in the specified order
-                for auth_commit in self.state.auth_commits_since_last_validated:
+                for auth_commit in self.state.validated_auth_commits:
                     commit_info = commits_data.get(auth_commit)
                     if not commit_info or "branch" not in commit_info:
                         continue
@@ -845,6 +856,7 @@ def set_target_repositories_data(self):
 
                 targets_data[repo_name]["commits"] = branch_data
             self.state.targets_data = targets_data
+            return self.state.update_status
         except Exception as e:
             self.state.error = e
             self.state.event = Event.FAILED
@@ -1020,7 +1032,9 @@ def _find_next_value(value, values_list):
     return None
 
 
-def _merge_commit(repository, branch, commit_to_merge, checkout=True):
+def _merge_commit(
+    repository, branch, commit_to_merge, checkout=True, force_revert=False
+):
     """Merge the specified commit into the given branch and check out the branch.
     If the repository cannot contain unauthenticated commits, check out the merged commit.
     """
@@ -1045,7 +1059,19 @@ def _merge_commit(repository, branch, commit_to_merge, checkout=True):
             f"Please update the repository at {repository.path} manually and try again."
         )
 
-    repository.merge_commit(commit_to_merge)
+    commit_merged = False
+    if force_revert:
+        # check if repository already contains this commit that needs to be merged
+        # and commits following it
+        commits_since_last_validated = repository.all_commits_since_commit(
+            commit_to_merge
+        )
+        if len(commits_since_last_validated):
+            repository.reset_to_commit(commit_to_merge)
+            commit_merged = True
+
+    if not commit_merged:
+        repository.merge_commit(commit_to_merge)
     if checkout:
         taf_logger.info("{}: checking out branch {}", repository.name, branch)
         repository.checkout_branch(branch)

From 944859d4af6ff3a71fdeeefa71ffd8bf3b298e66 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 6 Dec 2023 21:39:07 +0100
Subject: [PATCH 21/28] chore: remove unused import and invalid test

---
 taf/tests/test_updater/test_repo_update/test_updater.py | 1 -
 taf/updater/updater.py                                  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py
index b2af8f7f..d463e7fc 100644
--- a/taf/tests/test_updater/test_repo_update/test_updater.py
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py
@@ -270,7 +270,6 @@ def test_no_update_necessary(
             True,
             True,
         ),
-        # ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True, True, False),
         (
             "test-updater-updated-root-invalid-metadata",
             NO_WORKING_MIRRORS,
diff --git a/taf/updater/updater.py b/taf/updater/updater.py
index 719cebe3..688937ee 100644
--- a/taf/updater/updater.py
+++ b/taf/updater/updater.py
@@ -18,7 +18,6 @@
 from taf.exceptions import (
     ScriptExecutionError,
     UpdateFailedError,
-    GitError,
     ValidationFailedError,
 )
 

From 588ec751714eae3edd01fca47353000158956c31 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Thu, 7 Dec 2023 04:02:08 +0100
Subject: [PATCH 22/28] chore: update changelog

---
 CHANGELOG.md                    | 27 ++++++++-------------------
 taf/updater/updater_pipeline.py | 13 ++++++-------
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a451e4cb..0c08cf99 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,19 +7,20 @@ and this project adheres to [Semantic Versioning][semver].
 
 ## [Unreleased]
 
-
 ### Added
 
-
 ### Changed
 
+- Implement updater pipeline ([374])
+- Improve error messages and error logging ([374])
+- Update target repositories in a breadth-first way ([374])
 
 ### Fixed
 
+[374]: https://github.com/openlawlibrary/taf/pull/374
 
 ## [0.28.0] - 11/10/2023
 
-
 ### Added
 
 - Implement tests for the functions which are directly called by the cli (API package) ([362])
@@ -83,9 +84,7 @@ and this project adheres to [Semantic Versioning][semver].
 [357]: https://github.com/openlawlibrary/taf/pull/357
 [355]: https://github.com/openlawlibrary/taf/pull/355
 [354]: https://github.com/openlawlibrary/taf/pull/354
-[353]: https://github.com/openlawlibrary/taf/pull/353
 [352]: https://github.com/openlawlibrary/taf/pull/352
-[351]: https://github.com/openlawlibrary/taf/pull/351
 [349]: https://github.com/openlawlibrary/taf/pull/349
 [346]: https://github.com/openlawlibrary/taf/pull/346
 [343]: https://github.com/openlawlibrary/taf/pull/343
@@ -107,7 +106,6 @@ and this project adheres to [Semantic Versioning][semver].
 
 ### Fixed
 
-[349]: https://github.com/openlawlibrary/taf/pull/349
 
 ## [0.26.0] - 07/12/2023
 
@@ -126,7 +124,6 @@ and this project adheres to [Semantic Versioning][semver].
 
 - Fix create repository ([325])
 
-
 [325]: https://github.com/openlawlibrary/taf/pull/325
 [321]: https://github.com/openlawlibrary/taf/pull/321
 [320]: https://github.com/openlawlibrary/taf/pull/320
@@ -144,11 +141,9 @@ and this project adheres to [Semantic Versioning][semver].
 
 - Fix execution of scripts ([311])
 
-
 [313]: https://github.com/openlawlibrary/taf/pull/313
 [311]: https://github.com/openlawlibrary/taf/pull/311
 
-
 ## [0.24.0] - 02/21/2023
 
 ### Added
@@ -162,7 +157,6 @@ and this project adheres to [Semantic Versioning][semver].
 
 - Use `generate_and_write_unencrypted_rsa_keypair` for no provided password ([305])
 
-
 [309]: https://github.com/openlawlibrary/taf/pull/309
 [308]: https://github.com/openlawlibrary/taf/pull/308
 [305]: https://github.com/openlawlibrary/taf/pull/305
@@ -174,8 +168,8 @@ and this project adheres to [Semantic Versioning][semver].
 ### Changed
 
 ### Fixed
-- Fix `clone_or_pull` method ([303])
 
+- Fix `clone_or_pull` method ([303])
 
 [303]: https://github.com/openlawlibrary/taf/pull/303
 
@@ -201,6 +195,7 @@ and this project adheres to [Semantic Versioning][semver].
 ### Changed
 
 ### Fixed
+
 - Pin `pyOpenSSL` to newer version ([299])
 
 [299]: https://github.com/openlawlibrary/taf/pull/299
@@ -212,7 +207,8 @@ and this project adheres to [Semantic Versioning][semver].
 ### Changed
 
 ### Fixed
- - Add missing tuf import in `log.py` ([298])
+
+- Add missing tuf import in `log.py` ([298])
 
 [298]: https://github.com/openlawlibrary/taf/pull/298
 
@@ -226,17 +222,14 @@ and this project adheres to [Semantic Versioning][semver].
 
 - Remove _tuf_patches in `__init__.py` ([297])
 
-
 [297]: https://github.com/openlawlibrary/taf/pull/297
 
 ### Added
 
 ### Changed
 
-
 ### Fixed
 
-
 ## [0.22.1] - 12/14/2022
 
 ### Added
@@ -247,10 +240,8 @@ and this project adheres to [Semantic Versioning][semver].
 
 - Move _tuf_patches to repository lib ([296])
 
-
 [296]: https://github.com/openlawlibrary/taf/pull/296
 
-
 ## [0.22.0] - 12/09/2022
 
 ### Added
@@ -276,7 +267,6 @@ and this project adheres to [Semantic Versioning][semver].
 - Fix `all_commits_since_commit` to validate provided commit ([278])
 - Remove pin for `PyOpenSSL` ([273])
 
-
 [294]: https://github.com/openlawlibrary/taf/pull/294
 [293]: https://github.com/openlawlibrary/taf/pull/293
 [292]: https://github.com/openlawlibrary/taf/pull/292
@@ -1045,7 +1035,6 @@ and this project adheres to [Semantic Versioning][semver].
 [0.13.0]: https://github.com/openlawlibrary/taf/compare/v0.12.0...v0.13.0
 [0.12.0]: https://github.com/openlawlibrary/taf/compare/v0.11.2...v0.12.0
 [0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.1...v0.11.2
-[0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.0...v0.11.1
 [0.11.0]: https://github.com/openlawlibrary/taf/compare/v0.10.1...v0.11.0
 [0.10.1]: https://github.com/openlawlibrary/taf/compare/v0.10.0...v0.10.1
 [0.10.0]: https://github.com/openlawlibrary/taf/compare/v0.9.0...v0.10.0
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c5d8bcc9..d0e5cbf6 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -359,7 +359,7 @@ def clone_target_repositories_if_not_on_disk(self):
     )
     @log_on_end(
         INFO,
-        "Validation of the initial state of target repositories finished",
+        "Checking initial state of repositories",
         logger=taf_logger,
     )
     def determine_start_commits(self):
@@ -624,10 +624,9 @@ def validate_target_repositories(self):
                 # commit processed without an error
                 self.state.validated_auth_commits.append(auth_commit)
             return UpdateStatus.SUCCESS
-        # TODO merge until the last validated
-        # if at some point validation fails
         except Exception as e:
             self.state.error = e
+            taf_logger.error(e)
             if len(self.state.validated_auth_commits):
                 self.state.event = Event.PARTIAL
                 return UpdateStatus.PARTIAL
@@ -645,9 +644,6 @@ def _validate_current_repo_commit(
         target_commits_from_target_repo,
         current_auth_commit,
     ):
-        # TODO there is an error with fetched target commits when there is an unauthenticated commit
-        # at the end of branch of a repo that does not support unauthenticated commits
-
         target_commits_from_target_repos_on_branch = target_commits_from_target_repo[
             current_branch
         ]
@@ -749,7 +745,6 @@ def validate_and_set_additional_commits_of_target_repositories(self):
                         # that does not necessarily mean that the local repository is not up to date with the remote
                         # pull could've been run manually
                         # check where the current local head is
-                        # TODO I don't remember why this was done!
                         branch_current_head = repository.top_commit_of_branch(branch)
                         if branch_current_head in additional_commits:
                             additional_commits = additional_commits[
@@ -761,10 +756,12 @@ def validate_and_set_additional_commits_of_target_repositories(self):
             return self.state.update_status
         except UpdateFailedError as e:
             self.state.error = e
+            taf_logger.error(e)
             self.state.event = Event.PARTIAL
             return UpdateStatus.PARTIAL
         except Exception as e:
             self.state.error = e
+            taf_logger.error(e)
             self.state.event = Event.FAILED
             return UpdateStatus.FAILED
 
@@ -813,6 +810,7 @@ def merge_commits(self):
                 return self.state.update_status
         except Exception as e:
             self.state.error = e
+            taf_logger.error(e)
             self.state.event = Event.FAILED
             return UpdateStatus.FAILED
 
@@ -859,6 +857,7 @@ def set_target_repositories_data(self):
             return self.state.update_status
         except Exception as e:
             self.state.error = e
+            taf_logger.error(e)
             self.state.event = Event.FAILED
             return UpdateStatus.FAILED
 

From bfee646d022699f78444f4c332ef6de66702d309 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 12 Dec 2023 00:50:27 +0100
Subject: [PATCH 23/28] fix: out of band validation minor fix, minor error
 handling fixes

---
 taf/updater/updater_pipeline.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index d0e5cbf6..c1fea35b 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -100,7 +100,7 @@ def run(self):
                 self.current_step = step
                 update_status = step()
                 if update_status == UpdateStatus.FAILED:
-                    break
+                    raise UpdateFailedError(self.state.error)
                 self.state.update_status = update_status
 
             except Exception as e:
@@ -111,7 +111,8 @@ def run(self):
 
     def handle_error(self, e):
         taf_logger.error(
-            "An error occurred while running step {}: {}",
+            "An error occurred while updating repository {} while running step {}: {}",
+            self.state.auth_repo_name,
             self.current_step.__name__,
             str(e),
         )
@@ -210,9 +211,9 @@ def clone_remote_and_run_tuf_updater(self):
             _run_tuf_updater(git_updater)
             self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
             self.state.validation_auth_repo = git_updater.validation_auth_repo
+            self.state.auth_commits_since_last_validated = list(git_updater.commits)
             self._validate_out_of_band_and_update_type()
 
-            self.state.auth_commits_since_last_validated = list(git_updater.commits)
             self.state.event = (
                 Event.CHANGED
                 if len(self.state.auth_commits_since_last_validated) > 1
@@ -333,7 +334,7 @@ def load_target_repositories(self):
     @log_on_start(
         INFO, "Cloning target repositories which are not on disk...", logger=taf_logger
     )
-    @log_on_start(INFO, "Finished cloning target repositories", logger=taf_logger)
+    @log_on_end(INFO, "Finished cloning target repositories", logger=taf_logger)
     def clone_target_repositories_if_not_on_disk(self):
         try:
             self.state.cloned_target_repositories = []
@@ -483,6 +484,11 @@ def get_target_repositories_commits(self):
                             taf_logger.error(msg)
                             raise UpdateFailedError(msg)
                 else:
+                    if self.only_validate:
+                        self.state.targets_data = {}
+                        msg = f"{repository.name} not on disk. Please run update to clone the repositories."
+                        taf_logger.error(msg)
+                        raise UpdateFailedError(msg)
                     repository.fetch(branch=branch)
 
                 old_head = self.state.old_heads_per_target_repos_branches[

From 8918757990b6f7b44e44c1be50d9472f8f55afde Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 12 Dec 2023 18:38:42 +0100
Subject: [PATCH 24/28] fix: fix target commits fetching

---
 taf/git.py                      |  2 +-
 taf/updater/updater_pipeline.py | 23 ++++++++---------------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/taf/git.py b/taf/git.py
index 35b200f9..49e7364c 100644
--- a/taf/git.py
+++ b/taf/git.py
@@ -293,7 +293,7 @@ def all_commits_on_branch(
             )
         if branch:
             branch_obj = repo.branches.get(branch)
-            if branch is None:
+            if branch_obj is None:
                 raise GitError(
                     self,
                     message=f"Error occurred while getting commits of branch {branch}. Branch does not exist",
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c1fea35b..532a299e 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -342,10 +342,10 @@ def clone_target_repositories_if_not_on_disk(self):
                 is_git_repository = repository.is_git_repository_root
                 if not is_git_repository:
                     if self.only_validate:
-                        taf_logger.warning(
-                            "Target repositories must already exist when only validating repositories"
-                        )
-                        continue
+                        self.state.targets_data = {}
+                        msg = f"{repository.name} not on disk. Please run update to clone the repositories."
+                        taf_logger.error(msg)
+                        raise UpdateFailedError(msg)
                     repository.clone(no_checkout=True)
                     self.state.cloned_target_repositories.append(repository)
             return UpdateStatus.SUCCESS
@@ -460,9 +460,7 @@ def get_targets_data_from_auth_repo(self):
         self.state.target_branches_data_from_auth_repo = repo_branches
         return UpdateStatus.SUCCESS
 
-    @log_on_start(
-        DEBUG, "Getting fetched commits of target repositories", logger=taf_logger
-    )
+    @log_on_start(DEBUG, "Fetching commits of target repositories", logger=taf_logger)
     def get_target_repositories_commits(self):
         """Returns a list of newly fetched commits belonging to the specified branch."""
         self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
@@ -473,7 +471,7 @@ def get_target_repositories_commits(self):
             for branch in self.state.target_branches_data_from_auth_repo[
                 repository.name
             ]:
-                if repository.is_git_repository_root:
+                if repository not in self.state.cloned_target_repositories:
                     if self.only_validate:
                         branch_exists = repository.branch_exists(
                             branch, include_remotes=False
@@ -483,13 +481,8 @@ def get_target_repositories_commits(self):
                             msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories."
                             taf_logger.error(msg)
                             raise UpdateFailedError(msg)
-                else:
-                    if self.only_validate:
-                        self.state.targets_data = {}
-                        msg = f"{repository.name} not on disk. Please run update to clone the repositories."
-                        taf_logger.error(msg)
-                        raise UpdateFailedError(msg)
-                    repository.fetch(branch=branch)
+                    else:
+                        repository.fetch(branch=branch)
 
                 old_head = self.state.old_heads_per_target_repos_branches[
                     repository.name

From cd24900f3a0e05f4cb6dc45922c66e1cd679b4f5 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Tue, 12 Dec 2023 19:47:45 +0100
Subject: [PATCH 25/28] test: fix failing create repository test

---
 taf/tests/test_api/test_create_repository.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/taf/tests/test_api/test_create_repository.py b/taf/tests/test_api/test_create_repository.py
index 326ff700..fbf011c9 100644
--- a/taf/tests/test_api/test_create_repository.py
+++ b/taf/tests/test_api/test_create_repository.py
@@ -120,4 +120,5 @@ def test_create_repository_when_add_repositories_json(
     for role in ("targets", "delegated_role", "inner_role"):
         assert role in targets_roles
     check_if_targets_signed(auth_repo, "targets", "repositories.json", "mirrors.json")
-    validate_repository(repo_path)
+    # we are not validating target repositories, just the authentication repository
+    validate_repository(repo_path, excluded_target_globs="*")

From 7c4c39b16c2efe5c07b76f66653ddecf4645d829 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 13 Dec 2023 19:21:54 +0100
Subject: [PATCH 26/28] feat: define which updater steps should be run when
 updating and only validating

---
 taf/updater/updater_pipeline.py | 73 ++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 532a299e..c490297d 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -35,6 +35,12 @@ class UpdateStatus(Enum):
     FAILED = 3
 
 
+class RunMode(Enum):
+    UPDATE = 1
+    LOCAL_VALIDATION = 2
+    ALL = 3
+
+
 @define
 class UpdateState:
     auth_commits_since_last_validated: List[Any] = field(factory=list)
@@ -90,18 +96,20 @@ def wrapper(self, *args, **kwargs):
 
 
 class Pipeline:
-    def __init__(self, steps):
+    def __init__(self, steps, run_mode):
         self.steps = steps
         self.current_step = None
+        self.run_mode = run_mode
 
     def run(self):
-        for step in self.steps:
+        for step, step_run_mode in self.steps:
             try:
-                self.current_step = step
-                update_status = step()
-                if update_status == UpdateStatus.FAILED:
-                    raise UpdateFailedError(self.state.error)
-                self.state.update_status = update_status
+                if step_run_mode == RunMode.ALL or step_run_mode == self.run_mode:
+                    self.current_step = step
+                    update_status = step()
+                    if update_status == UpdateStatus.FAILED:
+                        raise UpdateFailedError(self.state.error)
+                    self.state.update_status = update_status
 
             except Exception as e:
                 self.handle_error(e)
@@ -143,18 +151,23 @@ def __init__(
 
         super().__init__(
             steps=[
-                self.clone_remote_and_run_tuf_updater,
-                self.clone_or_fetch_users_auth_repo,
-                self.load_target_repositories,
-                self.clone_target_repositories_if_not_on_disk,
-                self.determine_start_commits,
-                self.get_targets_data_from_auth_repo,
-                self.get_target_repositories_commits,
-                self.validate_target_repositories,
-                self.validate_and_set_additional_commits_of_target_repositories,
-                self.merge_commits,
-                self.set_target_repositories_data,
-            ]
+                (self.clone_remote_and_run_tuf_updater, RunMode.ALL),
+                (self.clone_or_fetch_users_auth_repo, RunMode.UPDATE),
+                (self.load_target_repositories, RunMode.ALL),
+                (self.check_if_repositories_on_disk, RunMode.LOCAL_VALIDATION),
+                (self.clone_target_repositories_if_not_on_disk, RunMode.UPDATE),
+                (self.determine_start_commits, RunMode.ALL),
+                (self.get_targets_data_from_auth_repo, RunMode.ALL),
+                (self.get_target_repositories_commits, RunMode.ALL),
+                (self.validate_target_repositories, RunMode.ALL),
+                (
+                    self.validate_and_set_additional_commits_of_target_repositories,
+                    RunMode.ALL,
+                ),
+                (self.merge_commits, RunMode.UPDATE),
+                (self.set_target_repositories_data, RunMode.UPDATE),
+            ],
+            run_mode=RunMode.LOCAL_VALIDATION if only_validate else RunMode.UPDATE,
         )
 
         self.url = url
@@ -331,6 +344,23 @@ def load_target_repositories(self):
             taf_logger.error(e)
             return UpdateStatus.FAILED
 
+    @log_on_start(
+        INFO,
+        "Checking if all target repositories are already on disk...",
+        logger=taf_logger,
+    )
+    def check_if_repositories_on_disk(self):
+        for repository in self.state.target_repositories.values():
+            if not repository.is_git_repository_root:
+                is_git_repository = repository.is_git_repository_root
+                if not is_git_repository:
+                    if self.only_validate:
+                        self.state.targets_data = {}
+                        msg = f"{repository.name} not on disk. Please run update to clone the repositories."
+                        taf_logger.error(msg)
+                        raise UpdateFailedError(msg)
+        return UpdateStatus.SUCCESS
+
     @log_on_start(
         INFO, "Cloning target repositories which are not on disk...", logger=taf_logger
     )
@@ -341,11 +371,6 @@ def clone_target_repositories_if_not_on_disk(self):
             for repository in self.state.target_repositories.values():
                 is_git_repository = repository.is_git_repository_root
                 if not is_git_repository:
-                    if self.only_validate:
-                        self.state.targets_data = {}
-                        msg = f"{repository.name} not on disk. Please run update to clone the repositories."
-                        taf_logger.error(msg)
-                        raise UpdateFailedError(msg)
                     repository.clone(no_checkout=True)
                     self.state.cloned_target_repositories.append(repository)
             return UpdateStatus.SUCCESS

From ef0966aae253490e8c2307710f982013d3deb8e6 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 13 Dec 2023 19:46:02 +0100
Subject: [PATCH 27/28] chore: minor changelog fix and remove a TODO

---
 CHANGELOG.md                    | 4 ++--
 taf/updater/updater_pipeline.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c08cf99..c05c8ce7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1033,8 +1033,8 @@ and this project adheres to [Semantic Versioning][semver].
 [0.13.2]: https://github.com/openlawlibrary/taf/compare/v0.13.1...v0.13.2
 [0.13.1]: https://github.com/openlawlibrary/taf/compare/v0.13.0...v0.13.1
 [0.13.0]: https://github.com/openlawlibrary/taf/compare/v0.12.0...v0.13.0
-[0.12.0]: https://github.com/openlawlibrary/taf/compare/v0.11.2...v0.12.0
-[0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.1...v0.11.2
+[0.12.0]: https://github.com/openlawlibrary/taf/compare/v0.11.1...v0.12.0
+[0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.0...v0.11.1
 [0.11.0]: https://github.com/openlawlibrary/taf/compare/v0.10.1...v0.11.0
 [0.10.1]: https://github.com/openlawlibrary/taf/compare/v0.10.0...v0.10.1
 [0.10.0]: https://github.com/openlawlibrary/taf/compare/v0.9.0...v0.10.0
diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index c490297d..5939ebee 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -696,7 +696,6 @@ def _validate_current_repo_commit(
             return current_target_commit
         if not _is_unauthenticated_allowed(repository):
             commit_date = users_auth_repo.get_commit_date(current_auth_commit)
-            # TODO check this, different errors
             raise UpdateFailedError(
                 f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: \
 data repository {repository.name} was supposed to be at commit {current_commit} \

From 5974c0c81cef953bae40bb9bb46b64a80d90a3e8 Mon Sep 17 00:00:00 2001
From: Renata <rvaderna@openlawlib.org>
Date: Wed, 13 Dec 2023 20:27:47 +0100
Subject: [PATCH 28/28] chore: remvoe outdated todo

---
 taf/updater/updater_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py
index 5939ebee..e865991a 100644
--- a/taf/updater/updater_pipeline.py
+++ b/taf/updater/updater_pipeline.py
@@ -732,9 +732,10 @@ def validate_and_set_additional_commits_of_target_repositories(self):
         However, no error will get reported if there are commits which have not yet been signed
         In case of repositories which can contain unauthenticated commits, they do not even need to get signed
         """
+        # only get additional commits if the validation was complete (not partial, up to a commit)
+        self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
         if self.state.update_status != UpdateStatus.SUCCESS:
             return self.state.update_status
-        self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
         try:
             for repository in self.state.target_repositories.values():
                 # this will only include branches that were, at least partially, validated (up until a certain point)
@@ -745,7 +746,6 @@ def validate_and_set_additional_commits_of_target_repositories(self):
                     repository.name
                 ].items():
                     last_validated_commit = validated_commits[-1]
-                    # TODO what to do if an error occurred while validating that branch
                     branch_commits = (
                         self.state.fetched_commits_per_target_repos_branches[
                             repository.name