From 8e82992446aec238e65c6c43a949474406203b9f Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 7 Jun 2024 10:00:01 -0400 Subject: [PATCH] Set `github-access-status` in superdataset's `.gitmodules` --- src/backups2datalad/adataset.py | 69 ++++++++++++++++++++++++------- src/backups2datalad/datasetter.py | 19 +++++++++ 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/src/backups2datalad/adataset.py b/src/backups2datalad/adataset.py index 73ab403..8f96ee0 100644 --- a/src/backups2datalad/adataset.py +++ b/src/backups2datalad/adataset.py @@ -183,6 +183,25 @@ async def is_dirty(self) -> bool: != "" ) + async def has_changes( + self, paths: Sequence[str | Path] = (), cached: bool = False + ) -> bool: + args: list[str | Path] = ["diff", "--quiet"] + if cached: + args.append("--cached") + if paths: + args.append("--") + args.extend(paths) + try: + await self.call_git(*args, quiet_rcs=[1]) + except subprocess.CalledProcessError as e: + if e.returncode == 1: + return True + else: + raise + else: + return False + async def get_repo_config(self, key: str, file: str | None = None) -> str | None: args = ["--file", file] if file is not None else [] try: @@ -193,6 +212,18 @@ async def get_repo_config(self, key: str, file: str | None = None) -> str | None else: raise + async def set_repo_config( + self, key: str, value: str, file: str | None = None + ) -> None: + args = ["--file", file] if file is not None else ["--local"] + await self.call_git( + "config", + *args, + "--replace-all", + key, + value, + ) + async def get_datalad_id(self) -> str: r = await self.get_repo_config("datalad.dataset.id", file=".datalad/config") assert r is not None @@ -206,13 +237,8 @@ async def get_embargo_status(self) -> EmbargoStatus: return EmbargoStatus(value) async def set_embargo_status(self, status: EmbargoStatus) -> None: - await self.call_git( - "config", - "--file", - ".datalad/config", - "--replace-all", - EMBARGO_STATUS_KEY, - status.value, + await self.set_repo_config( + EMBARGO_STATUS_KEY, status.value, file=".datalad/config" ) async def call_git(self, *args: str | Path, **kwargs: Any) -> None: @@ -289,6 +315,22 @@ async def commit( " Please check if all changes were staged." ) + async def commit_if_changed( + self, + message: str, + commit_date: datetime | None = None, + paths: Sequence[str | Path] = (), + check_dirty: bool = True, + ) -> None: + await self.call_git("add", "-A", *paths) + if await self.has_changes(paths=paths, cached=True): + await self.commit( + message, + commit_date=commit_date, + paths=paths, + check_dirty=check_dirty, + ) + async def push(self, to: str, jobs: int, data: str | None = None) -> None: waits = exp_wait(attempts=6, base=2.1) while True: @@ -512,7 +554,7 @@ async def create_github_sibling( (f"branch.{DEFAULT_BRANCH}.remote", "github"), (f"branch.{DEFAULT_BRANCH}.merge", f"refs/heads/{DEFAULT_BRANCH}"), ]: - await self.call_git("config", "--local", "--replace-all", key, value) + await self.set_repo_config(key, value) return True else: log.debug("GitHub remote already exists for %s", name) @@ -658,13 +700,10 @@ async def uninstall_subdatasets(self) -> None: async def add_submodule(self, path: str, url: str, datalad_id: str) -> None: await self.call_git("submodule", "add", "--", url, path) - await self.call_git( - "config", - "--file", - ".gitmodules", - "--replace-all", + await self.set_repo_config( f"submodule.{path}.datalad-id", datalad_id, + file=".gitmodules", ) await self.add(".gitmodules") @@ -684,9 +723,7 @@ async def populate_up_to_date(self) -> bool: async def update_populate_status(self) -> None: head = await self.get_commit_hash() - await self.call_git( - "config", "--local", "--replace-all", "dandi.populated", head - ) + await self.set_repo_config("dandi.populated", head) class ObjectType(Enum): diff --git a/src/backups2datalad/datasetter.py b/src/backups2datalad/datasetter.py index e2386f1..52dfdc3 100644 --- a/src/backups2datalad/datasetter.py +++ b/src/backups2datalad/datasetter.py @@ -80,14 +80,33 @@ async def update_from_backup( workers=self.config.workers, ) to_save: list[str] = [] + access_status: dict[str, str] = {} for d, changed in report.results: if changed: to_save.append(d.identifier) + if self.config.gh_org is not None: + access_status[d.identifier] = ( + "public" + if d.embargo_status is EmbargoStatus.OPEN + else "private" + ) if to_save: log.debug("Committing superdataset") superds.assert_no_duplicates_in_gitmodules() msg = await self.get_superds_commit_message(superds, to_save) await superds.save(message=msg, path=to_save) + if access_status: + for did, access in access_status.items(): + await superds.set_repo_config( + f"submodule.{did}.github-access-status", + access, + file=".gitmodules", + ) + await superds.commit_if_changed( + "[backups2datalad] Update github-access-status keys in .gitmodules", + paths=[".gitmodules"], + check_dirty=False, + ) superds.assert_no_duplicates_in_gitmodules() log.debug("Superdataset committed") if report.failed: