diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py
index 87f86e164..553ec672c 100644
--- a/dandi/dandiapi.py
+++ b/dandi/dandiapi.py
@@ -2023,6 +2023,19 @@ def match(self, pattern: str) -> bool:
                 return False
         return True
 
+    @property
+    def download_url(self) -> str:
+        """
+        .. versionadded:: 0.67.0
+
+        The URL from which the entry can be downloaded
+        """
+        return str(
+            URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
+                {"prefix": str(self), "download": "true"}
+            )
+        )
+
     def get_download_file_iter(
         self, chunk_size: int = MAX_CHUNK_SIZE
     ) -> Callable[[int], Iterator[bytes]]:
@@ -2030,11 +2043,7 @@ def get_download_file_iter(
         Returns a function that when called (optionally with an offset into
         the file to start downloading at) returns a generator of chunks of the
         file
         """
-        url = str(
-            URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
-                {"prefix": str(self), "download": "true"}
-            )
-        )
+        url = self.download_url
 
         def downloader(start_at: int = 0) -> Iterator[bytes]:
             lgr.debug("Starting download from %s", url)
diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py
index 1b28a44a4..f2f79cbde 100644
--- a/dandi/files/zarr.py
+++ b/dandi/files/zarr.py
@@ -6,6 +6,7 @@
 from contextlib import closing
 from dataclasses import dataclass, field, replace
 from datetime import datetime
+import json
 import os
 import os.path
 from pathlib import Path
@@ -558,13 +559,16 @@ def _upload_zarr_file(
     storage_session: RESTFullAPIClient, upload_url: str, item: UploadItem
 ) -> int:
     try:
+        headers = {"Content-MD5": item.base64_digest}
+        if item.content_type is not None:
+            headers["Content-Type"] = item.content_type
         with item.filepath.open("rb") as fp:
             storage_session.put(
                 upload_url,
                 data=fp,
                 json_resp=False,
                 retry_if=_retry_zarr_file,
-                headers={"Content-MD5": item.base64_digest},
+                headers=headers,
             )
     except Exception:
         post_upload_size_check(item.filepath, item.size, True)
@@ -643,21 +647,33 @@ class UploadItem:
     filepath: Path
     digest: str
     size: int
+    content_type: str | None
 
     @classmethod
     def from_entry(cls, e: LocalZarrEntry, digest: str) -> UploadItem:
+        if e.name in {".zarray", ".zattrs", ".zgroup", ".zmetadata"}:
+            try:
+                with e.filepath.open("rb") as fp:
+                    json.load(fp)
+            except Exception:
+                content_type = None
+            else:
+                content_type = "application/json"
+        else:
+            content_type = None
         return cls(
             entry_path=str(e),
             filepath=e.filepath,
             digest=digest,
             size=pre_upload_size_check(e.filepath),
+            content_type=content_type,
         )
 
     @property
     def base64_digest(self) -> str:
         return b64encode(bytes.fromhex(self.digest)).decode("us-ascii")
 
-    def upload_request(self) -> dict[str, str]:
+    def upload_request(self) -> dict[str, str | None]:
         return {"path": self.entry_path, "base64md5": self.base64_digest}
diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py
index e2f445701..668dfa39b 100644
--- a/dandi/tests/test_files.py
+++ b/dandi/tests/test_files.py
@@ -507,6 +507,18 @@ def test_upload_zarr_with_excluded_dotfiles(
     ]
 
 
+def test_upload_zarr_entry_content_type(new_dandiset, tmp_path):
+    filepath = tmp_path / "example.zarr"
+    zarr.save(filepath, np.arange(1000), np.arange(1000, 0, -1))
+    zf = dandi_file(filepath)
+    assert isinstance(zf, ZarrAsset)
+    asset = zf.upload(new_dandiset.dandiset, {"description": "A test Zarr"})
+    assert isinstance(asset, RemoteZarrAsset)
+    e = asset.get_entry_by_path(".zgroup")
+    r = new_dandiset.client.get(e.download_url, json_resp=False)
+    assert r.headers["Content-Type"] == "application/json"
+
+
 def test_validate_deep_zarr(tmp_path: Path) -> None:
     zarr_path = tmp_path / "foo.zarr"
     zarr.save(zarr_path, np.arange(1000), np.arange(1000, 0, -1))