From c42f21708ac12e873d1cbefe82a3df0af125a8f7 Mon Sep 17 00:00:00 2001
From: Isaac Virshup
Date: Thu, 11 Apr 2024 01:36:13 +1000
Subject: [PATCH 1/2] Update coverage upload (#1465)

sibling to https://github.com/scverse/scanpy/pull/2996
---
 .azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
index c3781846d..87974a34d 100644
--- a/.azure-pipelines.yml
+++ b/.azure-pipelines.yml
@@ -95,7 +95,7 @@ jobs:
           sys.exit(0 if len(results) > 3000 else f"Error: only {len(results)} tests run")
       displayName: "Check if enough tests ran"

-  - task: PublishCodeCoverageResults@1
+  - task: PublishCodeCoverageResults@2
     inputs:
       codeCoverageTool: Cobertura
       summaryFileLocation: "test-data/coverage.xml"

From a03e98437dc5410f9a4b2b0b4c966742d64be0bc Mon Sep 17 00:00:00 2001
From: Philipp A
Date: Wed, 10 Apr 2024 17:38:56 +0200
Subject: [PATCH 2/2] Fix benchmark params (#1450)

* Fix benchmark params

* Apply suggestions from code review

use mamba due to asv bug, first position now named first

---------

Co-authored-by: Isaac Virshup
---
 benchmarks/benchmarks/readwrite.py      | 69 +++++++++++++------------
 benchmarks/benchmarks/sparse_dataset.py | 33 ++++++------
 2 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
index 6ad880460..7c06b560a 100644
--- a/benchmarks/benchmarks/readwrite.py
+++ b/benchmarks/benchmarks/readwrite.py
@@ -77,19 +77,20 @@ class H5ADInMemorySizeSuite:
-    params = [PBMC_3K_URL]
-    param_names = ["input_url"]
+    _urls = dict(pbmc3k=PBMC_3K_URL)
+    params = _urls.keys()
+    param_names = ["input_data"]

-    def setup(self, input_url):
-        self.filepath = pooch.retrieve(url=input_url, known_hash=None)
+    def setup(self, input_data: str):
+        self.filepath = pooch.retrieve(url=self._urls[input_data], known_hash=None)

-    def track_in_memory_size(self, input_url):
+    def track_in_memory_size(self, *_):
         adata = anndata.read_h5ad(self.filepath)
         adata_size = sys.getsizeof(adata)

         return adata_size

-    def track_actual_in_memory_size(self, input_url):
+    def track_actual_in_memory_size(self, *_):
         adata = anndata.read_h5ad(self.filepath)
         adata_size = get_actualsize(adata)
@@ -97,23 +98,23 @@ class H5ADReadSuite:
-    # params = [PBMC_REDUCED_PATH, PBMC_3K_PATH, BM_43K_CSR_PATH]
-    params = [PBMC_3K_URL]
-    param_names = ["input_url"]
+    _urls = dict(pbmc3k=PBMC_3K_URL)
+    params = _urls.keys()
+    param_names = ["input_data"]

-    def setup(self, input_url):
-        self.filepath = pooch.retrieve(url=input_url, known_hash=None)
+    def setup(self, input_data: str):
+        self.filepath = pooch.retrieve(url=self._urls[input_data], known_hash=None)

-    def time_read_full(self, input_url):
+    def time_read_full(self, *_):
         anndata.read_h5ad(self.filepath)

-    def peakmem_read_full(self, input_url):
+    def peakmem_read_full(self, *_):
         anndata.read_h5ad(self.filepath)

-    def mem_readfull_object(self, input_url):
+    def mem_readfull_object(self, *_):
         return anndata.read_h5ad(self.filepath)

-    def track_read_full_memratio(self, input_url):
+    def track_read_full_memratio(self, *_):
         mem_recording = memory_usage(
             (sedate(anndata.read_h5ad, 0.005), (self.filepath,)), interval=0.001
         )
@@ -123,23 +124,23 @@ def track_read_full_memratio(self, input_url):
         print(base_size)
         return (np.max(mem_recording) - np.min(mem_recording)) / base_size

-    def peakmem_read_backed(self, input_url):
+    def peakmem_read_backed(self, *_):
         anndata.read_h5ad(self.filepath, backed="r")

-    def mem_read_backed_object(self, input_url):
+    def mem_read_backed_object(self, *_):
         return anndata.read_h5ad(self.filepath, backed="r")


 class H5ADWriteSuite:
-    # params = [PBMC_REDUCED_PATH, PBMC_3K_PATH, BM_43K_CSR_PATH]
-    params = [PBMC_3K_URL]
-    param_names = ["input_url"]
+    _urls = dict(pbmc3k=PBMC_3K_URL)
+    params = _urls.keys()
+    param_names = ["input_data"]

-    def setup(self, input_url):
+    def setup(self, input_data: str):
         mem_recording, adata = memory_usage(
             (
                 sedate(anndata.read_h5ad, 0.005),
-                (pooch.retrieve(input_url, known_hash=None),),
+                (pooch.retrieve(self._urls[input_data], known_hash=None),),
             ),
             retval=True,
             interval=0.001,
@@ -149,40 +150,40 @@ def setup(self, input_url):
         self.tmpdir = tempfile.TemporaryDirectory()
         self.writepth = Path(self.tmpdir.name) / "out.h5ad"

-    def teardown(self, input_url):
+    def teardown(self, *_):
         self.tmpdir.cleanup()

-    def time_write_full(self, input_url):
+    def time_write_full(self, *_):
         self.adata.write_h5ad(self.writepth, compression=None)

-    def peakmem_write_full(self, input_url):
+    def peakmem_write_full(self, *_):
         self.adata.write_h5ad(self.writepth)

-    def track_peakmem_write_full(self, input_url):
+    def track_peakmem_write_full(self, *_):
         return get_peak_mem((sedate(self.adata.write_h5ad), (self.writepth,)))

-    def time_write_compressed(self, input_url):
+    def time_write_compressed(self, *_):
         self.adata.write_h5ad(self.writepth, compression="gzip")

-    def peakmem_write_compressed(self, input_url):
+    def peakmem_write_compressed(self, *_):
         self.adata.write_h5ad(self.writepth, compression="gzip")

-    def track_peakmem_write_compressed(self, input_url):
+    def track_peakmem_write_compressed(self, *_):
         return get_peak_mem(
             (sedate(self.adata.write_h5ad), (self.writepth,), {"compression": "gzip"})
         )


 class H5ADBackedWriteSuite(H5ADWriteSuite):
-    # params = [PBMC_REDUCED_PATH, PBMC_3K_PATH]
-    params = [PBMC_3K_URL]
-    param_names = ["input_url"]
+    _urls = dict(pbmc3k=PBMC_3K_URL)
+    params = _urls.keys()
+    param_names = ["input_data"]

-    def setup(self, input_url):
+    def setup(self, input_data):
         mem_recording, adata = memory_usage(
             (
                 sedate(anndata.read_h5ad, 0.005),
-                (pooch.retrieve(input_url, known_hash=None),),
+                (pooch.retrieve(self._urls[input_data], known_hash=None),),
                 {"backed": "r"},
             ),
             retval=True,
diff --git a/benchmarks/benchmarks/sparse_dataset.py b/benchmarks/benchmarks/sparse_dataset.py
index e0639f324..7d217d159 100644
--- a/benchmarks/benchmarks/sparse_dataset.py
+++ b/benchmarks/benchmarks/sparse_dataset.py
@@ -16,42 +16,43 @@ def make_alternating_mask(n):


 class SparseCSRContiguousSlice:
+    _slices = {
+        "0:1000": slice(0, 1000),
+        "0:9000": slice(0, 9000),
+        ":9000:-1": slice(None, 9000, -1),
+        "::2": slice(None, None, 2),
+        "array": np.array([0, 5000, 9999]),
+        "arange": np.arange(0, 1000),
+        "first": 0,
+        "alternating": make_alternating_mask(10),
+    }
     params = (
         [
             (10_000, 10_000),
             # (10_000, 500)
         ],
-        [
-            slice(0, 1000),
-            slice(0, 9000),
-            slice(None, 9000, -1),
-            slice(None, None, 2),
-            np.array([0, 5000, 9999]),
-            np.arange(0, 1000),
-            0,
-            make_alternating_mask(10),
-        ],
+        _slices.keys(),
     )
     param_names = ["shape", "slice"]

-    def setup(self, shape, slice):
+    def setup(self, shape: tuple[int, int], slice: str):
         X = sparse.random(
             *shape, density=0.01, format="csr", random_state=np.random.default_rng(42)
         )
-        self.slice = slice
+        self.slice = self._slices[slice]
         g = zarr.group()
         write_elem(g, "X", X)
         self.x = sparse_dataset(g["X"])
         self.adata = AnnData(self.x)

-    def time_getitem(self, shape, slice):
+    def time_getitem(self, *_):
self.x[self.slice] - def peakmem_getitem(self, shape, slice): + def peakmem_getitem(self, *_): self.x[self.slice] - def time_getitem_adata(self, shape, slice): + def time_getitem_adata(self, *_): self.adata[self.slice] - def peakmem_getitem_adata(self, shape, slice): + def peakmem_getitem_adata(self, *_): self.adata[self.slice]
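
Both benchmark files in the second patch apply the same asv parameterization pattern: `params` holds short human-readable labels (so asv reports show "pbmc3k" or "0:1000" rather than a full URL or a slice repr), a class-level dict maps each label back to the real object, setup() resolves the label once, and the timed methods take *_ because everything they need is already stored on self. Below is a minimal, self-contained sketch of that pattern; the suite name and data are illustrative, not taken from these patches.

import numpy as np


class ExampleSliceSuite:
    # Human-readable labels double as asv's displayed parameter values;
    # the objects they stand for live in this class-level mapping.
    _slices = {
        "first_100": slice(0, 100),
        "every_other": slice(None, None, 2),
    }
    params = _slices.keys()
    param_names = ["slice"]

    def setup(self, slice_key: str):
        # Resolve the label to the real object once, outside the timed region.
        self.data = np.arange(10_000)
        self.slice = self._slices[slice_key]

    def time_getitem(self, *_):
        # asv still passes the parameters positionally, but setup() has
        # already stored what is needed on self, so they can be ignored.
        self.data[self.slice]

Renaming or relabeling a parameter this way only changes the label asv records; the lookup in setup() keeps the benchmarked behavior identical.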