From 335dcede43ae01efd231f9e9918b13aca4d45a2a Mon Sep 17 00:00:00 2001
From: Behnam Robatmili
Date: Fri, 19 Jan 2024 11:02:03 -0800
Subject: [PATCH] Early version of profiler harness

Include a basic benchmark as the starting point and needed scripts
---
 .github/workflows/profiler.yml            | 32 ++++++++++++
 tools/perf_checker/perf_checker.py        | 62 ++++++++++++++++++++++++
 tools/perf_checker/perf_checker.sh        | 46 +++++++++++++++++
 tools/perf_checker/test_anndata_export.py | 19 ++++++++
 4 files changed, 159 insertions(+)
 create mode 100644 .github/workflows/profiler.yml
 create mode 100644 tools/perf_checker/perf_checker.py
 create mode 100755 tools/perf_checker/perf_checker.sh
 create mode 100644 tools/perf_checker/test_anndata_export.py

diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 000000000..082a29a99
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,32 @@
+name: Profiler
+
+on:
+  pull_request:
+
+jobs:
+  run_profiler:
+    name: Run Profiler
+    strategy:
+      matrix:
+        os: [single-cell-8c64g-runner]
+        python-version: ["3.11"]
+    runs-on: ${{matrix.os}}
+    permissions: # these permissions must be set for AWS auth to work!
+      id-token: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: us-west-2
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          role-session-name: PushDockerImage
+
+      - name: Run all tests
+        run: |
+          ./tools/perf_checker/perf_checker.sh
\ No newline at end of file
diff --git a/tools/perf_checker/perf_checker.py b/tools/perf_checker/perf_checker.py
new file mode 100644
index 000000000..96e753442
--- /dev/null
+++ b/tools/perf_checker/perf_checker.py
@@ -0,0 +1,62 @@
+import argparse
+import json
+import logging
+import re
+from typing import Any
+
+import attr
+import profiler
+
+
+def format_string(text: str) -> Any:
+    """Return ``text`` with every newline replaced by a single space."""
+    return re.sub("\n", " ", text)
+
+
+# The script takes a command and a database path and looks for
+# the performance anomalies in the performance history of that
+# command across the profiled runs.
+
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+parser = argparse.ArgumentParser()
+parser.add_argument("command", type=str)
+parser.add_argument("db_path", type=str)
+
+args = parser.parse_args()
+
+# Processes the set of previously written logs
+# The threshold (ratio) of allowable performance degradation between profiling runs
+threshold = 1.10
+
+db = profiler.data.FileBasedProfileDB(args.db_path)
+command_profiles = db.find(f"{args.command}")
+
+
+if len(command_profiles) >= 2:
+    first_profile = command_profiles[0]
+    curr_profile = command_profiles[-1]
+    first_time = first_profile.elapsed_time_sec
+    curr_time = curr_profile.elapsed_time_sec
+
+    formatted_first_profile = json.dumps(format_string(str(attr.asdict(first_profile))))
+    formatted_curr_profile = json.dumps(format_string(str(attr.asdict(curr_profile))))
+
+    logging.info("****************************")
+    logging.info(f"Current time {curr_time} vs first time {first_time}")
+    logging.info("****************************")
+    logging.info(f"First profile: {formatted_first_profile}")
+    logging.info("****************************")
+    logging.info(f"Current profile: {formatted_curr_profile}")
+    logging.info("****************************")
+    logging.info(
+        f"TileDB version ver = first: {first_profile.tiledbsoma_version} curr: {curr_profile.tiledbsoma_version}"
+    )
+    # SystemExit with a message exits with a non-zero status, so the calling
+    # script (which runs with `set -e`) stops on a detected degradation.
+    if float(curr_time) > threshold * float(first_time):
+        raise SystemExit(f"Major performance degradation detected on {args.command}")
+
+    if threshold * float(curr_time) < float(first_time):
+        logging.info(f"Major performance increase detected on {args.command}")
diff --git a/tools/perf_checker/perf_checker.sh b/tools/perf_checker/perf_checker.sh
new file mode 100755
index 000000000..f6d939759
--- /dev/null
+++ b/tools/perf_checker/perf_checker.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+set -euox pipefail
+
+# Download the right version of python
+sudo apt install python3.11 python3.11-venv -y
+# Download gnu time tool
+sudo apt-get update -y
+sudo apt-get install -y time
+
+# Installing mount-s3
+sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
+sudo apt install -y ./mount-s3.deb
+
+# Setting up mount-s3. We use S3 file system as it is necessary to persist the
+# profiling run data that are performed below
+mkdir ./census-profiler-tests
+mkdir ./s3_cache
+mount-s3 census-profiler-tests ./census-profiler-tests --cache ./s3_cache --metadata-ttl 300
+dbpath=`pwd`/census-profiler-tests
+
+pip install psutil
+pip install gitpython
+pip install somacore
+pip install tiledbsoma
+pip install cellxgene_census
+
+# Download the repo including the profiler
+cd ../
+git clone https://github.com/single-cell-data/TileDB-SOMA.git
+# Downloading TileDB-SOMA (remove the next line once the branch is merged)
+cd TileDB-SOMA/profiler
+git checkout census_profiler
+pip install .
+cd ../../cellxgene-census/
+
+# New benchmarks must be added to this list
+declare -a benchmarks=("./tools/perf_checker/test_anndata_export.py")
+
+# Running all benchmarks and checking performance changes.
+# "${benchmarks[@]}" expands to every array element; a bare ${benchmarks}
+# would only yield the first one.
+for benchmark in "${benchmarks[@]}"
+do
+    python -m profiler "python ${benchmark}" "$dbpath"
+    python ./tools/perf_checker/perf_checker.py "python ${benchmark}" "$dbpath"
+done
\ No newline at end of file
diff --git a/tools/perf_checker/test_anndata_export.py b/tools/perf_checker/test_anndata_export.py
new file mode 100644
index 000000000..4f80f5627
--- /dev/null
+++ b/tools/perf_checker/test_anndata_export.py
@@ -0,0 +1,19 @@
+from sys import stderr
+
+import cellxgene_census
+import tiledbsoma as soma
+
+print("Starting bm 1", file=stderr)
+census_S3_latest = dict(census_version="2024-01-01")
+
+
+def main() -> None:
+    with cellxgene_census.open_soma(**census_S3_latest) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'eye'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+
+
+main()