Skip to content

Commit

Permalink
Early version of profiler harness
Browse files Browse the repository at this point in the history
Include a basic benchmark as the starting point and needed scripts
  • Loading branch information
beroy committed Feb 10, 2024
1 parent 01f1e6d commit 335dced
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/profiler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Runs the perf_checker benchmarks on every pull request and fails the job
# when the checker script exits with a non-zero status.
name: Profiler

on:
  pull_request:

jobs:
  run_profiler:
    name: Run Profiler
    strategy:
      matrix:
        os: [single-cell-8c64g-runner]
        # NOTE(review): no step in this workflow reads matrix.python-version;
        # the interpreter is installed by perf_checker.sh itself.
        python-version: ["3.11"]
    runs-on: ${{matrix.os}}
    permissions: # these permissions must be set for AWS auth to work!
      id-token: write
      contents: read

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1

      # v4 is the maintained major version of this action (v1 targets a
      # deprecated Node runtime); the inputs used here are unchanged.
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          role-session-name: PushDockerImage

      - name: Run all tests
        run: |
          ./tools/perf_checker/perf_checker.sh
59 changes: 59 additions & 0 deletions tools/perf_checker/perf_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import argparse
import json
import logging
import re
from typing import Any

import attr
import profiler


def format_string(text: str) -> str:
    """Return *text* with every newline character replaced by a single space.

    Used to flatten multi-line profile dumps so each one fits on one log line.
    """
    # A plain substring replacement; no regular expression is needed for "\n".
    return text.replace("\n", " ")


# The script takes a command and a database path and looks for
# performance anomalies in the performance history of that
# command across the profiled runs.
#
# Usage: python perf_checker.py "<command>" <db_path>
#
# It compares the elapsed time of the last stored profile of the command with
# the first one and raises SystemExit (non-zero exit status) when the last run
# is more than `threshold` times slower.

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

parser = argparse.ArgumentParser()
parser.add_argument("command", type=str)
parser.add_argument("db_path", type=str)

args = parser.parse_args()

# Processes the set of previously written logs
# The threshold (ratio) of allowable performance degradation between profiling runs
threshold = 1.10

db = profiler.data.FileBasedProfileDB(args.db_path)
command_profiles = db.find(f"{args.command}")


# A comparison needs at least two stored runs; with fewer there is nothing to
# check and the script finishes silently.
if len(command_profiles) >= 2:
    # First and last entries returned by the DB
    # (presumably oldest and most recent run -- confirm against profiler's ordering).
    first_profile = command_profiles[0]
    curr_profile = command_profiles[-1]
    first_time = first_profile.elapsed_time_sec
    curr_time = curr_profile.elapsed_time_sec

    formatted_first_profile = json.dumps(format_string(str(attr.asdict(first_profile))))
    formatted_curr_profile = json.dumps(format_string(str(attr.asdict(curr_profile))))

    # NOTE: the module-level logging.info() is used on purpose: it installs a
    # default handler on the root logger when none is configured, so these
    # INFO messages are actually emitted.
    logging.info("****************************")
    logging.info(f"Current time {curr_time} vs first time {first_time}")
    logging.info("****************************")
    logging.info(f"First profile: {formatted_first_profile}")
    logging.info("****************************")
    logging.info(f"Current profile: {formatted_curr_profile}")
    logging.info("****************************")
    logging.info(
        f"TileDB version ver = first: {first_profile.tiledbsoma_version} curr: {curr_profile.tiledbsoma_version}"
    )
    if float(curr_time) > threshold * float(first_time):
        # The parser defines `command`, not `benchmark`; using the latter
        # raised AttributeError instead of reporting the regression.
        raise SystemExit(f"Major performance degradation detected on {args.command}")

    if threshold * float(curr_time) < float(first_time):
        logging.info(f"Major performance increase detected on {args.command}")
45 changes: 45 additions & 0 deletions tools/perf_checker/perf_checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# Prepares a CI runner for profiling, then runs every benchmark under the
# TileDB-SOMA profiler and checks its timing history with perf_checker.py.
#
# bash (not plain sh) is required: the script uses `set -o pipefail` and arrays.
# It is expected to be started from the root of the cellxgene-census checkout.
set -euox pipefail

# Remember the checkout root so we can come back after installing the profiler
repo_root=$(pwd)

# Refresh the package index first so the installs below can resolve
sudo apt-get update -y
# Download the right version of python and the gnu time tool
sudo apt-get install -y python3.11 python3.11-venv time

# Installing mount-s3
sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
sudo apt install -y ./mount-s3.deb

# Setting up mount-s3. We use S3 file system as it is necessary to persist the
# profiling run data that are performed below
mkdir -p ./census-profiler-tests
mkdir -p ./s3_cache
mount-s3 census-profiler-tests ./census-profiler-tests --cache ./s3_cache --metadata-ttl 300
dbpath="${repo_root}/census-profiler-tests"

pip install psutil
pip install gitpython
pip install somacore
pip install tiledbsoma
pip install cellxgene_census

# Download the repo including the profiler (cloned next to this checkout)
cd ../
git clone https://github.com/single-cell-data/TileDB-SOMA.git
cd TileDB-SOMA/profiler
# Using the profiler branch (remove the next line once the branch is merged)
git checkout census_profiler
pip install .
# Return to the checkout root regardless of what the directory is called
cd "${repo_root}"

# New benchmarks must be added to this list
benchmarks=("./tools/perf_checker/test_anndata_export.py")

# Running all benchmarks and checking performance changes.
# "${benchmarks[@]}" expands to every element; a bare ${benchmarks} would only
# yield the first one.
for benchmark in "${benchmarks[@]}"
do
  python -m profiler "python ${benchmark}" "${dbpath}"
  python ./tools/perf_checker/perf_checker.py "python ${benchmark}" "${dbpath}"
done
19 changes: 19 additions & 0 deletions tools/perf_checker/test_anndata_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from sys import stderr

import cellxgene_census
import tiledbsoma as soma

# Announce the benchmark start on stderr.
print("Starting bm 1", file=stderr)
# Keyword arguments forwarded to cellxgene_census.open_soma(); the census
# release is pinned to a fixed version.
census_S3_latest = {"census_version": "2024-01-01"}


def main() -> None:
    """Run the AnnData export benchmark.

    Opens the census described by ``census_S3_latest``, queries the "RNA"
    measurement of the ``homo_sapiens`` experiment for observations whose
    ``tissue_general`` is ``'eye'``, and calls ``to_anndata`` on the "raw" X
    layer. The result is discarded; only the work itself is of interest.
    """
    with cellxgene_census.open_soma(**census_S3_latest) as census:
        human_experiment = census["census_data"]["homo_sapiens"]
        eye_obs = soma.AxisQuery(value_filter="""tissue_general == 'eye'""")
        with human_experiment.axis_query(measurement_name="RNA", obs_query=eye_obs) as query:
            query.to_anndata(X_name="raw")


# Run the benchmark only when executed as a script (as the profiler does with
# "python <file>"), not when the module is merely imported -- the test_*.py
# name means test collectors may import it.
if __name__ == "__main__":
    main()

0 comments on commit 335dced

Please sign in to comment.