-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Include a basic benchmark as the starting point and needed scripts
- Loading branch information
Showing
4 changed files
with
155 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Workflow that runs the performance checker script on every pull request.
name: Profiler

on:
  pull_request:

jobs:
  run_profiler:
    name: Run Profiler
    strategy:
      matrix:
        os: [single-cell-8c64g-runner]
        # NOTE(review): no step in this workflow reads matrix.python-version;
        # the script invoked below installs python3.11 itself via apt.
        python-version: ["3.11"]
    runs-on: ${{matrix.os}}
    permissions: # these permissions must be set for AWS auth to work!
      id-token: write
      contents: read

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1

      # NOTE(review): @v1 is an old major version of this action and the
      # session name looks copied from an image-publishing workflow; confirm
      # both before changing, since IAM trust policies may depend on them.
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-region: us-west-2
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          role-session-name: PushDockerImage

      # Profiles each benchmark and checks it against the recorded history.
      - name: Run performance checker
        run: |
          ./tools/perf_checker/perf_checker.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import argparse | ||
import json | ||
import logging | ||
import re | ||
from typing import Any | ||
|
||
import attr | ||
import profiler | ||
|
||
|
||
def format_string(text: str) -> str:
    """Return *text* with every newline character replaced by a single space.

    Used to collapse a multi-line string onto one line before it is logged.
    """
    # A literal one-character substitution does not need a regular expression.
    return text.replace("\n", " ")
|
||
|
||
# This script takes a command and a profile database path and looks for
# performance anomalies in the performance history of that command across
# the profiled runs. It exits with an error when the last recorded run is
# slower than the first recorded run by more than the allowed threshold.

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

parser = argparse.ArgumentParser()
parser.add_argument("command", type=str)
parser.add_argument("db_path", type=str)

args = parser.parse_args()

# Processes the set of previously written logs
# The threshold (ratio) of allowable performance degradation between profiling runs
threshold = 1.10

db = profiler.data.FileBasedProfileDB(args.db_path)
command_profiles = db.find(f"{args.command}")

# A comparison needs at least two recorded runs; with fewer there is nothing
# to compare and the script finishes without reporting anything.
if len(command_profiles) >= 2:
    # First and last entries returned by the database lookup.
    # NOTE(review): presumably ordered oldest to newest - confirm against the
    # profiler package's `find` implementation.
    first_profile = command_profiles[0]
    curr_profile = command_profiles[-1]
    first_time = first_profile.elapsed_time_sec
    curr_time = curr_profile.elapsed_time_sec

    # Flatten each profile to a single line so it fits in one log record.
    formatted_first_profile = json.dumps(format_string(str(attr.asdict(first_profile))))
    formatted_curr_profile = json.dumps(format_string(str(attr.asdict(curr_profile))))

    logging.info("****************************")
    logging.info(f"Current time {curr_time} vs first time {first_time}")
    logging.info("****************************")
    logging.info(f"First profile: {formatted_first_profile}")
    logging.info("****************************")
    logging.info(f"Current profile: {formatted_curr_profile}")
    logging.info("****************************")
    logging.info(
        f"TileDB version ver = first: {first_profile.tiledbsoma_version} curr: {curr_profile.tiledbsoma_version}"
    )

    # Fail the run when the current time exceeds the first time by more than
    # the threshold ratio. The parser defines `command`, not `benchmark`, so
    # the message must be built from `args.command`.
    if float(curr_time) > threshold * float(first_time):
        raise SystemExit(f"Major performance degradation detected on {args.command}")

    # A speed-up beyond the same ratio is only reported, never treated as an error.
    if threshold * float(curr_time) < float(first_time):
        logging.info(f"Major performance increase detected on {args.command}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
# Installs the profiling prerequisites, mounts the S3 bucket that stores the
# profiling history, then profiles every benchmark and checks its history for
# performance changes.
#
# The script relies on bash-only features (pipefail, arrays), so it must run
# under bash and the shebang has to be the very first line of the file.
set -euxo pipefail

# Refresh the package index before installing anything
sudo apt-get update -y
# Download the right version of python
sudo apt-get install -y python3.11 python3.11-venv
# Download gnu time tool
sudo apt-get install -y time

# Installing mount-s3
sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
sudo apt install -y ./mount-s3.deb

# Setting up mount-s3. We use S3 file system as it is necessary to persist the
# profiling run data that are performed below
# (-p keeps a re-run from failing when the directories already exist)
mkdir -p ./census-profiler-tests
mkdir -p ./s3_cache
mount-s3 census-profiler-tests ./census-profiler-tests --cache ./s3_cache --metadata-ttl 300
dbpath="$(pwd)/census-profiler-tests"

pip install psutil
pip install gitpython
pip install somacore
pip install tiledbsoma
pip install cellxgene_census

# Download the repo including the profiler
cd ../
git clone https://github.com/single-cell-data/TileDB-SOMA.git
cd TileDB-SOMA/profiler
# Use the profiler branch (remove the next line once the branch is merged)
git checkout census_profiler
pip install .
cd ../../cellxgene-census/

# New benchmarks must be added to this list
declare -a benchmarks=("./tools/perf_checker/test_anndata_export.py")

# Running all benchmarks and checking performance changes.
# "${benchmarks[@]}" expands to every element; a bare ${benchmarks} would
# only yield the first one.
for benchmark in "${benchmarks[@]}"
do
  python -m profiler "python ${benchmark}" "$dbpath"
  python ./tools/perf_checker/perf_checker.py "python ${benchmark}" "$dbpath"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from sys import stderr | ||
|
||
import cellxgene_census | ||
import tiledbsoma as soma | ||
|
||
# Announce on stderr that the benchmark is starting.
print("Starting bm 1", file=stderr)

# Keyword arguments unpacked into cellxgene_census.open_soma(); pins the
# census release string that the benchmark opens.
census_S3_latest = {"census_version": "2024-01-01"}
|
||
|
||
def main() -> None:
    """Run the benchmark: query the census and convert the result to AnnData.

    Opens the census with the module-level ``census_S3_latest`` keyword
    arguments, runs an axis query on ``census_data/homo_sapiens`` for the
    ``RNA`` measurement restricted by the obs filter
    ``tissue_general == 'eye'``, and calls ``to_anndata(X_name="raw")`` on it.
    The converted object is discarded; only the work of producing it matters.
    """
    with cellxgene_census.open_soma(**census_S3_latest) as census:
        human = census["census_data"]["homo_sapiens"]
        eye_cells = soma.AxisQuery(value_filter="""tissue_general == 'eye'""")
        with human.axis_query(measurement_name="RNA", obs_query=eye_cells) as query:
            query.to_anndata(X_name="raw")


# Runs on import as well as on direct execution; the profiler invokes this
# file as a script.
main()