Add anomaly perf benchmark tests (#3170)
* Add anomaly perf benchmark tests

* Refine workflow

* Add options for model-category

* Remove num_classes / data_format setting
goodsong81 committed Mar 21, 2024
1 parent 10f66e8 commit 21373fa
Showing 10 changed files with 306 additions and 65 deletions.
48 changes: 47 additions & 1 deletion .github/workflows/perf_benchmark.yaml
@@ -7,6 +7,9 @@ on:
type: choice
description: Model category to run benchmark
options:
- speed
- balance
- accuracy
- default # speed, balance, accuracy models only
- all # default + other models
default: default
@@ -50,6 +53,45 @@ on:
`pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before the run,
and reverted after it. Works only for v2.x, assuming CLI compatibility.
default: __CURRENT_BRANCH_COMMIT__
workflow_call:
inputs:
model-category:
type: string
description: Model category to run benchmark [speed, balance, accuracy, default, all]
default: default
data-group:
type: string
description: Data group to run benchmark [small, medium, large, all]
default: all
num-repeat:
type: number
description: Overrides the default per-data-group repeat setting
default: 0
num-epoch:
type: number
description: Overrides the default per-model epoch setting
default: 0
eval-upto:
type: string
description: The last operation to evaluate. 'optimize' means all. [train, export, optimize]
default: optimize
pytest-args:
type: string
description: |
Additional perf-benchmark pytest arguments.
"-k detection" -> detection task only
"--dry-run" -> print command w/o execution.
data-root:
type: string
description: Root directory containing validation data in CI server.
default: "/home/validation/data/v2/"
otx-ref:
type: string
description: |
Target OTX ref (tag / branch name / commit hash) on main repo to test. Defaults to the current branch.
`pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before the run,
and reverted after it. Works only for v2.x, assuming CLI compatibility.
default: __CURRENT_BRANCH_COMMIT__

# Declare default permissions as read only.
permissions: read-all
@@ -73,7 +115,7 @@ jobs:
- task-short: "vsp"
task: "visual_prompting"
name: Perf-Benchmark-${{ matrix.task-short }}
runs-on: [self-hosted, linux, x64, dmount-v2, perf]
runs-on: [self-hosted, linux, x64, dmount-v2]
timeout-minutes: 8640
steps:
- name: Checkout repository
@@ -85,6 +127,10 @@
- name: Install tox
run: python -m pip install --require-hashes --no-deps -r .ci/tox-deps.txt
- name: Run Performance Test
env:
BENCHMARK_RESULTS_CLEAR: ${{ vars.BENCHMARK_RESULTS_CLEAR }}
GH_CTX_REF_NAME: ${{ github.ref_name }}
GH_CTX_SHA: ${{ github.sha }}
run: >
tox -vv -e perf-benchmark -- tests/perf/test_${{ matrix.task }}.py ${{ inputs.pytest-args }}
--model-category ${{ inputs.model-category }}
1 change: 0 additions & 1 deletion .github/workflows/weekly.yaml
@@ -19,4 +19,3 @@ jobs:
num-repeat: 0
num-epoch: 0
eval-upto: optimize
artifact-prefix: weekly-perf-benchmark
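With the new `workflow_call` trigger, other workflows can invoke the benchmark directly, which is how the weekly workflow above consumes it. Below is a minimal sketch of such a caller, modeled on the weekly.yaml changes in this diff; the `uses:` path, job name, and cron schedule are assumptions, since weekly.yaml is only partially shown here.

```yaml
# Hypothetical caller sketch; input names follow the workflow_call interface above.
name: Weekly perf benchmark (sketch)
on:
  schedule:
    - cron: "0 0 * * 6"  # assumed schedule; not shown in this diff
permissions: read-all
jobs:
  perf-benchmark:
    uses: ./.github/workflows/perf_benchmark.yaml
    with:
      model-category: default
      data-group: all
      num-repeat: 0      # 0 = keep the per-data-group default
      num-epoch: 0       # 0 = keep the per-model default
      eval-upto: optimize
```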
50 changes: 38 additions & 12 deletions tests/perf/benchmark.py
@@ -56,8 +56,6 @@ class Dataset:
name: str
path: Path
group: str
data_format: str
num_classes: int
num_repeat: int = 1
extra_overrides: dict | None = None

@@ -155,10 +153,6 @@ def run(
str(data_root),
"--work_dir",
str(sub_work_dir),
"--model.num_classes",
str(dataset.num_classes),
"--data.config.data_format",
dataset.data_format,
"--engine.device",
self.accelerator,
]
@@ -172,7 +166,10 @@
start_time = time()
self._run_command(command)
extra_metrics = {"train/e2e_time": time() - start_time}
self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "train", replaces={"epoch": "train/epoch"})
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "train",
replaces={"train_": "train/", "{pre}": "train/"},
)
self._log_metrics(
work_dir=sub_work_dir / ".latest" / "train",
tags=tags,
@@ -187,6 +184,10 @@ def run(
str(sub_work_dir),
]
self._run_command(command)
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test_": "test/", "{pre}": "test/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Export & test
@@ -215,7 +216,10 @@ def run(
]
self._run_command(command)

self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "export"})
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "export", "{pre}": "export/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Optimize & test
@@ -250,7 +254,10 @@ def run(
]
self._run_command(command)

self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "optimize"})
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "optimize", "{pre}": "optimize/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Force memory clean up
@@ -310,11 +317,25 @@ def _log_metrics(
metrics.to_csv(work_dir / "benchmark.raw.csv", index=False)

def _rename_raw_data(self, work_dir: Path, replaces: dict[str, str]) -> None:
replaces = {**self.NAME_MAPPING, **replaces}

def _rename_col(col_name: str) -> str:
for src_str, dst_str in replaces.items():
if src_str == "{pre}":
if not col_name.startswith(dst_str):
col_name = dst_str + col_name
elif src_str == "{post}":
if not col_name.endswith(dst_str):
col_name = col_name + dst_str
else:
col_name = col_name.replace(src_str, dst_str)
return col_name

csv_files = work_dir.glob("**/metrics.csv")
for csv_file in csv_files:
data = pd.read_csv(csv_file)
for src_str, dst_str in replaces.items():
data.columns = data.columns.str.replace(src_str, dst_str)
data = data.rename(columns=_rename_col) # Column names
data = data.replace(replaces) # Values
data.to_csv(csv_file, index=False)

@staticmethod
@@ -338,7 +359,7 @@ def load_result(result_path: Path) -> pd.DataFrame | None:
return pd.concat(results, ignore_index=True).set_index(["task", "model", "data_group", "data"])

@staticmethod
def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame | None:
"""Average result w.r.t. given keys
Args:
@@ -348,6 +369,9 @@ def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
Returns:
pd.DataFrame: Averaged result table
"""
if data is None:
return None

# Flatten index
index_names = data.index.names
column_names = data.columns
@@ -391,3 +415,5 @@ def check(self, result: pd.DataFrame, criteria: list[Criterion]):

for criterion in criteria:
criterion(result_entry, target_entry)

NAME_MAPPING: dict[str, str] = {} # noqa: RUF012
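In the `_rename_raw_data` change above, `{pre}` and `{post}` act as sentinel keys: instead of substring replacement, they ensure the mapped value is present as a column prefix or suffix. A minimal standalone sketch of that rule, re-implemented here for illustration (not imported from tests/perf/benchmark.py):

```python
# Sketch of the column-renaming rule used by _rename_raw_data's _rename_col helper.
def rename_col(col_name: str, replaces: dict[str, str]) -> str:
    for src_str, dst_str in replaces.items():
        if src_str == "{pre}":  # sentinel: ensure dst_str is a prefix
            if not col_name.startswith(dst_str):
                col_name = dst_str + col_name
        elif src_str == "{post}":  # sentinel: ensure dst_str is a suffix
            if not col_name.endswith(dst_str):
                col_name = col_name + dst_str
        else:  # ordinary substring replacement
            col_name = col_name.replace(src_str, dst_str)
    return col_name

replaces = {"test": "export", "{pre}": "export/"}
assert rename_col("test/iter_time", replaces) == "export/iter_time"  # substring replaced; prefix already present
assert rename_col("epoch", replaces) == "export/epoch"               # prefix added
```

Because `dict` preserves insertion order, the plain replacements run before the `{pre}` check when listed first, which is what keeps already-prefixed columns from being double-prefixed.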
8 changes: 5 additions & 3 deletions tests/perf/conftest.py
@@ -27,8 +27,8 @@ def pytest_addoption(parser):
"--model-category",
action="store",
default="all",
choices=("default", "all"),
help="Choose default|all. Defaults to all.",
choices=("speed", "balance", "accuracy", "default", "other", "all"),
help="Choose speed|balcence|accuracy|default|other|all. Defaults to all.",
)
parser.addoption(
"--data-group",
@@ -290,7 +290,9 @@ def fxt_mlflow_client(request: pytest.FixtureRequest) -> MlflowClient:
def fxt_model(request: pytest.FixtureRequest, fxt_model_category) -> Benchmark.Model:
"""Skip models according to user options."""
model: Benchmark.Model = request.param
if fxt_model_category == "default" and model.category == "other":
if fxt_model_category == "all":
return model
if (fxt_model_category == "default" and model.category == "other") or fxt_model_category != model.category:
pytest.skip(f"{model.category} category model")
return model

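Putting the conftest changes together: `--model-category all` runs everything, `default` skips only `other` models, and any specific category runs just that category. The sketch below summarizes the expected semantics; the `fxt_model_category` fixture body and the anomaly test-file name are assumptions, as neither appears in this diff.

```python
# Assumed option plumbing (not shown in this diff): the fixture reads the CLI flag.
import pytest

@pytest.fixture(scope="session")
def fxt_model_category(request: pytest.FixtureRequest) -> str:
    return request.config.getoption("--model-category")

# Expected skip behavior for a model of category c:
#   --model-category all      -> never skip
#   --model-category default  -> skip only c == "other"
#   --model-category speed    -> skip unless c == "speed" (likewise balance/accuracy/other)
#
# Example invocation, mirroring the workflow's tox command (test file name assumed):
#   tox -vv -e perf-benchmark -- tests/perf/test_anomaly.py --model-category speed
```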
(6 more changed files not shown.)
