Skip to content

Commit

Permalink
Remove action in benchmark workflow (#3486)
Browse files Browse the repository at this point in the history
  • Loading branch information
chuneuny-emily authored May 10, 2024
1 parent 19a937e commit 377de90
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 107 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/perf_benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,6 @@ jobs:
task: "semantic_segmentation"
- task-short: "vsp"
task: "visual_prompting"
- task-short: "act"
task: "action"
name: Perf-Benchmark-${{ matrix.task-short }}
runs-on: [self-hosted, linux, x64, dmount-v2]
timeout-minutes: 8640
Expand Down
228 changes: 123 additions & 105 deletions tests/perf/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@
log = logging.getLogger(__name__)


class AggregateError(Exception):
    """Single exception summarizing the failures collected across benchmark seeds.

    The exception message lists every failure on its own line in the form
    ``Seed <seed>: <error>``, preceded by a fixed header line.

    Args:
        errors: Iterable of ``(seed, error)`` pairs. Each element is
            interpolated into the message with ``str`` formatting.

    Attributes:
        errors: The ``(seed, error)`` pairs as a list, kept so callers can
            inspect individual failures without parsing the message text.
    """

    def __init__(self, errors):
        # Materialize once so a one-shot iterable can be both stored and formatted.
        self.errors = list(errors)
        error_message = "\n".join(f"Seed {seed}: {error}" for seed, error in self.errors)

        super().__init__(f"Exceptions occurred in the following seeds:\n{error_message}")


class Benchmark:
"""Benchmark runner for OTX2.x.
Expand Down Expand Up @@ -151,138 +161,146 @@ def run(
if self.num_repeat > 0:
num_repeat = self.num_repeat # Override by global setting

exceptions = []
for seed in range(num_repeat):
sub_work_dir = work_dir / str(seed)
tags["seed"] = str(seed)

# Train & test
command = [
"otx",
"train",
"--config",
f"src/otx/recipe/{model.task}/{model.name}.yaml",
"--data_root",
str(data_root),
"--work_dir",
str(sub_work_dir),
"--engine.device",
self.accelerator,
]
for key, value in dataset.extra_overrides.get("train", {}).items():
command.append(f"--{key}")
command.append(str(value))
command.extend(["--seed", str(seed)])
# TODO(someone): Disable deterministic for instance segmentation as it causes OOM.
# https://github.com/pytorch/vision/issues/8168#issuecomment-1890599205
command.extend(["--deterministic", str(self.deterministic)])
if self.num_epoch > 0:
command.extend(["--max_epochs", str(self.num_epoch)])
start_time = time()
self._run_command(command)
extra_metrics = {"train/e2e_time": time() - start_time}
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "train",
replaces={"train_": "train/", "{pre}": "train/"},
)
self._log_metrics(
work_dir=sub_work_dir / ".latest" / "train",
tags=tags,
criteria=criteria,
extra_metrics=extra_metrics,
)

command = [
"otx",
"test",
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test_": "test/", "{pre}": "test/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Export & test
if self.eval_upto in ["export", "optimize"]:
command = [
"otx",
"export",
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("export", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model.xml"
if not exported_model_path.exists():
exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model_decoder.xml"
try:
sub_work_dir = work_dir / str(seed)
tags["seed"] = str(seed)

command = [ # NOTE: not working for h_label_cls. to be fixed
# Train & test
command = [
"otx",
"test",
"--checkpoint",
str(exported_model_path),
"train",
"--config",
f"src/otx/recipe/{model.task}/{model.name}.yaml",
"--data_root",
str(data_root),
"--work_dir",
str(sub_work_dir),
"--engine.device",
self.accelerator,
]
for key, value in dataset.extra_overrides.get("test", {}).items():
for key, value in dataset.extra_overrides.get("train", {}).items():
command.append(f"--{key}")
command.append(str(value))
command.extend(["--seed", str(seed)])
# TODO(someone): Disable deterministic for instance segmentation as it causes OOM.
# https://github.com/pytorch/vision/issues/8168#issuecomment-1890599205
command.extend(["--deterministic", str(self.deterministic)])
if self.num_epoch > 0:
command.extend(["--max_epochs", str(self.num_epoch)])
start_time = time()
self._run_command(command)

extra_metrics = {"train/e2e_time": time() - start_time}
self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "export", "{pre}": "export/"},
work_dir=sub_work_dir / ".latest" / "train",
replaces={"train_": "train/", "{pre}": "train/"},
)
self._log_metrics(
work_dir=sub_work_dir / ".latest" / "train",
tags=tags,
criteria=criteria,
extra_metrics=extra_metrics,
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Optimize & test
if self.eval_upto == "optimize":
command = [
"otx",
"optimize",
"--checkpoint",
str(exported_model_path),
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("optimize", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model.xml"
if not optimized_model_path.exists():
optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model_decoder.xml"

command = [
"otx",
"test",
"--checkpoint",
str(optimized_model_path),
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "optimize", "{pre}": "optimize/"},
replaces={"test_": "test/", "{pre}": "test/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Force memory clean up
gc.collect()
# Export & test
if self.eval_upto in ["export", "optimize"]:
command = [
"otx",
"export",
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("export", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model.xml"
if not exported_model_path.exists():
exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model_decoder.xml"

command = [ # NOTE: not working for h_label_cls. to be fixed
"otx",
"test",
"--checkpoint",
str(exported_model_path),
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "export", "{pre}": "export/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Optimize & test
if self.eval_upto == "optimize":
command = [
"otx",
"optimize",
"--checkpoint",
str(exported_model_path),
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("optimize", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model.xml"
if not optimized_model_path.exists():
optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model_decoder.xml"

command = [
"otx",
"test",
"--checkpoint",
str(optimized_model_path),
"--work_dir",
str(sub_work_dir),
]
for key, value in dataset.extra_overrides.get("test", {}).items():
command.append(f"--{key}")
command.append(str(value))
self._run_command(command)

self._rename_raw_data(
work_dir=sub_work_dir / ".latest" / "test",
replaces={"test": "optimize", "{pre}": "optimize/"},
)
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Force memory clean up
gc.collect()
except Exception as e: # noqa: PERF203
exceptions.append((seed, str(e)))

if exceptions:
# Raise the custom exception with all collected errors
raise AggregateError(exceptions)

result = self.load_result(work_dir)
if result is None:
Expand Down

0 comments on commit 377de90

Please sign in to comment.