Commit

Small benchmark changes
Kobzol committed Feb 9, 2024
1 parent: 9e0fb7f, commit: 615833d
Showing 2 changed files with 21 additions and 13 deletions.
benchmarks/dask_benchmarks.py (11 changes: 7 additions & 4 deletions)
@@ -31,7 +31,8 @@ def generate_descriptors(self) -> Iterable[BenchmarkDescriptor]:
         hq_env = single_node_hq_cluster(hq_path, worker_threads=worker_threads)
         dask_env = single_node_dask_cluster(worker_threads=worker_threads)

-        task_counts = [100, 1000, 5000, 10000, 25000, 50000]
+        task_counts = [50000] # [100, 1000, 5000, 10000, 25000, 50000]
+        repeat_count = 1

         types = [
             (hq_env, SleepHQ),
@@ -42,7 +43,9 @@ def generate_descriptors(self) -> Iterable[BenchmarkDescriptor]:
             for task_count in task_counts:
                 sleep_duration = total_duration_single_thread / task_count
                 workload = workload_cls(task_count=task_count, sleep_duration=sleep_duration)
-                yield BenchmarkDescriptor(env_descriptor=env, workload=workload, timeout=timeout, repeat_count=2)
+                yield BenchmarkDescriptor(
+                    env_descriptor=env, workload=workload, timeout=timeout, repeat_count=repeat_count
+                )

     def postprocess(self, workdir: Path, database: Database):
         import seaborn as sns
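
Note on the hunk above: the per-task sleep duration is derived so that the total sequential work stays constant as the task count changes. A minimal sketch of that arithmetic, assuming a total_duration_single_thread of 30 seconds (the real value is defined elsewhere in the benchmark suite):

```python
# Illustrative sketch only (not part of the commit).
# total_duration_single_thread = 30.0 is an assumed value; the real
# constant lives elsewhere in the benchmark code.
total_duration_single_thread = 30.0

for task_count in [100, 1000, 5000, 10000, 25000, 50000]:
    # Each task sleeps for a fraction of the fixed total, so the amount of
    # sequential "work" is identical across task counts.
    sleep_duration = total_duration_single_thread / task_count
    print(f"{task_count:>6} tasks -> each sleeps {sleep_duration * 1000:.2f} ms")
# e.g. 50000 tasks -> each sleeps 0.60 ms
```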
@@ -61,7 +64,7 @@ def parse_env(record: DatabaseRecord) -> str:
         )

         ax = sns.scatterplot(df, x="task-count", y="duration", hue="environment", marker="o")
-        ax.set(ylabel="Duration [s]", xlabel="Task count")
+        ax.set(ylabel="Duration [s]", xlabel="Task count", ylim=(0, None))
         # ax.set(yscale="log")
         render_chart_to_png(workdir / "dask-vs-hq-sleep.png")

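For context on the ylim change (here and in the lineplot hunk below): passing ylim=(0, None) through Axes.set pins the lower bound of the y-axis at zero and leaves the current upper bound untouched, so duration differences are read against a zero baseline. A standalone sketch with made-up data, assuming seaborn, pandas, and matplotlib are installed:

```python
# Illustrative sketch only; the DataFrame contents are invented.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.DataFrame(
    {
        "task-count": [100, 1000, 5000, 100, 1000, 5000],
        "duration": [1.2, 3.4, 9.8, 2.1, 5.6, 15.3],
        "environment": ["hq", "hq", "hq", "dask", "dask", "dask"],
    }
)
ax = sns.scatterplot(data=df, x="task-count", y="duration", hue="environment", marker="o")
# ylim=(0, None) fixes the lower bound at 0; None keeps the existing upper bound.
ax.set(ylabel="Duration [s]", xlabel="Task count", ylim=(0, None))
plt.savefig("dask-vs-hq-sleep-example.png")
```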
@@ -106,7 +109,7 @@ def postprocess(self, workdir: Path, database: Database):
         )

         ax = sns.lineplot(df, x="task-count", y="duration", hue="environment", marker="o")
-        ax.set(ylabel="Duration [s]", xlabel="Task count")
+        ax.set(ylabel="Duration [s]", xlabel="Task count", ylim=(0, None))
         # ax.set(yscale="log")
         render_chart_to_png(workdir / "dask-vs-hq-empty.png")

benchmarks/ligen_benchmarks.py (23 changes: 14 additions & 9 deletions)
@@ -6,6 +6,7 @@
 from typing import Dict, Any, List, Iterable

 import dask
+import dataclasses
 import distributed
 import numpy as np
 import pandas as pd
@@ -190,15 +191,17 @@ class DaskVsHqLigen(TestCase):
     """

     def generate_descriptors(self) -> Iterable[BenchmarkDescriptor]:
-        hq_path = get_hq_binary()
+        hq_path = get_hq_binary(debug_symbols=True)

         worker_threads = min(multiprocessing.cpu_count(), 64)
-        hq_env = single_node_hq_cluster(hq_path, worker_threads=worker_threads)
+        hq_env = dataclasses.replace(
+            single_node_hq_cluster(hq_path, worker_threads=worker_threads), generate_event_log=False
+        )
         dask_env = single_node_dask_cluster(worker_threads=worker_threads)
         timeout = datetime.timedelta(minutes=10)

-        input_smi = CURRENT_DIR / "datasets/ligen/artif-200.smi"
-        variants = [(1, 1), (4, 4), (8, 8), (32, 4)]  # One molecule per task, one thread per task
+        input_smi = CURRENT_DIR / "datasets/ligen/artif-32.smi"
+        variants = [(1, 1), (4, 4)]  # , (4, 4), (8, 8), (32, 4)]  # One molecule per task, one thread per task

         def gen_descriptions(env: EnvironmentDescriptor, workload_cls) -> List[BenchmarkDescriptor]:
             for max_molecules, threads in variants:
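
For context on the dataclasses.replace call above: it returns a copy of a dataclass instance with selected fields overridden, which lets the benchmark tweak the descriptor returned by single_node_hq_cluster (here, turning off generate_event_log) without mutating the original. A minimal, self-contained sketch using a stand-in dataclass rather than the real environment descriptor type:

```python
# Illustrative sketch only; ClusterInfo is a stand-in, not the real type.
import dataclasses


@dataclasses.dataclass(frozen=True)
class ClusterInfo:
    worker_threads: int
    generate_event_log: bool = True


base = ClusterInfo(worker_threads=64)
tweaked = dataclasses.replace(base, generate_event_log=False)

assert base.generate_event_log is True       # original is untouched
assert tweaked.generate_event_log is False   # copy carries the override
assert tweaked.worker_threads == 64          # other fields are preserved
```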
@@ -214,9 +217,11 @@ def postprocess(self, workdir: Path, database: Database):
         import seaborn as sns

         df = analyze_results_utilization(database)
-        print(f"""UTILIZATION
+        print(
+            f"""UTILIZATION
 {df}
-""")
+"""
+        )

         df = (
             DataFrameExtractor(database)
@@ -279,10 +284,10 @@ def benchmark_aggregated_vs_separate_tasks():
     per input ligand, vs. when we use 4/8/16 ligands for each task.
     """
     hq_path = get_hq_binary()
-    env = single_node_hq_cluster(hq_path, worker_threads=min(multiprocessing.cpu_count(), 64))
-    input_smi = get_dataset_path(Path("ligen/artif-200.smi"))
+    env = single_node_hq_cluster(hq_path, worker_threads=min(multiprocessing.cpu_count(), 64), version="base")
+    input_smi = get_dataset_path(Path("ligen/artif-2.smi"))

-    variants = [(1, 1), (4, 4), (8, 8)]
+    variants = [(1, 1)]  # , (4, 4), (8, 8)]
     descriptions = []
     for max_molecules, num_threads in variants:
         workload = LigenHQWorkload(smi_path=input_smi, max_molecules=max_molecules, screening_threads=num_threads)