Add transform hierarchy propagation benchmark #9442
@@ -0,0 +1,11 @@
```rust
use criterion::criterion_main;

mod hierarchy;

mod world_gen;

criterion_main!(
    hierarchy::init::transform_hierarchy_init,
    hierarchy::propagation::transform_hierarchy_configurations,
    hierarchy::propagation::transform_hierarchy_sizes,
);
```
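For orientation: each `criterion_group!` target referenced above is a generated function, and `criterion_main!` produces the bench harness's entry point. Its expansion is roughly the following (a simplified sketch, not the exact generated code):

```rust
fn main() {
    // Each criterion_group! target is a plain function that runs its benchmarks.
    hierarchy::init::transform_hierarchy_init();
    hierarchy::propagation::transform_hierarchy_configurations();
    hierarchy::propagation::transform_hierarchy_sizes();

    // Criterion then prints the final summary for all groups.
    criterion::Criterion::default()
        .configure_from_args()
        .final_summary();
}
```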
@@ -0,0 +1,99 @@
```rust
use bevy_app::App;

use std::time::{Instant, Duration};

use criterion::*;

use crate::world_gen::*;

criterion_group! {
    name = transform_hierarchy_init;
    config = Criterion::default()
        .warm_up_time(std::time::Duration::from_secs(3))
        .measurement_time(std::time::Duration::from_secs(20));
    targets = transform_init
}

/// This benchmark group tries to measure the cost of the initial transform propagation,
/// i.e. the first time transform propagation runs after we have just added all our entities.
///
/// These benchmarks are probably not as useful as the transform update benchmarks,
/// since the benchmark implementation is a little fragile and rather slow (see comments below).
/// They're included here nevertheless in case they're useful.
fn transform_init(c: &mut Criterion) {
    let mut group = c.benchmark_group("transform_init");

    // Reduce the sample size and enable flat sampling to make sure this benchmark
    // doesn't take a lot longer than the simplified benchmark.
    group.sample_size(50);
    group.sampling_mode(SamplingMode::Flat);

    for (name, cfg) in &CONFIGS {
        let (result, mut app) = build_app(cfg, TransformUpdates::Disabled);

        group.throughput(Throughput::Elements(result.inserted_nodes as u64));

        // Simplified benchmark for the initial propagation
        group.bench_function(BenchmarkId::new("reset", name), move |b| {
            // Building the World (in setup) takes a lot of time, so ideally we wouldn't do it
            // on every iteration, since Criterion wants to run the benchmark function in
            // batches. Unfortunately, we can't reuse an App directly in iter(), because from
            // the second iteration onwards the World would no longer be in its pristine,
            // just-initialized state. It's also not possible to clone a pristine World,
            // since World doesn't implement Clone.
            // As an alternative, we reuse the same App and reset it to a pseudo-pristine
            // state by simply marking all Parent, Children and Transform components as
            // changed. This should look like a pristine state to the propagation systems.
            //
            // Note: This is a tradeoff. The reset benchmark should deliver more reliable
            // results in the same time, while the reference benchmark below should be
            // closer to the real-world initialization cost.

            app.add_schedule(ResetSchedule, reset_schedule());

            // Run the Main schedule once to ensure initial updates are done.
            // This is a little counterintuitive, since the initial delay is exactly what we
            // want to measure - however, the ResetSchedule is in place to hopefully
            // replicate the World in its pristine state on every iteration.
            // We therefore run update() here to prevent the first iteration from doing
            // extra work in case the reset mechanism is incomplete.
            app.update();

            b.iter_custom(|iters| {
                let mut total = Duration::ZERO;

                for _i in 0..iters {
                    std::hint::black_box(app.world.run_schedule(ResetSchedule));

                    let start = Instant::now();
                    std::hint::black_box(app.world.run_schedule(bevy_app::Main));
                    let elapsed = start.elapsed();

                    app.world.clear_trackers();

                    total += elapsed;
                }

                total
            });
        });

        // Reference benchmark for the initial propagation - this needs to rebuild the App
        // on every iteration, which makes the benchmark quite slow and yields less precise
        // results in the same time compared to the simplified benchmark.
        group.bench_with_input(BenchmarkId::new("reference", name), cfg, move |b, cfg| {
            // Use iter_batched_ref to keep Drop out of the measured time
            b.iter_batched_ref(
                || {
                    let (_result, app) = build_app(cfg, TransformUpdates::Disabled);
                    app
                },
                App::update,
                BatchSize::PerIteration,
            );
        });
    }

    group.finish();
}
```
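Note: the `ResetSchedule` label and the `reset_schedule()` helper used above live in `world_gen`, which is not among the files shown in this diff. A minimal sketch of what such a reset might look like, assuming it simply flags the hierarchy and transform components as changed (the component set and system shape are assumptions based on the comment in the code above):

```rust
use bevy_ecs::prelude::*;
use bevy_ecs::schedule::{Schedule, ScheduleLabel};
use bevy_hierarchy::{Children, Parent};
use bevy_transform::prelude::Transform;

// Assumed definition; the diff only shows this label being used.
#[derive(ScheduleLabel, Clone, Debug, PartialEq, Eq, Hash)]
pub struct ResetSchedule;

// Marks every component of type C as changed without writing to it, so the
// propagation systems see the same change-detection state as a fresh World.
fn mark_changed<C: Component>(mut query: Query<&mut C>) {
    for mut component in query.iter_mut() {
        component.set_changed();
    }
}

// Hypothetical reconstruction of reset_schedule().
pub fn reset_schedule() -> Schedule {
    let mut schedule = Schedule::default();
    schedule.add_systems((
        mark_changed::<Parent>,
        mark_changed::<Children>,
        mark_changed::<Transform>,
    ));
    schedule
}
```

Because `set_changed()` only flips change-detection flags, such a reset is far cheaper than rebuilding the App, which is what lets the "reset" benchmark collect more samples in the same time than the "reference" benchmark.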
@@ -0,0 +1,4 @@
```rust
pub mod init;

pub mod propagation;
```
@@ -0,0 +1,200 @@
```rust
use bevy_ecs::prelude::*;

use std::time::{Instant, Duration};

use criterion::{*, measurement::WallTime};

use crate::world_gen::*;

criterion_group! {
    name = transform_hierarchy_configurations;
    config = Criterion::default()
        .warm_up_time(std::time::Duration::from_millis(500))
        .measurement_time(std::time::Duration::from_secs(15));

    targets = transform_propagation_configurations
}

criterion_group! {
    name = transform_hierarchy_sizes;
    config = Criterion::default()
        .warm_up_time(std::time::Duration::from_millis(300))
        .measurement_time(std::time::Duration::from_secs(5))
        .sample_size(50);

    targets = transform_propagation_sizes
}

/// Inner transform propagation benchmark function.
/// This version only measures time spent during PostUpdate, thereby removing
/// the impact of simulating transform updates, which happens during the Update schedule.
fn update_bench_postupdate_only(b: &mut Bencher<WallTime>, &(cfg, enable_update): &(&Cfg, TransformUpdates)) {
    let (_result, mut app) = build_app(cfg, enable_update);

    // Run the Main schedule once to ensure initial updates are done
    app.update();

    // We want to benchmark the transform updates in the PostUpdate schedule without
    // benchmarking the update function, which is intended to simulate changes to
    // Transform in a typical game.
    // Therefore, we simply remove the PostUpdate and Last schedules here in order to
    // measure the time spent in PostUpdate itself, without the time spent in the
    // schedules before PostUpdate (PreUpdate, Update, ...) and the schedules after
    // PostUpdate (currently only Last).
    // If the schedules that make up Main change, this logic needs to be updated
    // accordingly.
    let mut schedules = app.world.get_resource_mut::<Schedules>().unwrap();
    let (_, mut postupdate) = schedules.remove_entry(&bevy_app::PostUpdate).unwrap();
    let (_, mut last) = schedules.remove_entry(&bevy_app::Last).unwrap();

    b.iter_custom(|iters| {
        let mut total = Duration::ZERO;

        for _i in 0..iters {
            std::hint::black_box(app.world.run_schedule(bevy_app::Main));

            let start = Instant::now();
            std::hint::black_box(postupdate.run(&mut app.world));
            let elapsed = start.elapsed();

            std::hint::black_box({
                last.run(&mut app.world);
                app.world.clear_trackers();
            });
```
Review comment on lines +62 to +65: Not sure the black_box is necessary here.
```rust
            total += elapsed;
        }

        total
    });
}
```
```rust
/// Inner transform propagation benchmark function.
///
/// Simpler alternative to update_bench_postupdate_only that is retained here
/// for future reference. This benchmark includes the time spent simulating
/// transform updates in the Update schedule, which makes the comparison
/// between the noop and updates benchmarks meaningful.
fn update_bench_reference(b: &mut Bencher<WallTime>, &(cfg, enable_update): &(&Cfg, TransformUpdates)) {
    let (_result, mut app) = build_app(cfg, enable_update);

    // Run the Main schedule once to ensure initial updates are done
    app.update();

    b.iter(move || { app.update(); });
}

fn inner_update_bench(b: &mut Bencher<WallTime>, bench_cfg: &(&Cfg, TransformUpdates)) {
    const UPDATE_BENCH_POSTUPDATE_ONLY: bool = false;
```
Review comment on lines +90 to +91: It's a weird way of selecting benchmarks, and inconsistent with how the "reference" benchmarks are declared elsewhere. In my opinion, the "reference" benchmarks should be removed from this PR, since (from my testing) they only add a constant overhead. But it's OK if they stay in the PR as long as they are consistently declared :P
```rust
    if UPDATE_BENCH_POSTUPDATE_ONLY {
        update_bench_postupdate_only(b, bench_cfg);
    } else {
        update_bench_reference(b, bench_cfg);
    }
}
```
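A hypothetical sketch of the kind of consistent declaration the comment above asks for (the function name and grouping are assumptions, not part of the PR): both inner strategies could be registered side by side under distinct BenchmarkIds, mirroring the "reset"/"reference" pair in init.rs, instead of being toggled by a const:

```rust
// Hypothetical alternative: declare both measurement strategies as sibling
// benchmarks rather than selecting one at compile time. Assumes this file's
// imports and the Cfg/TransformUpdates types from world_gen.
fn bench_update_variants(
    group: &mut BenchmarkGroup<WallTime>,
    name: &str,
    input: &(&Cfg, TransformUpdates),
) {
    // Measures only the PostUpdate schedule (the propagation itself).
    group.bench_with_input(
        BenchmarkId::new("postupdate_only", name),
        input,
        update_bench_postupdate_only,
    );
    // Measures a full App::update() as a reference point.
    group.bench_with_input(
        BenchmarkId::new("reference", name),
        input,
        update_bench_reference,
    );
}
```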
```rust
#[derive(Clone, Copy)]
enum IdSource {
    Fixed(&'static str),
    NodeCount,
}

fn bench_single(group: &mut BenchmarkGroup<WallTime>, id_source: IdSource, cfg: &Cfg) {
    // Run build_app once to get an inserted node count
    let (result, _app) = build_app(cfg, TransformUpdates::Disabled);
    group.throughput(Throughput::Elements(result.inserted_nodes as u64));

    let id = |function_name| {
        match id_source {
            IdSource::Fixed(id_str) => BenchmarkId::new(function_name, id_str),
            IdSource::NodeCount => BenchmarkId::new(function_name, result.inserted_nodes),
        }
    };

    // Measures hierarchy propagation systems when some transforms are updated.
    group.bench_with_input(id("updates"), &(cfg, TransformUpdates::Enabled), inner_update_bench);

    // Measures hierarchy propagation systems when there are no changes
    // during the Update schedule.
    group.bench_with_input(id("noop"), &(cfg, TransformUpdates::Disabled), inner_update_bench);
```
Review comment on lines +122 to +127: I suggest replacing these benchmark IDs so the relationship between the benchmark result and the benchmark source code is a bit more evident.
```rust
}

fn bench_group<F>(c: &mut Criterion, name: &str, bench_function: F)
where
    F: FnOnce(&mut BenchmarkGroup<WallTime>),
{
    let mut group = c.benchmark_group(format!("transform_propagation_{}", name));

    // Always use linear sampling for these benchmarks
    // (they are close enough in performance, and this way the iteration time plots are consistent)
    group.sampling_mode(SamplingMode::Linear);

    group.sample_size(50);

    group.warm_up_time(std::time::Duration::from_millis(400));
    group.measurement_time(std::time::Duration::from_secs(5));

    group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));

    bench_function(&mut group);

    group.finish();
}

fn bench_sizes<I>(c: &mut Criterion, name: &str, cfgs: I)
where
    I: IntoIterator<Item = Cfg>,
{
    bench_group(c, name, |group| {
        for cfg in cfgs {
            bench_single(group, IdSource::NodeCount, &cfg);
        }
    });
}

fn transform_propagation_sizes(c: &mut Criterion) {
    bench_sizes(c, "large", (6u32..=18u32).map(|depth| {
        Cfg {
            test_case: TestCase::NonUniformTree {
                depth,
                branch_width: 8,
            },
            update_filter: Default::default(),
        }
    }));
    bench_sizes(c, "deep", (8u32..=24u32).map(|depth| {
        Cfg {
            test_case: TestCase::NonUniformTree {
                depth,
                branch_width: 2,
            },
            update_filter: Default::default(),
        }
    }));
    bench_sizes(c, "wide", (20u32..=470u32).step_by(30).map(|branch_width| {
        Cfg {
            test_case: TestCase::Tree {
                depth: 3,
                branch_width,
            },
            update_filter: Default::default(),
        }
    }));
}

fn transform_propagation_configurations(c: &mut Criterion) {
    bench_group(c, "all_configurations", |group| {
        for (name, cfg) in &CONFIGS {
            bench_single(group, IdSource::Fixed(name), cfg);
        }
    });
}
```
Review comment: I still think this benchmark is useful. It gives an idea of the behavior in worst-case situations (which do exist in games, e.g. when spawning a new level or complex models). The computational cost of removing many entities was a factor in reverting a change (see #5423 (comment)), which means we care about the computational cost of this sort of thing. I think that "full recomputation" or something similarly descriptive could be a better name, though.