diff --git a/benches/Cargo.toml b/benches/Cargo.toml index a811b1a6ef039..dab416333161c 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -8,13 +8,18 @@ license = "MIT OR Apache-2.0" [dev-dependencies] glam = "0.24" -rand = "0.8" +rand = { version = "0.8", features = ["small_rng"] } rand_chacha = "0.3" criterion = { version = "0.3", features = ["html_reports"] } bevy_app = { path = "../crates/bevy_app" } +bevy_core = { path = "../crates/bevy_core" } +bevy_core_pipeline = { path = "../crates/bevy_core_pipeline" } bevy_ecs = { path = "../crates/bevy_ecs", features = ["multi-threaded"] } +bevy_hierarchy = { path = "../crates/bevy_hierarchy" } bevy_reflect = { path = "../crates/bevy_reflect" } bevy_tasks = { path = "../crates/bevy_tasks" } +bevy_time = { path = "../crates/bevy_time" } +bevy_transform = { path = "../crates/bevy_transform" } bevy_utils = { path = "../crates/bevy_utils" } bevy_math = { path = "../crates/bevy_math" } @@ -27,6 +32,11 @@ name = "change_detection" path = "benches/bevy_ecs/change_detection.rs" harness = false +[[bench]] +name = "transform_hierarchy" +path = "benches/bevy_transform/benches.rs" +harness = false + [[bench]] name = "ecs" path = "benches/bevy_ecs/benches.rs" diff --git a/benches/benches/bevy_transform/benches.rs b/benches/benches/bevy_transform/benches.rs new file mode 100644 index 0000000000000..1b824d1744ff0 --- /dev/null +++ b/benches/benches/bevy_transform/benches.rs @@ -0,0 +1,11 @@ +use criterion::criterion_main; + +mod hierarchy; + +mod world_gen; + +criterion_main!( + hierarchy::init::transform_hierarchy_init, + hierarchy::propagation::transform_hierarchy_configurations, + hierarchy::propagation::transform_hierarchy_sizes, +); diff --git a/benches/benches/bevy_transform/hierarchy/init.rs b/benches/benches/bevy_transform/hierarchy/init.rs new file mode 100644 index 0000000000000..38eb33cf0c1ac --- /dev/null +++ b/benches/benches/bevy_transform/hierarchy/init.rs @@ -0,0 +1,99 @@ +use bevy_app::App; + +use std::time::{Instant, Duration}; + +use criterion::*; + +use crate::world_gen::*; + +criterion_group!{ + name = transform_hierarchy_init; + config = Criterion::default() + .warm_up_time(std::time::Duration::from_secs(3)) + .measurement_time(std::time::Duration::from_secs(20)); + targets = transform_init +} + +/// This benchmark group tries to measure the cost of the initial transform propagation, +/// i.e. the first time transform propagation runs after we just added all our entities. +/// +/// These benchmarks are probably not as useful as the transform update benchmarks +/// since the benchmark implementation is a little fragile and rather slow (see comments below). +/// They're included here nevertheless in case they're useful. +fn transform_init(c: &mut Criterion) { + let mut group = c.benchmark_group("transform_init"); + + // Reduce sample size and enable flat sampling to make sure this benchmark doesn't + // take a lot longer than the simplified benchmark. + group.sample_size(50); + group.sampling_mode(SamplingMode::Flat); + + for (name, cfg) in &CONFIGS { + let (result, mut app) = build_app(cfg, TransformUpdates::Disabled); + + group.throughput(Throughput::Elements(result.inserted_nodes as u64)); + + // Simplified benchmark for the initial propagation + group.bench_function(BenchmarkId::new("reset", name), move |b| { + // Building the World (in setup) takes a lot of time, so ideally we shouldn't do that + // on every iteration since Criterion ideally wants to run the benchmark function in batches. + // Unfortunately, we can't re-use an App directly in iter() because the World would no + // longer be in its pristine, just initialized state from the second iteration onwards. + // Furthermore, it's not possible to clone a pristine World since World doesn't implement + // Clone. + // As an alternative, we reuse the same App and reset it to a pseudo-pristine state by + // simply marking all Parent, Children and Transform components as changed. + // This should look like a pristine state to the propagation systems. + // + // Note: This is a tradeoff. The reset benchmark should deliver more reliable results + // in the same time, while the reference benchmark below should be closer to the + // real-world initialization cost. + + app.add_schedule(ResetSchedule, reset_schedule()); + + // Run Main schedule once to ensure initial updates are done + // This is a little counterintuitive since the initial delay is exactly what we want to + // measure - however, we have the ResetSchedule in place to hopefully replicate the + // World in its pristine state on every iteration. + // We therefore run update here to prevent the first iteration having additional work + // due to possible incompleteness of the reset mechanism + app.update(); + + b.iter_custom(|iters| { + let mut total = Duration::ZERO; + + for _i in 0..iters { + std::hint::black_box(app.world.run_schedule(ResetSchedule)); + + let start = Instant::now(); + std::hint::black_box(app.world.run_schedule(bevy_app::Main)); + let elapsed = start.elapsed(); + + app.world.clear_trackers(); + + total += elapsed; + } + + total + }); + }); + + // Reference benchmark for the initial propagation - needs to rebuild the App + // on every iteration, which makes the benchmark quite slow and results + // in less precise results in the same time compared to the simplified benchmark. + group.bench_with_input(BenchmarkId::new("reference", name), cfg, move |b, cfg| { + // Use iter_batched_ref to prevent influence of Drop + b.iter_batched_ref( + || { + let (_result, app) = build_app(cfg, TransformUpdates::Disabled); + app + }, + App::update, + BatchSize::PerIteration, + ); + }); + } + + group.finish(); +} + diff --git a/benches/benches/bevy_transform/hierarchy/mod.rs b/benches/benches/bevy_transform/hierarchy/mod.rs new file mode 100644 index 0000000000000..1a0b2c1000490 --- /dev/null +++ b/benches/benches/bevy_transform/hierarchy/mod.rs @@ -0,0 +1,4 @@ +pub mod init; + +pub mod propagation; + diff --git a/benches/benches/bevy_transform/hierarchy/propagation.rs b/benches/benches/bevy_transform/hierarchy/propagation.rs new file mode 100644 index 0000000000000..9ebb99e4db3fc --- /dev/null +++ b/benches/benches/bevy_transform/hierarchy/propagation.rs @@ -0,0 +1,200 @@ +use bevy_ecs::prelude::*; + +use std::time::{Instant, Duration}; + +use criterion::{*, measurement::WallTime}; + +use crate::world_gen::*; + +criterion_group!{ + name = transform_hierarchy_configurations; + config = Criterion::default() + .warm_up_time(std::time::Duration::from_millis(500)) + .measurement_time(std::time::Duration::from_secs(15)) + ; + + targets = transform_propagation_configurations +} + +criterion_group!{ + name = transform_hierarchy_sizes; + config = Criterion::default() + .warm_up_time(std::time::Duration::from_millis(300)) + .measurement_time(std::time::Duration::from_secs(5)) + .sample_size(50) + ; + + targets = transform_propagation_sizes +} + +/// Inner transform propagation benchmark function +/// This version only measures time spent during PostUpdate, therefore removing +/// the impact of simulating transform updates which happen during the Update schedule. +fn update_bench_postupdate_only(b: &mut Bencher, &(cfg, enable_update): &(&Cfg, TransformUpdates)) { + let (_result, mut app) = build_app(cfg, enable_update); + + // Run Main schedule once to ensure initial updates are done + app.update(); + + // We want to benchmark the transform updates in the PostUpdate schedule without + // benchmarking the update function which is intended to simulate changes to Transform + // in a typical game. + // Therefore, we simply remove the PostUpdate and Last schedules here in order to + // measure the time spent in PostUpdate itself, without the time spent in the + // schedules before PostUpdate (PreUpdate, Update, ...) and the schedules after + // PostUpdate (only Last currently). + // If the schedules that are part of main change, this logic needs to be changed + // accordingly. + let mut schedules = app.world.get_resource_mut::().unwrap(); + let (_, mut postupdate) = schedules.remove_entry(&bevy_app::PostUpdate).unwrap(); + let (_, mut last) = schedules.remove_entry(&bevy_app::Last).unwrap(); + + b.iter_custom(|iters| { + let mut total = Duration::ZERO; + + for _i in 0..iters { + std::hint::black_box(app.world.run_schedule(bevy_app::Main)); + + let start = Instant::now(); + std::hint::black_box(postupdate.run(&mut app.world)); + let elapsed = start.elapsed(); + + std::hint::black_box({ + last.run(&mut app.world); + app.world.clear_trackers(); + }); + + total += elapsed; + } + + total + }); +} + +/// Inner transform propagation benchmark function +/// +/// Simpler alternative to update_bench_postupdate_only that is retained here +/// for future reference. This benchmark includes the time spent simulating +/// transform updates in the Update schedule which makes the comparison between +/// noop and transform_updates benchmarks meaningful. +fn update_bench_reference(b: &mut Bencher, &(cfg, enable_update): &(&Cfg, TransformUpdates)) { + let (_result, mut app) = build_app(cfg, enable_update); + + // Run Main schedule once to ensure initial updates are done + app.update(); + + b.iter(move || { app.update(); }); + +} + +fn inner_update_bench(b: &mut Bencher, bench_cfg: &(&Cfg, TransformUpdates)) { + const UPDATE_BENCH_POSTUPDATE_ONLY: bool = false; + + if UPDATE_BENCH_POSTUPDATE_ONLY { + update_bench_postupdate_only(b, bench_cfg); + } else { + update_bench_reference(b, bench_cfg); + } +} + +#[derive(Clone, Copy)] +enum IdSource { + Fixed(&'static str), + NodeCount, +} + +fn bench_single(group: &mut BenchmarkGroup, id_source: IdSource, cfg: &Cfg) { + // Run build_app once to get an inserted node count + let (result, _app) = build_app(cfg, TransformUpdates::Disabled); + group.throughput(Throughput::Elements(result.inserted_nodes as u64)); + + let id = |function_name| { + match id_source { + IdSource::Fixed(id_str) => { + BenchmarkId::new(function_name, id_str) + }, + IdSource::NodeCount => { + BenchmarkId::new(function_name, result.inserted_nodes) + }, + } + }; + + // Measures hierarchy propagation systems when some transforms are updated. + group.bench_with_input(id("updates"), &(cfg, TransformUpdates::Enabled), inner_update_bench); + + // Measures hierarchy propagation systems when there are no changes + // during the Update schedule. + group.bench_with_input(id("noop"), &(cfg, TransformUpdates::Disabled), inner_update_bench); +} + +fn bench_group(c: &mut Criterion, name: &str, bench_function: F) +where + F: FnOnce(&mut BenchmarkGroup) -> () +{ + let mut group = c.benchmark_group(format!("transform_propagation_{}", name)); + + // Always use linear sampling for these benchmarks + // (they are close enough in performance, and this way the iteration time plots are consistent) + group.sampling_mode(SamplingMode::Linear); + + group.sample_size(50); + + group.warm_up_time(std::time::Duration::from_millis(400)); + group.measurement_time(std::time::Duration::from_secs(5)); + + group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + + bench_function(&mut group); + + group.finish(); +} + +fn bench_sizes(c: &mut Criterion, name: &str, cfgs: I) +where + I: IntoIterator +{ + bench_group(c, name, |group| { + for cfg in cfgs { + bench_single(group, IdSource::NodeCount, &cfg); + } + }); +} + +fn transform_propagation_sizes(c: &mut Criterion) { + bench_sizes(c, "large", (6u32..=18u32).map(|depth| { + Cfg { + test_case: TestCase::NonUniformTree { + depth, + branch_width: 8, + }, + update_filter: Default::default(), + } + })); + bench_sizes(c, "deep", (8u32..=24u32).map(|depth| { + Cfg { + test_case: TestCase::NonUniformTree { + depth, + branch_width: 2, + }, + update_filter: Default::default(), + } + })); + bench_sizes(c, "wide", (20u32..=470u32).step_by(30).map(|branch_width| { + Cfg { + test_case: TestCase::Tree { + depth: 3, + branch_width, + }, + update_filter: Default::default(), + } + })); +} + +fn transform_propagation_configurations(c: &mut Criterion) { + bench_group(c, "all_configurations", |group| { + for (name, cfg) in &CONFIGS { + bench_single(group, IdSource::Fixed(name), cfg); + } + }); +} + diff --git a/benches/benches/bevy_transform/world_gen.rs b/benches/benches/bevy_transform/world_gen.rs new file mode 100644 index 0000000000000..0c3951c489f40 --- /dev/null +++ b/benches/benches/bevy_transform/world_gen.rs @@ -0,0 +1,598 @@ +//! Hierarchy and transform propagation benchmark, derived from the stress test example. +//! +//! For the configurations, see the stress test documentation. + +use bevy_transform::prelude::*; +use bevy_app::{App, Update}; +use bevy_ecs::{prelude::*, schedule::ScheduleLabel}; +use bevy_math::Vec3; +use bevy_core_pipeline::prelude::Camera2dBundle; +use bevy_core::{TaskPoolPlugin, TaskPoolOptions}; +use bevy_time::{Time, TimePlugin}; +use bevy_hierarchy::{Children, Parent, BuildWorldChildren}; +use bevy_utils::default; + +use rand::{Rng, rngs::SmallRng, SeedableRng}; + +#[derive(PartialEq, Clone, Copy)] +pub enum TransformUpdates { Enabled, Disabled } + +pub fn build_app(cfg: &Cfg, enable_update: TransformUpdates) -> (InsertResult, App) { + let mut app = App::new(); + + app.add_plugins(( + TaskPoolPlugin { + task_pool_options: TaskPoolOptions { + max_total_threads: 1, + ..default() + }, + }, + TransformPlugin, + )); + + if enable_update == TransformUpdates::Enabled { + app + .add_plugins(TimePlugin) + .add_systems(Update, update); + } + + // Finish Plugin setup - identical to what the ScheduleRunnerPlugin runner does + // We can't use the ScheduleRunnerPlugin since we run app.update() ourselves, + // and app.run() can't be called repeatedly when using RunMode::Once + + // Do any of the plugins we use in the benchmarks require any asynchronous + // initialization using task pools? + // Currently, this is never the case, but the code is kept here as a reference + // in case it becomes necessary in the future. + const ASYNC_PLUGIN_INIT: bool = true; + if ASYNC_PLUGIN_INIT { + while !app.ready() { + #[cfg(not(target_arch = "wasm32"))] + bevy_tasks::tick_global_task_pools_on_main_thread(); + } + } + // assert!(app.ready()); + + app.finish(); + app.cleanup(); + + // Run setup (what would normally happen in the Startup schedule) + let result = setup(&mut app.world, cfg); + + (result, app) +} + +/// pre-defined benchmark configurations with name +pub const CONFIGS: [(&str, Cfg); 9] = [ + ( + "large_tree", + Cfg { + test_case: TestCase::NonUniformTree { + depth: 18, + branch_width: 8, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "wide_tree", + Cfg { + test_case: TestCase::Tree { + depth: 3, + branch_width: 500, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "deep_tree", + Cfg { + test_case: TestCase::NonUniformTree { + depth: 25, + branch_width: 2, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "chain", + Cfg { + test_case: TestCase::Tree { + depth: 5000, // 2500, + branch_width: 1, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "update_leaves", + Cfg { + test_case: TestCase::Tree { + depth: 18, + branch_width: 2, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 17, + max_depth: u32::MAX, + }, + }, + ), + ( + "update_shallow", + Cfg { + test_case: TestCase::Tree { + depth: 18, + branch_width: 2, + }, + update_filter: UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: 8, + }, + }, + ), + ( + "humanoids_active", + Cfg { + test_case: TestCase::Humanoids { + active: 4000, + inactive: 0, + }, + update_filter: UpdateFilter { + probability: 1.0, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "humanoids_inactive", + Cfg { + test_case: TestCase::Humanoids { + active: 10, + inactive: 3990, + }, + update_filter: UpdateFilter { + probability: 1.0, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), + ( + "humanoids_mixed", + Cfg { + test_case: TestCase::Humanoids { + active: 2000, + inactive: 2000, + }, + update_filter: UpdateFilter { + probability: 1.0, + min_depth: 0, + max_depth: u32::MAX, + }, + }, + ), +]; + +// Random seed for tree creation +// (kept constant to make benchmark results comparable) +const SEED: u64 = 0x94eb0d25004f5f17; + +/// test configuration +#[derive(Resource, Debug, Clone)] +pub struct Cfg { + /// which test case should be inserted + pub test_case: TestCase, + /// which entities should be updated + pub update_filter: UpdateFilter, +} + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum TestCase { + /// a uniform tree, exponentially growing with depth + Tree { + /// total depth + depth: u32, + /// number of children per node + branch_width: u32, + }, + /// a non uniform tree (one side is deeper than the other) + /// creates significantly less nodes than `TestCase::Tree` with the same parameters + NonUniformTree { + /// the maximum depth + depth: u32, + /// max number of children per node + branch_width: u32, + }, + /// one or multiple humanoid rigs + Humanoids { + /// number of active instances (uses the specified [`UpdateFilter`]) + active: u32, + /// number of inactive instances (always inactive) + inactive: u32, + }, +} + +/// a filter to restrict which nodes are updated +#[derive(Debug, Clone)] +pub struct UpdateFilter { + /// starting depth (inclusive) + min_depth: u32, + /// end depth (inclusive) + max_depth: u32, + /// probability of a node to get updated (evaluated at insertion time, not during update) + /// 0 (never) .. 1 (always) + probability: f32, +} + +impl Default for UpdateFilter { + fn default() -> Self { + UpdateFilter { + probability: 0.5, + min_depth: 0, + max_depth: u32::MAX, + } + } +} + +/// update component with some per-component value +#[derive(Component)] +struct UpdateValue(f32); + +/// update positions system +fn update(time: Res