
Add transform hierarchy propagation benchmark #9442

Open · wants to merge 2 commits into base: main
benches/Cargo.toml (11 additions, 1 deletion)
@@ -8,13 +8,18 @@ license = "MIT OR Apache-2.0"

[dev-dependencies]
glam = "0.24"
rand = "0.8"
rand = { version = "0.8", features = ["small_rng"] }
rand_chacha = "0.3"
criterion = { version = "0.3", features = ["html_reports"] }
bevy_app = { path = "../crates/bevy_app" }
bevy_core = { path = "../crates/bevy_core" }
bevy_core_pipeline = { path = "../crates/bevy_core_pipeline" }
bevy_ecs = { path = "../crates/bevy_ecs", features = ["multi-threaded"] }
bevy_hierarchy = { path = "../crates/bevy_hierarchy" }
bevy_reflect = { path = "../crates/bevy_reflect" }
bevy_tasks = { path = "../crates/bevy_tasks" }
bevy_time = { path = "../crates/bevy_time" }
bevy_transform = { path = "../crates/bevy_transform" }
bevy_utils = { path = "../crates/bevy_utils" }
bevy_math = { path = "../crates/bevy_math" }

@@ -27,6 +32,11 @@ name = "change_detection"
path = "benches/bevy_ecs/change_detection.rs"
harness = false

[[bench]]
name = "transform_hierarchy"
path = "benches/bevy_transform/benches.rs"
harness = false

[[bench]]
name = "ecs"
path = "benches/bevy_ecs/benches.rs"
benches/benches/bevy_transform/benches.rs (11 additions)
@@ -0,0 +1,11 @@
use criterion::criterion_main;

mod hierarchy;

mod world_gen;

criterion_main!(
hierarchy::init::transform_hierarchy_init,
hierarchy::propagation::transform_hierarchy_configurations,
hierarchy::propagation::transform_hierarchy_sizes,
);
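Since this suite is registered in benches/Cargo.toml with harness = false, the criterion_main! invocation above generates the binary's entry point; the benchmarks can be run with cargo bench --bench transform_hierarchy, optionally followed by a filter string to select individual groups.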
benches/benches/bevy_transform/hierarchy/init.rs (99 additions)
@@ -0,0 +1,99 @@
use bevy_app::App;

use std::time::{Instant, Duration};

use criterion::*;

use crate::world_gen::*;

criterion_group!{
name = transform_hierarchy_init;
config = Criterion::default()
.warm_up_time(std::time::Duration::from_secs(3))
.measurement_time(std::time::Duration::from_secs(20));
targets = transform_init
}

/// This benchmark group tries to measure the cost of the initial transform propagation,
/// i.e. the first time transform propagation runs after we just added all our entities.
///
/// These benchmarks are probably not as useful as the transform update benchmarks
/// since the benchmark implementation is a little fragile and rather slow (see comments below).
/// They're included here nevertheless in case they're useful.
fn transform_init(c: &mut Criterion) {
let mut group = c.benchmark_group("transform_init");
Review comment (Contributor):
I still think this benchmark is useful. It gives an idea of the behavior in worst-case situations (which do exist in games, e.g. when spawning a new level or a complex model).

The computational cost of removing many entities was a factor in reverting a change (see #5423 (comment)). This means we care about the computational cost of this sort of thing.

I think that "full recomputation" or something similarly descriptive could be a better name, though.


// Reduce sample size and enable flat sampling to make sure this benchmark doesn't
// take a lot longer than the simplified benchmark.
group.sample_size(50);
group.sampling_mode(SamplingMode::Flat);

for (name, cfg) in &CONFIGS {
let (result, mut app) = build_app(cfg, TransformUpdates::Disabled);

group.throughput(Throughput::Elements(result.inserted_nodes as u64));

// Simplified benchmark for the initial propagation
group.bench_function(BenchmarkId::new("reset", name), move |b| {
// Building the World (in setup) takes a lot of time, so ideally we wouldn't do that
// on every iteration, since Criterion wants to run the benchmark function in batches.
// Unfortunately, we can't reuse an App directly in iter() because the World would no
// longer be in its pristine, just-initialized state from the second iteration onwards.
// Furthermore, it's not possible to clone a pristine World, since World doesn't
// implement Clone.
// As an alternative, we reuse the same App and reset it to a pseudo-pristine state by
// simply marking all Parent, Children and Transform components as changed.
// This should look like a pristine state to the propagation systems.
//
// Note: This is a tradeoff. The reset benchmark should deliver more reliable results
// in the same time, while the reference benchmark below should be closer to the
// real-world initialization cost.

app.add_schedule(ResetSchedule, reset_schedule());

// Run the Main schedule once to ensure initial updates are done.
// This is a little counterintuitive, since the initial propagation is exactly what we
// want to measure - however, the ResetSchedule is in place to hopefully replicate the
// World in its pristine state on every iteration.
// We therefore run update here to prevent the first iteration from having additional
// work due to possible incompleteness of the reset mechanism.
app.update();

b.iter_custom(|iters| {
let mut total = Duration::ZERO;

for _i in 0..iters {
std::hint::black_box(app.world.run_schedule(ResetSchedule));

let start = Instant::now();
std::hint::black_box(app.world.run_schedule(bevy_app::Main));
let elapsed = start.elapsed();

app.world.clear_trackers();

total += elapsed;
}

total
});
});

// Reference benchmark for the initial propagation - this needs to rebuild the App
// on every iteration, which makes the benchmark quite slow and yields less
// precise results in the same measurement time compared to the simplified benchmark.
group.bench_with_input(BenchmarkId::new("reference", name), cfg, move |b, cfg| {
// Use iter_batched_ref to prevent influence of Drop
b.iter_batched_ref(
|| {
let (_result, app) = build_app(cfg, TransformUpdates::Disabled);
app
},
App::update,
BatchSize::PerIteration,
);
});
}

group.finish();
}

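Aside: ResetSchedule and reset_schedule() come from the world_gen module, which is not included in this diff. Below is a minimal sketch of what such a reset could look like, assuming the "mark all Parent, Children and Transform components as changed" approach described in the comment above; all names and bodies are illustrative rather than the PR's actual implementation.

use bevy_ecs::{prelude::*, schedule::ScheduleLabel};
use bevy_hierarchy::{Children, Parent};
use bevy_transform::prelude::Transform;

#[derive(ScheduleLabel, Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResetSchedule;

/// Builds a schedule whose single system re-marks every hierarchy-related
/// component as changed, so the propagation systems see what looks like a
/// freshly initialized World.
pub fn reset_schedule() -> Schedule {
    let mut schedule = Schedule::default();
    schedule.add_systems(mark_hierarchy_changed);
    schedule
}

fn mark_hierarchy_changed(
    mut query: Query<(
        Option<&mut Parent>,
        Option<&mut Children>,
        Option<&mut Transform>,
    )>,
) {
    for (parent, children, transform) in &mut query {
        // set_changed() (from DetectChangesMut) flags the component as
        // changed without touching its value.
        if let Some(mut parent) = parent {
            parent.set_changed();
        }
        if let Some(mut children) = children {
            children.set_changed();
        }
        if let Some(mut transform) = transform {
            transform.set_changed();
        }
    }
}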
benches/benches/bevy_transform/hierarchy/mod.rs (4 additions)
@@ -0,0 +1,4 @@
pub mod init;

pub mod propagation;

benches/benches/bevy_transform/hierarchy/propagation.rs (200 additions)
@@ -0,0 +1,200 @@
use bevy_ecs::prelude::*;

use std::time::{Instant, Duration};

use criterion::{*, measurement::WallTime};

use crate::world_gen::*;

criterion_group!{
name = transform_hierarchy_configurations;
config = Criterion::default()
.warm_up_time(std::time::Duration::from_millis(500))
.measurement_time(std::time::Duration::from_secs(15))
;

targets = transform_propagation_configurations
}

criterion_group!{
name = transform_hierarchy_sizes;
config = Criterion::default()
.warm_up_time(std::time::Duration::from_millis(300))
.measurement_time(std::time::Duration::from_secs(5))
.sample_size(50)
;

targets = transform_propagation_sizes
}

/// Inner transform propagation benchmark function.
///
/// This version only measures time spent during PostUpdate, thereby removing
/// the impact of the simulated transform updates that happen during the Update schedule.
fn update_bench_postupdate_only(b: &mut Bencher<WallTime>, &(cfg, enable_update): &(&Cfg, TransformUpdates)) {
let (_result, mut app) = build_app(cfg, enable_update);

// Run Main schedule once to ensure initial updates are done
app.update();

// We want to benchmark the transform updates in the PostUpdate schedule without
// benchmarking the update function, which is intended to simulate changes to Transform
// in a typical game.
// Therefore, we simply remove the PostUpdate and Last schedules here in order to
// measure the time spent in PostUpdate itself, without the time spent in the
// schedules before PostUpdate (PreUpdate, Update, ...) and the schedules after
// PostUpdate (currently only Last).
// If the set of schedules that make up Main changes, this logic needs to be updated
// accordingly.
let mut schedules = app.world.get_resource_mut::<Schedules>().unwrap();
let (_, mut postupdate) = schedules.remove_entry(&bevy_app::PostUpdate).unwrap();
let (_, mut last) = schedules.remove_entry(&bevy_app::Last).unwrap();

b.iter_custom(|iters| {
let mut total = Duration::ZERO;

for _i in 0..iters {
std::hint::black_box(app.world.run_schedule(bevy_app::Main));

let start = Instant::now();
std::hint::black_box(postupdate.run(&mut app.world));
let elapsed = start.elapsed();

std::hint::black_box({
last.run(&mut app.world);
app.world.clear_trackers();
});
Review comment (Contributor) on lines +62 to +65:
Not sure the black_box is necessary here.


total += elapsed;
}

total
});
}

/// Inner transform propagation benchmark function
///
/// Simpler alternative to update_bench_postupdate_only that is retained here
/// for future reference. This benchmark includes the time spent simulating
/// transform updates in the Update schedule, which makes the comparison between
/// the noop and transform_updates benchmarks meaningful.
fn update_bench_reference(b: &mut Bencher<WallTime>, &(cfg, enable_update): &(&Cfg, TransformUpdates)) {
let (_result, mut app) = build_app(cfg, enable_update);

// Run Main schedule once to ensure initial updates are done
app.update();

b.iter(move || app.update());
}

fn inner_update_bench(b: &mut Bencher<WallTime>, bench_cfg: &(&Cfg, TransformUpdates)) {
const UPDATE_BENCH_POSTUPDATE_ONLY: bool = false;
Review comment (Contributor) on lines +90 to +91:
It's a weird way of selecting benchmarks, and it's inconsistent with how the transform_init benchmark works.

In my opinion, the "reference" benchmarks should be removed from this PR, since (from my testing) they only add a constant overhead.

But it's OK if they stay in the PR as long as they are consistently declared :P


if UPDATE_BENCH_POSTUPDATE_ONLY {
update_bench_postupdate_only(b, bench_cfg);
} else {
update_bench_reference(b, bench_cfg);
}
}

#[derive(Clone, Copy)]
enum IdSource {
Fixed(&'static str),
NodeCount,
}

fn bench_single(group: &mut BenchmarkGroup<WallTime>, id_source: IdSource, cfg: &Cfg) {
// Run build_app once to get an inserted node count
let (result, _app) = build_app(cfg, TransformUpdates::Disabled);
group.throughput(Throughput::Elements(result.inserted_nodes as u64));

let id = |function_name| {
match id_source {
IdSource::Fixed(id_str) => {
BenchmarkId::new(function_name, id_str)
},
IdSource::NodeCount => {
BenchmarkId::new(function_name, result.inserted_nodes)
},
}
};

// Measures hierarchy propagation systems when some transforms are updated.
group.bench_with_input(id("updates"), &(cfg, TransformUpdates::Enabled), inner_update_bench);

// Measures hierarchy propagation systems when there are no changes
// during the Update schedule.
group.bench_with_input(id("noop"), &(cfg, TransformUpdates::Disabled), inner_update_bench);
Review comment (Contributor) on lines +122 to +127:
I suggest replacing:

  • "updates" with "transform_updates_enabled"
  • "noop" with "transform_updates_disabled"

So the relationship between the benchmark result and the benchmark source code is a bit more evident.

}

fn bench_group<F>(c: &mut Criterion, name: &str, bench_function: F)
where
F: FnOnce(&mut BenchmarkGroup<WallTime>)
{
let mut group = c.benchmark_group(format!("transform_propagation_{}", name));

// Always use linear sampling for these benchmarks
// (they are close enough in performance, and this way the iteration time plots are consistent)
group.sampling_mode(SamplingMode::Linear);

group.sample_size(50);

group.warm_up_time(std::time::Duration::from_millis(400));
group.measurement_time(std::time::Duration::from_secs(5));

group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));

bench_function(&mut group);

group.finish();
}

fn bench_sizes<I>(c: &mut Criterion, name: &str, cfgs: I)
where
I: IntoIterator<Item = Cfg>
{
bench_group(c, name, |group| {
for cfg in cfgs {
bench_single(group, IdSource::NodeCount, &cfg);
}
});
}

fn transform_propagation_sizes(c: &mut Criterion) {
bench_sizes(c, "large", (6u32..=18u32).map(|depth| {
Cfg {
test_case: TestCase::NonUniformTree {
depth,
branch_width: 8,
},
update_filter: Default::default(),
}
}));
bench_sizes(c, "deep", (8u32..=24u32).map(|depth| {
Cfg {
test_case: TestCase::NonUniformTree {
depth,
branch_width: 2,
},
update_filter: Default::default(),
}
}));
bench_sizes(c, "wide", (20u32..=470u32).step_by(30).map(|branch_width| {
Cfg {
test_case: TestCase::Tree {
depth: 3,
branch_width,
},
update_filter: Default::default(),
}
}));
}

fn transform_propagation_configurations(c: &mut Criterion) {
bench_group(c, "all_configurations", |group| {
for (name, cfg) in &CONFIGS {
bench_single(group, IdSource::Fixed(name), cfg);
}
});
}
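
Aside: the benchmarks above also rely on Cfg, TestCase, TransformUpdates, CONFIGS and build_app from world_gen, none of which appear in this diff. The sketch below shows only the shapes the call sites imply; the concrete definitions (including the CONFIGS table of named configurations) live in world_gen.rs and may differ.

use bevy_app::App;

#[derive(Clone, Copy)]
pub enum TransformUpdates {
    Enabled,
    Disabled,
}

pub enum TestCase {
    /// Uniform tree: every node has branch_width children, down to depth.
    Tree { depth: u32, branch_width: u32 },
    /// Non-uniform variant; its exact shape is defined in world_gen.
    NonUniformTree { depth: u32, branch_width: u32 },
}

/// Placeholder: the call sites only require this type to implement Default.
#[derive(Default)]
pub struct UpdateFilter;

pub struct Cfg {
    pub test_case: TestCase,
    pub update_filter: UpdateFilter,
}

pub struct InsertResult {
    /// Number of entities spawned into the hierarchy; used for Throughput.
    pub inserted_nodes: usize,
}

/// Builds an App containing the generated hierarchy. When updates is Enabled,
/// a system in the Update schedule simulates per-frame Transform changes.
pub fn build_app(cfg: &Cfg, updates: TransformUpdates) -> (InsertResult, App) {
    unimplemented!("illustrative sketch only")
}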
