diff --git a/yaml_examples/industry_demo/long.yaml b/yaml_examples/industry_demo/long.yaml
new file mode 100644
index 0000000..55f32c1
--- /dev/null
+++ b/yaml_examples/industry_demo/long.yaml
@@ -0,0 +1,213 @@
+run_configurations:
+  "cpp-reference":
+    sbatch_config:
+      "nodes": 1
+      "ntasks-per-node": 1
+      "cpus-per-task": 1
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads: []
+    environment_variables: {}
+    directory: "../0_cpp_versions/0_ref"
+    build_commands:
+      - "make no_yaml"
+      - "make -j 8"
+    run_command: "./test_HPCCG"
+
+  "rust-reference":
+    sbatch_config:
+      "nodes": 1
+      "ntasks-per-node": 1
+      "cpus-per-task": 1
+      "exclusive": "mcs"
+      "mem": 60000 # Rust seems to use more memory; runs bottleneck heavily if this is too small
+    module_loads: []
+    environment_variables: {}
+    directory: "../5_iterators"
+    build_commands:
+      - "cargo build --release"
+    run_command: "./target/release/hpccg-rs"
+
+  "cpp-openmp":
+    sbatch_config:
+      "nodes": 1
+      "ntasks-per-node": 1
+      "cpus-per-task": 32
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads: []
+    environment_variables: {}
+    directory: "../0_cpp_versions/1_openmp"
+    build_commands:
+      - "make no_yaml"
+      - "make -j 8"
+    run_command: "./test_HPCCG"
+
+  "rust-rayon":
+    sbatch_config:
+      "nodes": 1
+      "ntasks-per-node": 1
+      "cpus-per-task": 32
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads: []
+    environment_variables: {}
+    directory: "../6_parallel"
+    build_commands:
+      - "cargo build --release"
+    run_command: "./target/release/hpccg-rs"
+
+  "cpp-mpi":
+    sbatch_config:
+      "cpus-per-task": 1
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables: {}
+    directory: "../0_cpp_versions/2_mpi"
+    build_commands:
+      - "make no_yaml"
+      - "make -j 8"
+    run_command: "mpirun ./test_HPCCG"
+
+  "rust-mpi":
+    sbatch_config:
+      "cpus-per-task": 1
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables: {}
+    directory: "../7_mpi"
+    build_commands:
+      - "cargo build --release"
+    run_command: "mpirun ./target/release/hpccg-rs"
+
+  "cpp-hybrid":
+    sbatch_config:
+      "nodes": 1
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables: {}
+    directory: "../0_cpp_versions/3_hybrid"
+    build_commands:
+      - "make no_yaml"
+      - "make -j 8"
+    run_command: "mpirun ./test_HPCCG"
+
+  "rust-hybrid":
+    sbatch_config:
+      "nodes": 1
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables: {}
+    directory: "../8_hybrid"
+    build_commands:
+      - "cargo build --release"
+    run_command: "mpirun ./target/release/hpccg-rs"
+
+benches:
+  "serial":
+    run_configurations:
+      - "cpp-reference"
+      - "rust-reference"
+    reruns:
+      number: 1
+      unaggregatable_metrics:
+        - "Num threads"
+      # number: 5
+      # highest_discard: 2
+      # lowest_discard: 0
+    matrix:
+      args:
+        - "100 100 100"
+        - "150 150 150"
+        - "200 200 200"
+        - "250 250 250"
+    analysis:
+      metrics:
+        "Mesh x size": "nx: (\\d+)"
+        "Wall time (s)": "real\\s([\\d\\.]+)\nuser"
+      line_plots:
+        - title: "Reference Implementation Comparison"
+          x: "Mesh x size"
+          y: "Wall time (s)"
+
+  "parallel":
+    run_configurations:
+      - "cpp-openmp"
+      - "rust-rayon"
+    reruns:
+      number: 1
+      unaggregatable_metrics:
+        - "Num threads"
+        - "Mesh x size"
+      # number: 5
+      # highest_discard: 2
+      # lowest_discard: 0
+    matrix:
+      args:
+        - "100 100 100"
+        - "150 150 150"
+        - "200 200 200"
+        - "250 250 250"
+        # - "300 300 300"
+        # - "400 400 400"
+      environment_variables:
+        - { "OMP_NUM_THREADS": 1, "RAYON_NUM_THREADS": 1 }
+        - { "OMP_NUM_THREADS": 16, "RAYON_NUM_THREADS": 16 }
+        - { "OMP_NUM_THREADS": 32, "RAYON_NUM_THREADS": 32 }
+    analysis:
+      metrics:
+        "Mesh x size": "nx: (\\d+)"
+        "Num threads": "=== RUN INSTANTIATION ===\n\\{.*environment_variables: \\{.*OMP_NUM_THREADS: (\\d+),.*\\}"
+        "Wall time (s)": "real\\s([\\d\\.]+)\nuser"
+      line_plots:
+        - title: "Parallel Implementation Comparison"
+          x: "Mesh x size"
+          y: "Wall time (s)"
+          split_metrics:
+            - "Num threads"
+
+  "mpi-best-config":
+    enabled: false
+    run_configurations:
+      - "cpp-mpi"
+      - "rust-mpi"
+    reruns:
+      number: 1
+      unaggregatable_metrics:
+        - "Num threads"
+        - "Nodes"
+        - "Tasks per Node"
+      # number: 5
+      # highest_discard: 2
+      # lowest_discard: 0
+    matrix:
+      args:
+        - "50 50 50"
+        - "100 100 100"
+        - "150 150 150"
+        - "200 200 200"
+        - "250 250 250"
+      sbatch_config:
+        - { "nodes": 1, "ntasks-per-node": 1 }
+        # - { "nodes": 2, "ntasks-per-node": 1 }
+    analysis:
+      metrics:
+        "Mesh x size": "nx: (\\d+)"
+        "Nodes": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*nodes: (\\d+).*\\}"
+        "Tasks per Node": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*ntasks-per-node: (\\d+).*\\}"
+        "Wall time (s)": "real\\s([\\d\\.]+)\nuser"
+      line_plots:
+        - title: "Parallel Implementation Comparison"
+          x: "Mesh x size"
+          y: "Wall time (s)"
+          split_metrics:
+            - "Nodes"
+            - "Tasks per Node"
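The "metrics" mappings in long.yaml above are regular expressions, each with a
single capture group, matched against a run's output to pull out one named
value. A minimal sketch of the assumed extraction semantics (illustrative
Python with invented sample output; not the harness's actual code):

    import re

    # One capture group per metric, exactly as written in the YAML above.
    metrics = {
        "Mesh x size": r"nx: (\d+)",
        "Wall time (s)": r"real\s([\d\.]+)\nuser",
    }

    # Hypothetical combined output of a single run (values invented).
    run_output = "nx: 100\nny: 100\nnz: 100\nreal 12.34\nuser 11.98\n"

    results = {
        name: match.group(1)
        for name, pattern in metrics.items()
        if (match := re.search(pattern, run_output)) is not None
    }
    print(results)  # {'Mesh x size': '100', 'Wall time (s)': '12.34'}

Note the "Wall time (s)" pattern relies on YAML double-quote escaping: the
literal "\n" in the file becomes a real newline in the regex, anchoring the
match to the "real"/"user" lines of POSIX time output.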
"RAYON_NUM_THREADS": 16 } + - { "OMP_NUM_THREADS": 32, "RAYON_NUM_THREADS": 32 } + analysis: + metrics: + "Mesh x size": "nx: (\\d+)" + "Num threads": "=== RUN INSTANTIATION ===\n\\{.*environment_variables: \\{.*OMP_NUM_THREADS: (\\d+),.*\\}" + "Wall time (s)": "real\\s([\\d\\.]+)\nuser" + line_plots: + - title: "Parallel Implementation Comparison" + x: "Mesh x size" + y: "Wall time (s)" + split_metrics: + - "Num threads" + + "mpi-best-config": + enabled: False + run_configurations: + - "cpp-mpi" + - "rust-mpi" + reruns: + number: 1 + unaggregatable_metrics: + - "Num threads" + - "Nodes" + - "Tasks per Node" + # number: 5 + # highest_discard: 2 + # lowest_discard: 0 + matrix: + args: + - "50 50 50" + - "100 100 100" + - "150 150 150" + - "200 200 200" + - "250 250 250" + sbatch_config: + - { "nodes": 1, "ntasks-per-node": 1 } + # - { "nodes": 2, "ntasks-per-node": 1 } + analysis: + metrics: + "Mesh x size": "nx: (\\d+)" + "Nodes": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*nodes: (\\d+).*\\}" + "Tasks per Node": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*ntasks-per-node: (\\d+).*\\}" + "Wall time (s)": "real\\s([\\d\\.]+)\nuser" + line_plots: + - title: "Parallel Implementation Comparison" + x: "Mesh x size" + y: "Wall time (s)" + split_metrics: + - "Nodes" + - "Tasks per Node" diff --git a/yaml_examples/industry_demo/short.yaml b/yaml_examples/industry_demo/short.yaml new file mode 100644 index 0000000..0deb7ea --- /dev/null +++ b/yaml_examples/industry_demo/short.yaml @@ -0,0 +1,85 @@ +run_configurations: + "cpp-reference": + sbatch_config: + "nodes": 1 + "ntasks-per-node": 1 + "cpus-per-task": 16 + "exclusive": "mcs" + "mem": 60000 + module_loads: [] + environment_variables: {} + directory: "../0_cpp_versions/0_ref" + build_commands: + - "make no_yaml" + - "make -j 8" + run_command: "./test_HPCCG" + + "cpp-openmp": + sbatch_config: + "nodes": 1 + "ntasks-per-node": 1 + "cpus-per-task": 16 + "exclusive": "mcs" + "mem-per-cpu": 1875 + module_loads: [] + environment_variables: + "OMP_NUM_THREADS": 16 + directory: "../0_cpp_versions/1_openmp" + build_commands: + - "make no_yaml" + - "make -j 8" + run_command: "./test_HPCCG" + + + +benches: + "short-example": + run_configurations: + - "cpp-reference" + - "cpp-openmp" + reruns: + unaggregatable_metrics: + - "Mesh x size" + - "Mesh y size" + - "Mesh z size" + - "Total FLOPs" + - "ddot FLOPs" + - "waxpby FLOPs" + - "sparsemv FLOPs" + matrix: + args: + - "50 50 50" + - "100 100 100" + - "150 150 150" + - "200 200 200" + analysis: + line_plots: + - title: "C++ Versions Comparison" + x: "Mesh x size" + y: "Wall time (s)" + bar_charts: + - title: "C++ Versions Comparison" + y: "Wall time (s)" + split_metrics: + - "Mesh x size" + - title: "C++ Versions Comparison @ 200x200x200" + y: "Wall time (s)" + fix_metrics: + "Mesh x size": 200 + metrics: + "Mesh x size": "nx: (\\d+)" + "Mesh y size": "ny: (\\d+)" + "Mesh z size": "nz: (\\d+)" + "Total time (s)": "Time Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary" + "ddot time (s)": "Time Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary" + "waxpby time (s)": "Time Summary:[\\s\\S]*WAXPBY\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary" + "sparsemv time (s)": "Time Summary:[\\s\\S]*SPARSEMV\\s*: ([\\d\\.]+)[\\s\\S]*\nFLOPS Summary" + "Total FLOPs": "FLOPS Summary:[\\s\\S]*Total\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary" + "ddot FLOPs": "FLOPS Summary:[\\s\\S]*DDOT\\s*: ([\\d\\.]+)[\\s\\S]*\nMFLOPS Summary" + "waxpby FLOPs": "FLOPS Summary:[\\s\\S]*WAXPBY\\s*: 
diff --git a/yaml_examples/industry_demo/strong_weak_scaling.yaml b/yaml_examples/industry_demo/strong_weak_scaling.yaml
new file mode 100644
index 0000000..09d543d
--- /dev/null
+++ b/yaml_examples/industry_demo/strong_weak_scaling.yaml
@@ -0,0 +1,86 @@
+run_configurations:
+  "cpp-hybrid":
+    sbatch_config:
+      "nodes": 1
+      "cpus-per-task": 2
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables:
+      "OMP_NUM_THREADS": 2
+    directory: "../0_cpp_versions/3_hybrid"
+    build_commands:
+      - "make no_yaml"
+      - "make -j 8"
+    run_command: "mpirun ./test_HPCCG"
+
+  "rust-hybrid":
+    sbatch_config:
+      "nodes": 1
+      "ntasks-per-node": 2
+      "exclusive": "mcs"
+      "mem": 60000
+    module_loads:
+      - "cs402-mpi"
+    environment_variables:
+      "RAYON_NUM_THREADS": 2
+    directory: "../8_hybrid"
+    build_commands:
+      - "cargo build --release"
+    run_command: "mpirun ./target/release/hpccg-rs"
+
+benches:
+  "strong-scaling":
+    run_configurations:
+      - "cpp-hybrid"
+      - "rust-hybrid"
+    reruns:
+      number: 2
+      highest_discard: 0
+      lowest_discard: 0
+    matrix:
+      [args, sbatch_config]:
+        - ["64 64 1024", { "ntasks": 1 }]
+        - ["64 64 512", { "ntasks": 2 }]
+        - ["64 64 256", { "ntasks": 4 }]
+        - ["64 64 128", { "ntasks": 8 }]
+        - ["64 64 64", { "ntasks": 16 }]
+        - ["64 64 32", { "ntasks": 32 }]
+        - ["64 64 16", { "ntasks": 64 }]
+    analysis:
+      metrics:
+        "Wall time (s)": "real\\s([\\d\\.]+)\nuser"
+        "MPI Ranks": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*ntasks: (\\d+).*\\}"
+      line_plots:
+        - title: "Strong Scaling Plot"
+          x: "MPI Ranks"
+          y: "Wall time (s)"
+
+  "weak-scaling":
+    run_configurations:
+      - "cpp-hybrid"
+      - "rust-hybrid"
+    reruns:
+      number: 2
+      highest_discard: 0
+      lowest_discard: 0
+    matrix:
+      args:
+        - "64 64 64"
+      sbatch_config:
+        - { "ntasks": 1 }
+        - { "ntasks": 2 }
+        - { "ntasks": 4 }
+        - { "ntasks": 8 }
+        - { "ntasks": 16 }
+        - { "ntasks": 32 }
+        # - { "ntasks": 64 }
+    analysis:
+      metrics:
+        "Wall time (s)": "real\\s([\\d\\.]+)\nuser"
+        "MPI Ranks": "=== RUN INSTANTIATION ===\n\\{.*sbatch_config: \\{.*ntasks: (\\d+).*\\}"
+      line_plots:
+        - title: "Weak Scaling Plot"
+          x: "MPI Ranks"
+          y: "Wall time (s)"
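The two benches in strong_weak_scaling.yaml exercise what appear to be two
matrix expansion modes: a flow-sequence key such as [args, sbatch_config]
zips its entries pairwise (strong scaling: total problem size held fixed
while ranks grow), while separate matrix keys combine as a cross product
(weak scaling: constant per-rank problem size at every rank count). A sketch
of that assumed reading (illustrative only, not the harness's code):

    from itertools import product

    # Zipped form: one instantiation per listed pair (7 in "strong-scaling");
    # the mesh z dimension halves each time the rank count doubles.
    zipped = [
        ("64 64 1024", {"ntasks": 1}),
        ("64 64 512", {"ntasks": 2}),
        # ...down to ("64 64 16", {"ntasks": 64})
    ]

    # Cross-product form: len(args) * len(sbatch_config) instantiations.
    args = ["64 64 64"]
    sbatch_config = [{"ntasks": n} for n in (1, 2, 4, 8, 16, 32)]
    crossed = list(product(args, sbatch_config))  # 1 * 6 = 6 runs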