diff --git a/.github/workflows/build-hpc.yml b/.github/workflows/build-hpc.yml
index b9d227d06..07f31d12d 100644
--- a/.github/workflows/build-hpc.yml
+++ b/.github/workflows/build-hpc.yml
@@ -76,6 +76,9 @@ jobs:
               module load {{module}}
             {% endfor %}
 
+            export CMAKE_TEST_LAUNCHER="srun;-n;1"
+            export DR_HOOK_ASSERT_MPI_INITIALIZED=0
+
             {% for name, options in dependencies.items() %}
                 mkdir -p {{name}}
                 pushd {{name}}
@@ -107,4 +110,4 @@ jobs:
               {{ cmake_options|join(' ') }}
 
             cmake --build build
-            ctest --test-dir build
+            ctest --test-dir build --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ee7843b81..6cc1adcca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,26 @@ find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CUR
 project( ectrans LANGUAGES C CXX Fortran )
 include( ectrans_macros )
 
+# CMake 3.29 adds CMAKE_TEST_LAUNCHER, which can be defined either as a CMake variable or as an environment variable.
+# This launcher is a semicolon-separated list of arguments that is used to launch serial tasks,
+# and can be defined during the CMake configuration.
+# This is e.g. required for GPU tests that need access to slurm resources:
+#    export CMAKE_TEST_LAUNCHER="srun;-n;1"
+# To run the tests then:
+#    salloc -q <queue> --gpus-per-task=1 -n <nproc>    ctest <ctest-args>
+# Before CMake 3.29 this could only be achieved with CMAKE_CROSSCOMPILING_EMULATOR.
+# The following snippet ensures forward compatibility.
+if( ${CMAKE_VERSION} VERSION_LESS "3.29" )  # older CMake: route the launcher through the emulator variable
+  if( DEFINED CMAKE_TEST_LAUNCHER )  # the CMake variable is checked before the environment variable
+    set(CMAKE_CROSSCOMPILING_EMULATOR ${CMAKE_TEST_LAUNCHER})
+  elseif(DEFINED ENV{CMAKE_TEST_LAUNCHER})  # fall back to the environment, read at configure time
+    set(CMAKE_CROSSCOMPILING_EMULATOR $ENV{CMAKE_TEST_LAUNCHER})
+  endif()
+endif()
+if( CMAKE_CROSSCOMPILING_EMULATOR )  # not version-gated: any non-false emulator value is mirrored back
+  set( CMAKE_TEST_LAUNCHER ${CMAKE_CROSSCOMPILING_EMULATOR} )  # NOTE(review): on CMake >= 3.29 this overwrites a separately set launcher - confirm intended
+endif()
+
 set(CMAKE_CXX_STANDARD 17)
 
 ecbuild_enable_fortran( REQUIRED NO_MODULE_DIRECTORY )
diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake
index be8f8b78c..cb321aae6 100644
--- a/cmake/project_summary.cmake
+++ b/cmake/project_summary.cmake
@@ -35,6 +35,9 @@ ecbuild_info( "    LAPACK_LIBRARIES         : [${LAPACK_LIBRARIES}]" )
     endif()
 ecbuild_info( "FFTW" )
 ecbuild_info( "    FFTW_LIBRARIES           : [${FFTW_LIBRARIES}]" )
+  if( CMAKE_TEST_LAUNCHER )  # report the launcher only when the variable holds a non-false value
+ecbuild_info( "CMAKE_TEST_LAUNCHER          : [${CMAKE_TEST_LAUNCHER}]" )
+  endif()
 ecbuild_info( "---------------------------------------------------------" )
 
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 20eecd70a..8ada09b68 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -87,6 +87,12 @@ endif()
 # This tests CPU and/or GPU versions, depending on which are enabled
 # --------------------------------------------------------------------------------------------------
 
+function(ectrans_set_test_properties target)  # Label GPU tests. <target>: test name; no-op unless it contains "gpu" and the test exists
+  if( "${target}" MATCHES "gpu" AND TEST ${target} )  # TEST guard: an unregistered test must not abort configuration
+    set_tests_properties(${target} PROPERTIES LABELS "gpu;Fortran")  # NOTE(review): replaces labels already set on the test - confirm intended
+  endif()
+endfunction()
+
 # Determine which benchmarks are available
 set( benchmarks "" )
 if( TARGET ectrans-benchmark-cpu-dp )
@@ -137,6 +143,7 @@ foreach( benchmark ${benchmarks} )
             MPI ${mpi}
             OMP ${omp}
         )
+        ectrans_set_test_properties( ${base_title}_nfld0 )
       endif()
 
       # Check it works with 10 3D scalar fields
@@ -146,6 +153,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10 )
 
       # Check it works with 10 3D scalar fields and 20 levels
       ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20
@@ -154,6 +162,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10_nlev20 )
 
       # Check it works with 10 3D scalar fields, 20 levels, and scalar derivatives
       ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_scders
@@ -162,6 +171,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10_nlev20_scders )
 
       # Check it works with 10 3D scalar fields, 20 levels, and wind transforms
       ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv
@@ -170,6 +180,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10_nlev20_vordiv )
 
       # Check it works with 10 3D scalar fields, 20 levels, wind transforms, and wind derivatives
       ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_vordiv_uvders
@@ -178,6 +189,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10_nlev20_vordiv_uvders )
 
       # Check it works with 10 3D scalar fields, 20 levels, and NPROMA=16
       ecbuild_add_test( TARGET ${base_title}_nfld10_nlev20_nproma16
@@ -186,6 +198,7 @@ foreach( benchmark ${benchmarks} )
           MPI ${mpi}
           OMP ${omp}
       )
+      ectrans_set_test_properties( ${base_title}_nfld10_nlev20_nproma16 )
 
       if( ${benchmark} MATCHES "cpu" )
         # Check it works with 10 3D scalar fields, 20 levels, and the fast Legendre tranform (CPU only)
@@ -195,6 +208,7 @@ foreach( benchmark ${benchmarks} )
             MPI ${mpi}
             OMP ${omp}
         )
+        ectrans_set_test_properties( ${base_title}_nfld10_nlev20_flt )
       endif()
     endforeach()
   endforeach()
@@ -245,6 +259,7 @@ if( HAVE_TRANSI )
       LINKER_LANGUAGE C
       DEFINITIONS GPU_VERSION
       ENVIRONMENT TRANS_USE_MPI=0 )
+    set_tests_properties(ectrans_test_transi_program_gpu PROPERTIES LABELS gpu)
   endif()
 
   ecbuild_add_test( TARGET ectrans_test_transi_timings