diff --git a/core/teca_thread_util.cxx b/core/teca_thread_util.cxx index 5c12dbf18..40b87930e 100644 --- a/core/teca_thread_util.cxx +++ b/core/teca_thread_util.cxx @@ -528,16 +528,22 @@ int thread_parameters(MPI_Comm comm, int base_core_id, int n_requested, // thread pool size is based on core and process count int nlg = 0; + // map threads to physical cores + nlg = cores_per_node % n_procs; + n_threads = cores_per_node / n_procs + (proc_id < nlg ? 1 : 0); if (n_requested > 0) { - // user specified override + // use exactly this many n_threads = n_requested; } + else if (n_requested < -1) + { + // use at most this many + n_threads = std::min(-n_requested, n_threads); + n_requested = n_threads; + } else { - // map threads to physical cores - nlg = cores_per_node % n_procs; - n_threads = cores_per_node/n_procs + (proc_id < nlg ? 1 : 0); } // if the user runs more MPI ranks than cores some of the ranks diff --git a/core/teca_thread_util.h b/core/teca_thread_util.h index 33f0db276..c420a8431 100644 --- a/core/teca_thread_util.h +++ b/core/teca_thread_util.h @@ -37,8 +37,10 @@ namespace teca_thread_util * be bound to to acheive this. Passing n_requested >= 1 * specifies a run time override. This indicates that * caller wants to use a specific number of threads, - * rather than one per physical core. In this case the - * affinity map is also constructed. + * rather than one per physical core. Passing + * n_requested < -1 specifies a maximum to use if + * sufficient cores are available. In all cases the + * affinity map is constructed. * * @param[in] n_threads_per_device the number of threads that should service * GPUs. If 0 the run will be CPU only. If -1 @@ -60,10 +62,11 @@ namespace teca_thread_util * of threads one can use such that there is one * thread per phycial core taking into account all * ranks running on the node. if n_requested is >= 1 - * n_threads will be set to n_requested. 
This allows a - * run time override for cases when the caller knows - * how she wants to schedule things. if an error - * occurs and n_requested is -1 this will be set to 1. + * n_threads will explicitly be set to n_requested. If + * n_requested < -1 at most -n_requested threads will + * be used. Fewer threads will be used if there are + * insufficient cores available. If an error occurs + * and n_requested is -1 this will be set to 1. * * @param[out] affinity an affinity map, describing for each of n_threads, * a core id that the thread can be bound to. if