From 26e20ea5025592134bce58e660c1030d91e7cc1f Mon Sep 17 00:00:00 2001 From: TRAHAY Francois Date: Fri, 14 Feb 2025 11:22:42 +0100 Subject: [PATCH] Refactoring the multiple tracing mechanisms. The old system implemented two methods for tracing events: - many macros record fxt events that depict various events in starpu (task submission, data transfers, scheduling points, memory management, synchronization, ...) - a few function calls invoke the callbacks of external tracing tools (e.g. APEX, EZTrace). This is mainly for task submission/execution and data transfers. New system: - calls to fxt macros (_STARPU_TRACE_*) are replaced with calls to functions (_starpu_trace_) implemented in src/profiling/starpu_tracing.c - the tracing functions may then call a tracing tool (e.g. fxt, APEX, EZTrace, ...) to record an event. Tracing tools can now record a wide variety of events describing StarPU internals (the ones that were previously recorded only with fxt). The drawback is that StarPU will perform a function call for each tracing point, even if no tracing tool is running. 
Experiments show that this does not impact performance (with running tests/microbench/tasks_overhead.c) --- ChangeLog | 1 + README.dev | 12 +- configure.ac | 88 + doc/doxygen_dev/Makefile.am | 2 +- doc/doxygen_dev/doxygen-config.cfg.in | 2 +- mpi/src/starpu_mpi_fxt.h | 2 +- mpi/src/starpu_mpi_task_insert.c | 20 +- mpi/src/starpu_mpi_task_insert_fortran.c | 14 +- src/Makefile.am | 10 +- src/common/fxt.h | 1583 ----------- src/common/starpu_spinlock.c | 1 - src/common/starpu_spinlock.h | 27 +- src/common/thread.c | 94 +- src/common/utils.c | 38 + src/common/utils.h | 2 + src/core/dependencies/cg.h | 4 +- src/core/dependencies/implicit_data_deps.c | 6 +- src/core/dependencies/tags.c | 10 +- src/core/dependencies/task_deps.c | 10 +- src/core/jobs.c | 30 +- src/core/jobs.h | 5 +- src/core/sched_ctx.c | 12 +- src/core/sched_ctx.h | 1 + src/core/sched_policy.c | 33 +- src/core/sched_policy.h | 5 +- src/core/task.c | 33 +- src/core/task.h | 4 + src/core/topology.h | 1 - src/core/workers.c | 50 +- src/core/workers.h | 2 +- src/datawizard/coherency.c | 22 +- src/datawizard/coherency.h | 1 - src/datawizard/copy_driver.c | 15 +- src/datawizard/data_request.c | 4 +- src/datawizard/filters.c | 8 +- src/datawizard/interfaces/data_interface.c | 10 +- src/datawizard/malloc.c | 7 +- src/datawizard/memalloc.c | 38 +- src/datawizard/memory_manager.c | 7 +- src/datawizard/memory_nodes.c | 4 +- src/datawizard/user_interactions.c | 4 +- src/debug/traces/starpu_fxt.h | 2 +- src/drivers/cpu/driver_cpu.c | 142 +- src/drivers/cuda/driver_cuda.c | 131 +- src/drivers/cuda/driver_cuda0.c | 10 +- src/drivers/cuda/driver_cuda1.c | 26 +- src/drivers/driver_common/driver_common.c | 21 +- src/drivers/hip/driver_hip.c | 108 +- src/drivers/max/driver_max_fpga.c | 20 +- src/drivers/max/driver_max_fpga.h | 1 - src/drivers/mp_common/source_common.c | 20 +- src/drivers/mpi/driver_mpi_source.c | 2 +- src/drivers/opencl/driver_opencl.c | 117 +- src/drivers/tcpip/driver_tcpip_source.c | 2 +- src/profiling/{ 
=> callbacks}/callbacks.c | 2 +- src/profiling/{ => callbacks}/callbacks.h | 0 src/{common => profiling/fxt}/fxt.c | 50 +- src/profiling/fxt/fxt.h | 817 ++++++ src/profiling/profiling.c | 6 +- src/profiling/starpu_tracing.c | 2313 +++++++++++++++++ src/profiling/starpu_tracing.h | 341 +++ src/sched_policies/component_prio.c | 28 +- src/sched_policies/component_sched.c | 8 +- src/sched_policies/component_worker.c | 4 +- .../deque_modeling_policy_data_aware.c | 1 - .../eager_central_priority_policy.c | 1 - src/sched_policies/fifo_queues.c | 1 - src/sched_policies/heteroprio.c | 1 - src/sched_policies/parallel_eager.c | 4 +- src/sched_policies/parallel_heft.c | 2 +- src/sched_policies/work_stealing_policy.c | 2 +- src/util/execute_on_all.c | 15 - src/util/starpu_task_insert_utils.c | 8 +- tools/starpu_fxt_stats.c | 4 +- tools/starpu_fxt_tool.c | 3 +- tools/starpu_perfmodel_plot.c | 2 +- 76 files changed, 4036 insertions(+), 2401 deletions(-) delete mode 100644 src/common/fxt.h rename src/profiling/{ => callbacks}/callbacks.c (99%) rename src/profiling/{ => callbacks}/callbacks.h (100%) rename src/{common => profiling/fxt}/fxt.c (92%) create mode 100644 src/profiling/fxt/fxt.h create mode 100644 src/profiling/starpu_tracing.c create mode 100644 src/profiling/starpu_tracing.h diff --git a/ChangeLog b/ChangeLog index ecb048206b..90ded85a85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,6 +22,7 @@ Changes: * Allow large sizes for vector, matrix, block, tensor and ndim data interfaces, and use proper MPI datatypes to exchange them. * Add soon_callback in tasks. 
+ * Refactor the multiple tracing mechanisms Small changes: * Fix build system for StarPU Python interface diff --git a/README.dev b/README.dev index e0ecad3854..d83dbde17e 100644 --- a/README.dev +++ b/README.dev @@ -194,16 +194,20 @@ Writing a new driver is essentially: -Adding a new FXT state +Adding a new trace event ---------------------- This consists in: -- Adding a code number in src/common/fxt.h +- Adding a new function in src/profiling/starpu_tracing.h and in src/profiling/starpu_tracing.c -- Adding the callable runtime macro in src/common/fxt.h +- Calling this function in the wanted place in the runtime -- Calling these macros in the wanted place in the runtime +Implementing this event with FxT consists in + +- Adding a code number in src/profiling/fxt/fxt.h + +- Adding the callable runtime macro in src/profiling/fxt/fxt.h - Adding a paje state in states_list src/debug/traces/starpu_fxt.c and in src/debug/traces/starpu_paje.c diff --git a/configure.ac b/configure.ac index 1d9e2ee380..4e4f464ee0 100644 --- a/configure.ac +++ b/configure.ac @@ -6,6 +6,7 @@ # Copyright (C) 2017-2017 Guillaume Beauchamp # Copyright (C) 2013-2013 Thibaut Lambert # Copyright (C) 2011-2011 Télécom Sud Paris +# Copyright (C) 2023-2025 École de Technologie Supérieure (ETS, Montréal) # # StarPU is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by @@ -157,6 +158,91 @@ fi AC_MSG_CHECKING([for profiling tool support]) AC_MSG_RESULT($enable_prof_tool) +# Taskstubs +AC_ARG_ENABLE(taskstubs, [AS_HELP_STRING([--enable-taskstubs], + [enable support for TaskStubs])], + enable_taskstubs=$enableval, enable_taskstubs=no) +if test x$enable_taskstubs = xyes; then + AC_DEFINE(STARPU_PROF_TASKSTUBS, [1], [Define this to enable TaskStubs support]) +fi +AC_MSG_CHECKING([for TaskStubs support]) +AC_MSG_RESULT($enable_taskstubs) + +AC_ARG_WITH(taskstubs-dir, + [AS_HELP_STRING([--with-taskstubs-dir=], + [specify TaskStubs 
installation directory])], + [ + taskstubs_dir="$withval" + # in case this was not explicit yet + enable_taskstubs=yes + ], taskstubs_dir=no) + +AC_ARG_WITH(taskstubs-include-dir, + [AS_HELP_STRING([--with-taskstubs-include-dir=], + [specify TaskStubs includes directory])], + [ + taskstubs_include_dir="$withval" + # in case this was not explicit yet + enable_taskstubs=yes + ], taskstubs_include_dir=no) + +AC_ARG_WITH(taskstubs-lib-dir, + [AS_HELP_STRING([--with-taskstubs-lib-dir=], + [specify TaskStubs libs directory])], + [ + taskstubs_lib_dir="$withval" + # in case this was not explicit yet + enable_taskstubs=yes + ], taskstubs_lib_dir=no) + +if test x$enable_taskstubs = xyes ; then + AC_DEFINE(STARPU_PROF_TASKSTUBS, [1], [Define this to enable TaskStubs support]) + PKG_CHECK_MODULES([TASKSTUBS], [TASKSTUBS], [have_pkg_taskstubs=yes], [have_pkg_taskstubs=no]) + + if test x$have_pkg_taskstubs = xyes; then + have_taskstubs=yes + else + if test "$taskstubs_include_dir" = "no" ; then + if test "$taskstubs_dir" != "no" ; then + taskstubs_include_dir="$taskstubs_dir/include/" + fi + fi + + if test "$taskstubs_lib_dir" = "no" ; then + if test "$taskstubs_dir" != "no" ; then + taskstubs_lib_dir="$taskstubs_dir/lib" + fi + fi + + if test "$taskstubs_include_dir" != "no" ; then + AC_MSG_NOTICE(Using TaskStubs include dir $taskstubs_include_dir) + TASKSTUBS_CFLAGS="-I$taskstubs_include_dir -I$taskstubs_include_dir/timer_plugin $TASKSTUBS_CFLAGS" + else + AC_MSG_WARN([TaskStubs dir not found]) + fi + + if test "$taskstubs_lib_dir" != "no" ; then + AC_MSG_NOTICE(Using TaskStubs lib dir $taskstubs_lib_dir) + TASKSTUBS_LIBS="-L$taskstubs_lib_dir/ -ltimer_plugin" + else + AC_MSG_WARN([TaskStubs lib not found]) + fi + + if test "$taskstubs_include_dir" != "no" ; then # TaskStubs is usable only when both the include and the lib dirs are known + if test "$taskstubs_lib_dir" != "no" ; then + have_taskstubs=yes + fi + fi + fi + + if test -n "$TASKSTUBS_CFLAGS" ; then + CFLAGS="$TASKSTUBS_CFLAGS $CFLAGS" + fi + if test -n "$TASKSTUBS_LIBS" ; then + 
LDFLAGS="$TASKSTUBS_LIBS $LDFLAGS" + fi +fi + ############################################################################### # # # Recursive tasks support # @@ -4788,6 +4874,8 @@ AC_MSG_NOTICE([ BLAS library: $blas_lib hwloc: $have_valid_hwloc FxT trace enabled: $enable_fxt + Profiling tool: $enable_prof_tool + TaskStubs enabled: $enable_taskstubs Documentation HTML: $enable_build_doc Documentation PDF: $enable_build_doc_pdf diff --git a/doc/doxygen_dev/Makefile.am b/doc/doxygen_dev/Makefile.am index 6c81f769af..10935cb5bf 100644 --- a/doc/doxygen_dev/Makefile.am +++ b/doc/doxygen_dev/Makefile.am @@ -73,11 +73,11 @@ dox_inputs = $(DOX_CONFIG) \ $(top_srcdir)/src/parallel_worker/starpu_parallel_worker_create.h \ $(top_srcdir)/src/profiling/profiling.h \ $(top_srcdir)/src/profiling/bound.h \ + $(top_srcdir)/src/profiling/fxt/fxt.h \ $(top_srcdir)/src/util/starpu_data_cpy.h \ $(top_srcdir)/src/util/openmp_runtime_support.h \ $(top_srcdir)/src/util/starpu_task_insert_utils.h \ $(top_srcdir)/src/common/graph.h \ - $(top_srcdir)/src/common/fxt.h \ $(top_srcdir)/src/common/starpu_spinlock.h \ $(top_srcdir)/src/common/rbtree_i.h \ $(top_srcdir)/src/common/rbtree.h \ diff --git a/doc/doxygen_dev/doxygen-config.cfg.in b/doc/doxygen_dev/doxygen-config.cfg.in index fd25efba88..240072e371 100644 --- a/doc/doxygen_dev/doxygen-config.cfg.in +++ b/doc/doxygen_dev/doxygen-config.cfg.in @@ -51,11 +51,11 @@ INPUT = @top_srcdir@/doc/doxygen_dev/chapters/000_introduction. 
@top_srcdir@/src/parallel_worker/starpu_parallel_worker_create.h \ @top_srcdir@/src/profiling/profiling.h \ @top_srcdir@/src/profiling/bound.h \ + @top_srcdir@/src/profiling/fxt/fxt.h \ @top_srcdir@/src/util/starpu_data_cpy.h \ @top_srcdir@/src/util/openmp_runtime_support.h \ @top_srcdir@/src/util/starpu_task_insert_utils.h \ @top_srcdir@/src/common/graph.h \ - @top_srcdir@/src/common/fxt.h \ @top_srcdir@/src/common/starpu_spinlock.h \ @top_srcdir@/src/common/rbtree_i.h \ @top_srcdir@/src/common/rbtree.h \ diff --git a/mpi/src/starpu_mpi_fxt.h b/mpi/src/starpu_mpi_fxt.h index d76f4187ea..9cd54c6a0f 100644 --- a/mpi/src/starpu_mpi_fxt.h +++ b/mpi/src/starpu_mpi_fxt.h @@ -20,7 +20,7 @@ #include #include -#include +#include /** @file */ diff --git a/mpi/src/starpu_mpi_task_insert.c b/mpi/src/starpu_mpi_task_insert.c index d93d6a294e..0ae0b707f2 100644 --- a/mpi/src/starpu_mpi_task_insert.c +++ b/mpi/src/starpu_mpi_task_insert.c @@ -331,7 +331,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod int prio = 0; int select_node_policy = STARPU_MPI_NODE_SELECTION_CURRENT_POLICY; - _STARPU_TRACE_TASK_MPI_DECODE_START(); + _starpu_trace_task_mpi_decode_start(); _STARPU_MPI_MALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); nb_data = 0; @@ -354,7 +354,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod { free(descrs); va_end(varg_list_copy); - _STARPU_TRACE_TASK_MPI_DECODE_END(); + _starpu_trace_task_mpi_decode_end(); return ret; } } @@ -412,7 +412,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod { free(descrs); va_end(varg_list_copy); - _STARPU_TRACE_TASK_MPI_DECODE_END(); + _starpu_trace_task_mpi_decode_end(); return ret; } } @@ -443,7 +443,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod { free(descrs); va_end(varg_list_copy); - _STARPU_TRACE_TASK_MPI_DECODE_END(); + _starpu_trace_task_mpi_decode_end(); return ret; 
} } @@ -718,7 +718,7 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod *nb_data_p = nb_data; *prio_p = prio; - _STARPU_TRACE_TASK_MPI_DECODE_END(); + _starpu_trace_task_mpi_decode_end(); return 0; } @@ -741,9 +741,10 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, int me, struct starpu_codelet *codel if (ret < 0) return ret; - _STARPU_TRACE_TASK_MPI_PRE_START(); + _starpu_trace_task_mpi_pre_start(); if (exchange_needed) *exchange_needed = 0; + /* Send and receive data as requested */ for(i=0 ; i -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include - -#ifdef STARPU_USE_FXT -#include -#include -#endif - -#pragma GCC visibility push(hidden) - -/* some key to identify the worker kind */ -#define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100) -#define _STARPU_FUT_KEY_WORKER(key) (key - 0x100) - -#define _STARPU_FUT_WORKER_INIT_START 0x5100 -#define _STARPU_FUT_WORKER_INIT_END 0x5101 - -#define _STARPU_FUT_START_CODELET_BODY 0x5102 -#define _STARPU_FUT_END_CODELET_BODY 0x5103 - -#define _STARPU_FUT_JOB_PUSH 0x5104 -#define _STARPU_FUT_JOB_POP 0x5105 - -#define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 - -#define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 -#define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 -#define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 -#define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 - -#define _STARPU_FUT_TAG 0x5111 -#define _STARPU_FUT_TAG_DEPS 0x5112 - -#define _STARPU_FUT_TASK_DEPS 0x5113 - -#define _STARPU_FUT_DATA_COPY 0x5114 -#define _STARPU_FUT_WORK_STEALING 0x5115 - -#define _STARPU_FUT_WORKER_DEINIT_START 0x5116 -#define _STARPU_FUT_WORKER_DEINIT_END 0x5117 - -#define _STARPU_FUT_WORKER_SLEEP_START 0x5118 -#define _STARPU_FUT_WORKER_SLEEP_END 0x5119 - -#define _STARPU_FUT_TASK_SUBMIT 0x511a -#define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b - -#define _STARPU_FUT_MODEL_NAME 0x511c - -#define _STARPU_FUT_DATA_NAME 0x511d -#define _STARPU_FUT_DATA_COORDINATES 0x511e -#define 
_STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f - -#define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 - -#define _STARPU_FUT_NEW_MEM_NODE 0x5122 - -#define _STARPU_FUT_START_CALLBACK 0x5123 -#define _STARPU_FUT_END_CALLBACK 0x5124 - -#define _STARPU_FUT_TASK_DONE 0x5125 -#define _STARPU_FUT_TAG_DONE 0x5126 - -#define _STARPU_FUT_START_ALLOC 0x5127 -#define _STARPU_FUT_END_ALLOC 0x5128 - -#define _STARPU_FUT_START_ALLOC_REUSE 0x5129 -#define _STARPU_FUT_END_ALLOC_REUSE 0x5130 - -#define _STARPU_FUT_USED_MEM 0x512a - -#define _STARPU_FUT_TASK_NAME 0x512b - -#define _STARPU_FUT_DATA_WONT_USE 0x512c - -#define _STARPU_FUT_TASK_COLOR 0x512d - -#define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e - -#define _STARPU_FUT_TASK_LINE 0x512f - -#define _STARPU_FUT_START_MEMRECLAIM 0x5131 -#define _STARPU_FUT_END_MEMRECLAIM 0x5132 - -#define _STARPU_FUT_START_DRIVER_COPY 0x5133 -#define _STARPU_FUT_END_DRIVER_COPY 0x5134 - -#define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 -#define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 - -#define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 -#define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 - -#define _STARPU_FUT_USER_EVENT 0x5139 - -#define _STARPU_FUT_SET_PROFILING 0x513a - -#define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b - -#define _STARPU_FUT_EVENT 0x513c -#define _STARPU_FUT_THREAD_EVENT 0x513d - -#define _STARPU_FUT_CODELET_DETAILS 0x513e -#define _STARPU_FUT_CODELET_DATA 0x513f - -#define _STARPU_FUT_LOCKING_MUTEX 0x5140 -#define _STARPU_FUT_MUTEX_LOCKED 0x5141 - -#define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 -#define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 - -#define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 - -#define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 -#define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 - -#define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 -#define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 - -#define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 -#define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a - -#define _STARPU_FUT_LOCKING_SPINLOCK 0x514b -#define _STARPU_FUT_SPINLOCK_LOCKED 
0x514c - -#define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d -#define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e - -#define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f - -#define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 -#define _STARPU_FUT_COND_WAIT_END 0x5151 - -#define _STARPU_FUT_MEMORY_FULL 0x5152 - -#define _STARPU_FUT_DATA_LOAD 0x5153 - -#define _STARPU_FUT_START_UNPARTITION_ON_TID 0x5154 -#define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 - -#define _STARPU_FUT_START_FREE 0x5156 -#define _STARPU_FUT_END_FREE 0x5157 - -#define _STARPU_FUT_START_WRITEBACK 0x5158 -#define _STARPU_FUT_END_WRITEBACK 0x5159 - -#define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a -#define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b - -#define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c -#define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d - -#define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 -#define _STARPU_FUT_HYPERVISOR_END 0x5161 - -#define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 -#define _STARPU_FUT_BARRIER_WAIT_END 0x5163 - -#define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 -#define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 -#define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 -#define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 - -#define _STARPU_FUT_START_EXECUTING 0x5168 -#define _STARPU_FUT_END_EXECUTING 0x5169 - -#define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a -#define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b -#define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c -#define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d - -#define _STARPU_FUT_TASK_SUBMIT_START 0x516e -#define _STARPU_FUT_TASK_SUBMIT_END 0x516f - -#define _STARPU_FUT_TASK_BUILD_START 0x5170 -#define _STARPU_FUT_TASK_BUILD_END 0x5171 - -#define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 -#define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 - -#define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 -#define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 - -#define _STARPU_FUT_TASK_MPI_POST_START 0x5176 -#define _STARPU_FUT_TASK_MPI_POST_END 0x5177 - -#define _STARPU_FUT_TASK_WAIT_START 0x5178 
-#define _STARPU_FUT_TASK_WAIT_END 0x5179 - -#define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a -#define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b - -#define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c - -#define _STARPU_FUT_START_FETCH_INPUT 0x517e -#define _STARPU_FUT_END_FETCH_INPUT 0x517f - -#define _STARPU_FUT_TASK_THROTTLE_START 0x5180 -#define _STARPU_FUT_TASK_THROTTLE_END 0x5181 - -#define _STARPU_FUT_DATA_STATE_INVALID 0x5182 -#define _STARPU_FUT_DATA_STATE_OWNER 0x5183 -#define _STARPU_FUT_DATA_STATE_SHARED 0x5184 - -#define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 -#define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 -#define _STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 - -#define _STARPU_FUT_TASK_END_DEP 0x5188 - -#ifdef STARPU_RECURSIVE_TASKS -#define _STARPU_FUT_RECURSIVE_TASK 0x5189 -#endif - -#define _STARPU_FUT_START_PARALLEL_SYNC 0x518a -#define _STARPU_FUT_END_PARALLEL_SYNC 0x518b - -/* Predefined FUT key masks */ -#define _STARPU_FUT_KEYMASK_META FUT_KEYMASK0 -#define _STARPU_FUT_KEYMASK_USER FUT_KEYMASK1 -#define _STARPU_FUT_KEYMASK_TASK FUT_KEYMASK2 -#define _STARPU_FUT_KEYMASK_TASK_VERBOSE FUT_KEYMASK3 -#define _STARPU_FUT_KEYMASK_DATA FUT_KEYMASK4 -#define _STARPU_FUT_KEYMASK_DATA_VERBOSE FUT_KEYMASK5 -#define _STARPU_FUT_KEYMASK_WORKER FUT_KEYMASK6 -#define _STARPU_FUT_KEYMASK_WORKER_VERBOSE FUT_KEYMASK7 -#define _STARPU_FUT_KEYMASK_DSM FUT_KEYMASK8 -#define _STARPU_FUT_KEYMASK_DSM_VERBOSE FUT_KEYMASK9 -#define _STARPU_FUT_KEYMASK_SCHED FUT_KEYMASK10 -#define _STARPU_FUT_KEYMASK_SCHED_VERBOSE FUT_KEYMASK11 -#define _STARPU_FUT_KEYMASK_LOCK FUT_KEYMASK12 -#define _STARPU_FUT_KEYMASK_LOCK_VERBOSE FUT_KEYMASK13 -#define _STARPU_FUT_KEYMASK_EVENT FUT_KEYMASK14 -#define _STARPU_FUT_KEYMASK_EVENT_VERBOSE FUT_KEYMASK15 -#define _STARPU_FUT_KEYMASK_MPI FUT_KEYMASK16 -#define _STARPU_FUT_KEYMASK_MPI_VERBOSE FUT_KEYMASK17 -#define _STARPU_FUT_KEYMASK_HYP FUT_KEYMASK18 -#define _STARPU_FUT_KEYMASK_HYP_VERBOSE FUT_KEYMASK19 -#define 
_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA FUT_KEYMASK20 -#define _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA FUT_KEYMASK21 -/* When doing modifications to keymasks: - * - also adapt _starpu_profile_get_user_keymask() in src/common/fxt.c - * - adapt KEYMASKALL_DEFAULT in src/common/fxt.c - * - adapt the documentation in 501_environment_variable.doxy and/or - * 380_offline_performance_tools.doxy */ - -extern unsigned long _starpu_job_cnt; - -static inline unsigned long _starpu_fxt_get_job_id(void) -{ - unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_job_cnt, 1); - STARPU_ASSERT_MSG(ret != 0, "Oops, job_id wrapped! There are too many tasks for tracking them for profiling"); - return ret; -} - -#ifdef STARPU_USE_FXT - -/* Some versions of FxT do not include the declaration of the function */ -#ifdef HAVE_ENABLE_FUT_FLUSH -#if !HAVE_DECL_ENABLE_FUT_FLUSH -void enable_fut_flush(); -#endif -#endif -#ifdef HAVE_FUT_SET_FILENAME -#if !HAVE_DECL_FUT_SET_FILENAME -void fut_set_filename(char *filename); -#endif -#endif - -extern int _starpu_fxt_started STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; -extern int _starpu_fxt_willstart STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; -extern starpu_pthread_mutex_t _starpu_fxt_started_mutex STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; -extern starpu_pthread_cond_t _starpu_fxt_started_cond STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; - -/** Wait until FXT is started (or not). 
Returns if FXT was started */ -static inline int _starpu_fxt_wait_initialisation() -{ - STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); - while (_starpu_fxt_willstart && !_starpu_fxt_started) - STARPU_PTHREAD_COND_WAIT(&_starpu_fxt_started_cond, &_starpu_fxt_started_mutex); - STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); - - return _starpu_fxt_started; -} - -extern unsigned long _starpu_submit_order; - -static inline unsigned long _starpu_fxt_get_submit_order(void) -{ - unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_submit_order, 1); - STARPU_ASSERT_MSG(_starpu_submit_order != 0, "Oops, submit_order wrapped! There are too many tasks for tracking them for profiling"); - return ret; -} - -long _starpu_gettid(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; - -int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; - -/** Initialize the FxT library. */ -void _starpu_fxt_init_profiling(uint64_t trace_buffer_size); - -/** Stop the FxT library, and generate the trace file. */ -void _starpu_stop_fxt_profiling(void); - -/** In case we use MPI, tell the profiling system how many processes are used. */ -void _starpu_profiling_set_mpi_worldsize(int worldsize) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; - -/** Generate the trace file. Used when catching signals SIGINT and SIGSEGV */ -void _starpu_fxt_dump_file(void); - -#ifdef FUT_NEEDS_COMMIT -#define _STARPU_FUT_COMMIT(size) fut_commitstampedbuffer(size) -#else -#define _STARPU_FUT_COMMIT(size) do { } while (0) -#endif - -#ifdef FUT_RAW_ALWAYS_PROBE1STR -#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) \ -do { \ - if(STARPU_UNLIKELY(fut_active)) { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 1 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ - }} while (0) -#endif - -#ifdef FUT_FULL_PROBE1STR -#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) FUT_FULL_PROBE1STR(CODE, P1, str) -#else -/** Sometimes we need something a little more specific than the wrappers from - * FxT: these macro permit to put add an event with 3 (or 4) numbers followed - * by a string. */ -#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE2STR -#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) FUT_RAW_ALWAYS_PROBE2STR(CODE, P1, P2, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 2 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE2STR -#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) FUT_FULL_PROBE2STR(CODE, P1, P2, str) -#else -#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE3STR -#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) FUT_RAW_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 3)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 3 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - *(futargs++) = (unsigned long)(P3); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE3STR -#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) FUT_FULL_PROBE3STR(CODE, P1, P2, P3, str) -#else -#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE4STR -#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) FUT_RAW_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 4 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - *(futargs++) = (unsigned long)(P3); \ - *(futargs++) = (unsigned long)(P4); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE4STR -#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) FUT_FULL_PROBE4STR(CODE, P1, P2, P3, P4, str) -#else -#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE5STR -#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) FUT_RAW_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 5)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 5 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - *(futargs++) = (unsigned long)(P3); \ - *(futargs++) = (unsigned long)(P4); \ - *(futargs++) = (unsigned long)(P5); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE5STR -#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) FUT_FULL_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) -#else -#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE6STR -#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) FUT_RAW_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 6)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 6 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - *(futargs++) = (unsigned long)(P3); \ - *(futargs++) = (unsigned long)(P4); \ - *(futargs++) = (unsigned long)(P5); \ - *(futargs++) = (unsigned long)(P6); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE6STR -#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) FUT_FULL_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) -#else -#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str); \ - } \ -} while (0) -#endif - -#ifdef FUT_ALWAYS_PROBE7STR -#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_RAW_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) -#else -#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) \ -do { \ - /* No more than FXT_MAX_PARAMS args are allowed */ \ - /* we add a \0 just in case ... 
*/ \ - size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 7)*sizeof(unsigned long));\ - unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ - unsigned nbargs = 7 + nbargs_str; \ - size_t total_len = FUT_SIZE(nbargs); \ - unsigned long *futargs = \ - fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ - *(futargs++) = (unsigned long)(P1); \ - *(futargs++) = (unsigned long)(P2); \ - *(futargs++) = (unsigned long)(P3); \ - *(futargs++) = (unsigned long)(P4); \ - *(futargs++) = (unsigned long)(P5); \ - *(futargs++) = (unsigned long)(P6); \ - *(futargs++) = (unsigned long)(P7); \ - snprintf((char *)futargs, len, "%s", str); \ - ((char *)futargs)[len - 1] = '\0'; \ - _STARPU_FUT_COMMIT(total_len); \ -} while (0) -#endif - -#ifdef FUT_FULL_PROBE7STR -#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_FULL_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) -#else -#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) \ -do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str); \ - } \ -} while (0) -#endif - -#ifndef FUT_RAW_PROBE7 -#define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ - if(STARPU_UNLIKELY(fut_active)) { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(7)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ - _STARPU_FUT_COMMIT(FUT_SIZE(7)); \ - } \ - } while (0) -#endif - -#ifndef FUT_RAW_ALWAYS_PROBE1 -#define FUT_RAW_ALWAYS_PROBE1(CODE,P1) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(1)); \ - *(__args++)=(unsigned long)(P1); \ - fut_commitstampedbuffer(FUT_SIZE(1)); \ - } while (0) -#endif -#define 
FUT_DO_ALWAYS_PROBE1(CODE,P1) do { \ - FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE2 -#define FUT_RAW_ALWAYS_PROBE2(CODE,P1,P2) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(2)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2); \ - fut_commitstampedbuffer(FUT_SIZE(2)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE2(CODE,P1,P2) do { \ - FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE3 -#define FUT_RAW_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(3)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3); \ - fut_commitstampedbuffer(FUT_SIZE(3)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ - FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE4 -#define FUT_RAW_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(4)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4); \ - fut_commitstampedbuffer(FUT_SIZE(4)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ - FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE5 -#define FUT_RAW_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(5)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5); \ - fut_commitstampedbuffer(FUT_SIZE(5)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ - 
FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE6 -#define FUT_RAW_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(6)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6); \ - fut_commitstampedbuffer(FUT_SIZE(6)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ - FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE7 -#define FUT_RAW_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(7)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ - fut_commitstampedbuffer(FUT_SIZE(7)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ - FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ -} while (0) - -#ifndef FUT_RAW_ALWAYS_PROBE8 -#define FUT_RAW_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(8)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8); \ - fut_commitstampedbuffer(FUT_SIZE(8)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ - FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ -} while (0) - -#ifndef 
FUT_RAW_ALWAYS_PROBE9 -#define FUT_RAW_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ - unsigned long *__args __attribute__((unused))= \ - fut_getstampedbuffer(CODE, \ - FUT_SIZE(9)); \ - *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8);*(__args++)=(unsigned long)(P9); \ - fut_commitstampedbuffer(FUT_SIZE(9)); \ - } while (0) -#endif -#define FUT_DO_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ - FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ -} while (0) - -/* full probes */ -#ifndef FUT_FULL_PROBE0 -#define FUT_FULL_PROBE0(KEYMASK,CODE) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE1 -#define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE2 -#define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE3 -#define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE4 -#define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE5 -#define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ - } \ -} while(0) -#endif - -#ifndef 
FUT_FULL_PROBE6 -#define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE7 -#define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \ - if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ - FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE8 -#define FUT_FULL_PROBE8(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ - if(KEYMASK & fut_active) { \ - FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ - } \ -} while(0) -#endif - -#ifndef FUT_FULL_PROBE9 -#define FUT_FULL_PROBE9(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ - if(KEYMASK & fut_active) { \ - FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ - } \ -} while(0) -#endif - -#define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid()); \ -} while (0) - -#define _STARPU_TRACE_REGISTER_THREAD(cpuid) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \ -} while (0) - -#define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \ -} while (0) - -#define _STARPU_TRACE_WORKER_INIT_END(__workerid) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), (__workerid)); \ -} while (0) - -#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA|_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & fut_active)) { \ - int mem_node 
= workerid == -1 ? -1 : (int)starpu_worker_get_memory_node(workerid); \ - int codelet_null = (job)->task->cl == NULL; \ - int nowhere = ((job)->task->where == STARPU_NOWHERE) || ((job)->task->cl != NULL && (job)->task->cl->where == STARPU_NOWHERE); \ - enum starpu_node_kind kind = workerid == -1 ? STARPU_UNUSED : starpu_worker_get_memory_node_kind(starpu_worker_get_type(workerid)); \ - FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, mem_node, _starpu_gettid(), (codelet_null == 1 || nowhere == 1)); \ - if (rank == 0 && (job)->task->cl && !nowhere) \ - { \ - const int __nbuffers = STARPU_TASK_GET_NBUFFERS((job)->task); \ - char __buf[FXT_MAX_PARAMS*sizeof(long)]; \ - int __i; \ - for (__i = 0; __i < __nbuffers; __i++) \ - { \ - starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE((job)->task, __i); \ - void *__interface = _STARPU_TASK_GET_INTERFACES((job)->task)[__i]; \ - if (__interface && __handle->ops->describe) \ - { \ - __handle->ops->describe(__interface, __buf, sizeof(__buf)); \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, _starpu_gettid(), __buf); \ - } \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i)); \ - /* Regarding the memory location: - * - if the data interface doesn't provide to_pointer operation, NULL will be returned - * and the location will be -1, which is fine; - * - we have to check whether the memory is on an actual NUMA node (and not on GPU - * memory, for instance); - * - looking at memory location before executing the task isn't the best choice: - * the page can be not allocated yet. A solution would be to get the memory - * location at the end of the task, but there is no FxT probe where we iterate over - * handles, after task execution. 
- * */ \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA, _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS, (job)->job_id, (__i), kind == STARPU_CPU_RAM && starpu_task_get_current_data_node(__i) >= 0 ? starpu_get_memory_location_bitmap(starpu_data_handle_to_pointer(__handle, (unsigned) starpu_task_get_current_data_node(__i)), starpu_data_get_size(__handle)) : -1); \ - } \ - } \ - if (!(codelet_null == 1 || nowhere == 1)) \ - { \ - const size_t __job_size = (perf_arch == NULL) ? 0 : _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ - const uint32_t __job_hash = (perf_arch == NULL) ? 0 : _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ - FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && job->task->cl->type != STARPU_SEQ ? j->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id), _starpu_gettid()); \ - } \ - } \ -} while(0) - -#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ - const size_t job_size = (perf_arch == NULL) ? 0 : _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ - const uint32_t job_hash = (perf_arch == NULL) ? 
0 : _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \ - char _archname[32]=""; \ - if (perf_arch) starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0); \ - int nowhere = ((job)->task->where == STARPU_NOWHERE) || ((job)->task->cl != NULL && (job)->task->cl->where == STARPU_NOWHERE); \ - int codelet_null = (job)->task->cl == NULL; \ - _STARPU_FUT_FULL_PROBE6STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), (codelet_null == 1 || nowhere == 1), _archname); \ - } \ -} while(0) - -#define _STARPU_TRACE_START_EXECUTING(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_EXECUTING, _starpu_gettid(), (job)->job_id); - -#define _STARPU_TRACE_END_EXECUTING(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_EXECUTING, _starpu_gettid(), (job)->job_id); - -#define _STARPU_TRACE_START_PARALLEL_SYNC(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); - -#define _STARPU_TRACE_END_PARALLEL_SYNC(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); - -#define _STARPU_TRACE_START_CALLBACK(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_CALLBACK, job, _starpu_gettid()); - -#define _STARPU_TRACE_END_CALLBACK(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_CALLBACK, job, _starpu_gettid()); - -#define _STARPU_TRACE_JOB_PUSH(task, prio) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); - -#define _STARPU_TRACE_JOB_POP(task, prio) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); - -#define 
_STARPU_TRACE_UPDATE_TASK_CNT(counter) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid()) - -#define _STARPU_TRACE_START_FETCH_INPUT(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid()); - -#define _STARPU_TRACE_END_FETCH_INPUT(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid()); - -#define _STARPU_TRACE_START_PUSH_OUTPUT(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); - -#define _STARPU_TRACE_END_PUSH_OUTPUT(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_FETCH_INPUT, job, id); - -#define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_FETCH_INPUT, job, id); - -#define _STARPU_TRACE_TAG(tag, job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG, tag, (job)->job_id) - -#define _STARPU_TRACE_TAG_DEPS(tag_child, tag_parent) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DEPS, tag_child, tag_parent) - -#define _STARPU_TRACE_TASK_DEPS(job_prev, job_succ) \ - _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task") - -#define _STARPU_TRACE_TASK_END_DEP(job_prev, job_succ) \ - FUT_DO_PROBE2(_STARPU_FUT_TASK_END_DEP, (job_prev)->job_id, (job_succ)->job_id) - -#define _STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ) \ - _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost") - -#ifdef STARPU_RECURSIVE_TASKS -#define _STARPU_TRACE_RECURSIVE_TASK_DEPS(prev_id, 
job_succ) \ - _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "recursive_task") -#endif - -#define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(job) \ - do { \ - unsigned exclude_from_dag = (job)->exclude_from_dag; \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_EXCLUDE_FROM_DAG, (job)->job_id, (long unsigned)exclude_from_dag); \ -} while(0) - -#define _STARPU_TRACE_TASK_NAME_LINE_COLOR(job) \ - do { \ - _STARPU_TRACE_TASK_COLOR(job); \ - _STARPU_TRACE_TASK_NAME(job); \ - _STARPU_TRACE_TASK_LINE(job); \ - } while(0) - -#define _STARPU_TRACE_TASK_LINE(job) \ - do { \ - if ((job)->task->file) \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_LINE, (job)->job_id, (job)->task->line, (job)->task->file); \ -} while(0) - -#ifdef STARPU_RECURSIVE_TASKS -#define _STARPU_TRACE_RECURSIVE_TASK(job) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ - unsigned int is_recursive_task=(job)->is_recursive_task; \ - unsigned long recursive_task_parent=(job)->task->recursive_task_parent; \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_RECURSIVE_TASK, (job)->job_id, is_recursive_task, recursive_task_parent); \ - } \ -} while(0) -#endif - -#define _STARPU_TRACE_TASK_NAME(job) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ - const char *model_name = _starpu_job_get_model_name((job)); \ - const char *name = _starpu_job_get_task_name((job)); \ - if (name) \ - { \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), name); \ - } \ - else { \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), "unknown");\ - } \ - if (model_name) \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); \ - } \ -} while(0) - -#define 
_STARPU_TRACE_TASK_COLOR(job) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ - if ((job)->task->color != 0) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); \ - else if ((job)->task->cl && (job)->task->cl->color != 0) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); \ - } \ -} while(0) - -#define _STARPU_TRACE_TASK_DONE(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid()) - -#define _STARPU_TRACE_TAG_DONE(tag) \ -do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) { \ - struct _starpu_job *job = (tag)->job; \ - const char *model_name = _starpu_job_get_task_name((job)); \ - if (model_name) \ - { \ - _STARPU_FUT_FULL_PROBE3STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); \ - } \ - else { \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0);\ - } \ - } \ -} while(0) - -#define _STARPU_TRACE_DATA_NAME(handle, name) \ - _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_NAME, handle, name) - -#define _STARPU_TRACE_DATA_COORDINATES(handle, dim, v) do {\ - switch (dim) { \ - case 1: FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; \ - case 2: FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; \ - case 3: FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; \ - case 4: FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; \ - default: FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; \ - } \ -} while (0) - -#define _STARPU_TRACE_DATA_COPY(src_node, 
dst_node, size) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_COPY, src_node, dst_node, size) - -#define _STARPU_TRACE_DATA_WONT_USE(handle) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid()) - -#define _STARPU_TRACE_DATA_DOING_WONT_USE(handle) \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_DOING_WONT_USE, handle) - -#define _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle) \ - FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle) - -#define _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch) \ - FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch) - -#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node) - -#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node) - -#define _STARPU_TRACE_WORK_STEALING(empty_q, victim_q) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED_VERBOSE, _STARPU_FUT_WORK_STEALING, empty_q, victim_q) - -#define _STARPU_TRACE_WORKER_DEINIT_START do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid()); \ -} while(0) - -#define _STARPU_TRACE_WORKER_DEINIT_END(workerkind) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \ -} while(0) - -#define _STARPU_TRACE_WORKER_SCHEDULING_START \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_SCHEDULING_END \ - 
FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_SCHEDULING_PUSH \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_SCHEDULING_POP \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_SLEEP_START \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid()); - -#define _STARPU_TRACE_WORKER_SLEEP_END \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_SUBMIT(job, iter, subiter) \ - FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_SUBMIT_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_SUBMIT_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_THROTTLE_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_THROTTLE_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_BUILD_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_BUILD_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_DECODE_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_START, 
_starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_DECODE_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_PRE_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_PRE_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_POST_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_MPI_POST_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_WAIT_START(job) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_WAIT_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_END, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid()); - -#define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) \ - FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch); - -#define _STARPU_TRACE_END_ALLOC(memnode, handle, r) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r); - -#define _STARPU_TRACE_START_ALLOC_REUSE(memnode, size, handle, is_prefetch) \ - FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch); - -#define 
_STARPU_TRACE_END_ALLOC_REUSE(memnode, handle, r) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r); - -#define _STARPU_TRACE_START_FREE(memnode, size, handle) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle); - -#define _STARPU_TRACE_END_FREE(memnode, handle) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle); - -#define _STARPU_TRACE_START_WRITEBACK(memnode, handle) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle); - -#define _STARPU_TRACE_END_WRITEBACK(memnode, handle) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle); - -#define _STARPU_TRACE_USED_MEM(memnode,used) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid()); - -#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); - -#define _STARPU_TRACE_END_MEMRECLAIM(memnode, is_prefetch) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); - -#define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid()); - -#define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid()); - -#define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) \ - FUT_DO_PROBE3(_STARPU_FUT_PAPI_TASK_EVENT_VALUE, event_id, _starpu_get_job_associated_to_task(task)->job_id, value) - -/* We skip these events because they are called so often that they cause FxT to - * 
fail and make the overall trace unreadable anyway. */ -#define _STARPU_TRACE_START_PROGRESS(memnode) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid()); - -#define _STARPU_TRACE_END_PROGRESS(memnode) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid()); - -#define _STARPU_TRACE_USER_EVENT(code) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_EVENT, code, _starpu_gettid()); - -#define _STARPU_TRACE_META(S) \ - FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_EVENT,S) - -#define _STARPU_TRACE_SET_PROFILING(status) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_SET_PROFILING, status, _starpu_gettid()); - -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL \ - FUT_FULL_PROBE0(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_WAIT_FOR_ALL) - -#define _STARPU_TRACE_EVENT_ALWAYS(S) do {\ - if (_starpu_fxt_started) \ - FUT_DO_ALWAYS_PROBESTR(_STARPU_FUT_EVENT,S) \ -} while(0) - -#define _STARPU_TRACE_EVENT(S) \ - FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT, _STARPU_FUT_EVENT,S) - -#define _STARPU_TRACE_EVENT_VERBOSE(S) \ - FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT_VERBOSE, _STARPU_FUT_EVENT,S) - - -#define _STARPU_TRACE_THREAD_EVENT(S) \ - _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S) - -#define _STARPU_TRACE_HYPERVISOR_BEGIN() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid()); - -#define _STARPU_TRACE_HYPERVISOR_END() \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_END, _starpu_gettid()); - -#ifdef STARPU_FXT_LOCK_TRACES - -#define _STARPU_TRACE_LOCKING_MUTEX() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ -} while (0) - -#define _STARPU_TRACE_MUTEX_LOCKED() do { \ - const 
char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_UNLOCKING_MUTEX() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_MUTEX_UNLOCKED() do {\ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_TRYLOCK_MUTEX() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_RDLOCKING_RWLOCK() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_RWLOCK_RDLOCKED() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_WRLOCKING_RWLOCK() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_RWLOCK_WRLOCKED() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_UNLOCKING_RWLOCK() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ 
- _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_RWLOCK_UNLOCKED() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define STARPU_TRACE_SPINLOCK_CONDITITION (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) - -#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {\ - if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ - const char *xfile; \ - xfile = strrchr(file,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ - } \ -} while(0) - -#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do { \ - if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ - const char *xfile; \ - xfile = strrchr(file,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); \ - } \ -} while(0) - -#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do { \ - if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ - const char *xfile; \ - xfile = strrchr(file,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); \ - } \ -} while(0) - -#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do { \ - if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ - const char *xfile; \ - xfile = strrchr(file,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); \ - } \ -} while(0) - -#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do { \ - if (STARPU_TRACE_SPINLOCK_CONDITITION) { \ - const char *xfile; \ - xfile = strrchr(file,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, 
_STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); \ - } \ -} while(0) - -#define _STARPU_TRACE_COND_WAIT_BEGIN() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_COND_WAIT_END() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#define _STARPU_TRACE_BARRIER_WAIT_END() do { \ - const char *file; \ - file = strrchr(__FILE__,'/') + 1; \ - _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); \ -} while(0) - -#else // !STARPU_FXT_LOCK_TRACES - -#define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) -#define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) -#define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) -#define _STARPU_TRACE_MUTEX_UNLOCKED() do {} while(0) -#define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) -#define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) -#define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) -#define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) -#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) -#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void) file; (void)line;} while(0) -#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) -#define 
_STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void) file; (void)line;} while(0) -#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void) file; (void)line;} while(0) -#define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) -#define _STARPU_TRACE_COND_WAIT_END() do {} while(0) -#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) -#define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) - -#endif // STARPU_FXT_LOCK_TRACES - -#define _STARPU_TRACE_MEMORY_FULL(size) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid()); - -#define _STARPU_TRACE_DATA_LOAD(workerid,size) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size, _starpu_gettid()); - -#define _STARPU_TRACE_START_UNPARTITION(handle, memnode) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); - -#define _STARPU_TRACE_END_UNPARTITION(handle, memnode) \ - FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); - -#define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); - -#define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) \ - FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); - -#define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) \ - if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name); - -#define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) \ - if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child); - -#define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) \ - FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, 
_STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio); - -#define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) \ - FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority); - -#define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do { \ - if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & fut_active)) { \ - const size_t __data_size = handle->ops->get_size(handle); \ - const starpu_ssize_t __max_data_size = _starpu_data_get_max_size(handle); \ - char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; \ - void *__interface = handle->per_node[0].data_interface; \ - if (handle->ops->describe) \ - handle->ops->describe(__interface, __buf, sizeof(__buf)); \ - else \ - __buf[0] = 0; \ - _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, handle, __data_size, __max_data_size, handle->home_node, __buf); \ - } \ -} while (0) - -#define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) \ - FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_UNREGISTER, handle) - -//Coherency Data Traces -#define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_INVALID, handle, node) - -#define _STARPU_TRACE_DATA_STATE_OWNER(handle, node) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_OWNER, handle, node) - -#define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) \ - FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_SHARED, handle, node) - -#define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) \ - FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_pre, req) - - -#else // !STARPU_USE_FXT - -/* Dummy macros in case FxT is disabled */ -#define _STARPU_TRACE_NEW_MEM_NODE(nodeid) do {(void)(nodeid);} while(0) -#define _STARPU_TRACE_REGISTER_THREAD(cpuid) do 
{(void)(cpuid);} while(0) -#define _STARPU_TRACE_WORKER_INIT_START(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) -#define _STARPU_TRACE_WORKER_INIT_END(workerid) do {(void)(workerid);} while(0) -#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) -#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, perf_arch, workerid, rank) do {(void)(job); (void)(nimpl); (void)(perf_arch); (void)(workerid); (void)(rank);} while(0) -#define _STARPU_TRACE_START_EXECUTING(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_END_EXECUTING(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_START_PARALLEL_SYNC(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_END_PARALLEL_SYNC(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_START_CALLBACK(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_END_CALLBACK(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_JOB_PUSH(task, prio) do {(void)(task); (void)(prio);} while(0) -#define _STARPU_TRACE_JOB_POP(task, prio) do {(void)(task); (void)(prio);} while(0) -#define _STARPU_TRACE_UPDATE_TASK_CNT(counter) do {(void)(counter);} while(0) -#define _STARPU_TRACE_START_FETCH_INPUT(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_END_FETCH_INPUT(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_START_PUSH_OUTPUT(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_END_PUSH_OUTPUT(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_TAG(tag, job) do {(void)(tag); (void)(job);} while(0) -#define _STARPU_TRACE_TAG_DEPS(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_TASK_END_DEP(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_GHOST_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(a) do {(void)(a);} 
while(0) -#define _STARPU_TRACE_TASK_NAME_LINE_COLOR(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_TASK_NAME(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_TASK_LINE(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_TASK_COLOR(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_TASK_DONE(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_TAG_DONE(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_DATA_NAME(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_DATA_COORDINATES(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) -#define _STARPU_TRACE_DATA_COPY(a, b, c) do {(void)(a); (void)(b); (void)(c);} while(0) -#define _STARPU_TRACE_DATA_WONT_USE(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_DATA_DOING_WONT_USE(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_START_DRIVER_COPY(a,b,c,d,e,f) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f);} while(0) -#define _STARPU_TRACE_END_DRIVER_COPY(a,b,c,d,e) do {(void)(a); (void)(b); (void)(c); (void)(d); (void)(e);} while(0) -#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(a,b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_WORK_STEALING(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_WORKER_DEINIT_START do {} while(0) -#define _STARPU_TRACE_WORKER_DEINIT_END(a) do {(void)(a);} while(0) -#define _STARPU_TRACE_WORKER_SCHEDULING_START do {} while(0) -#define _STARPU_TRACE_WORKER_SCHEDULING_END do {} while(0) -#define _STARPU_TRACE_WORKER_SCHEDULING_PUSH do {} while(0) -#define _STARPU_TRACE_WORKER_SCHEDULING_POP do {} while(0) -#define _STARPU_TRACE_WORKER_SLEEP_START do {} while(0) -#define _STARPU_TRACE_WORKER_SLEEP_END do {} while(0) -#define _STARPU_TRACE_TASK_SUBMIT(job, a, b) do {(void)(job); (void)(a);(void)(b);} while(0) -#define _STARPU_TRACE_TASK_SUBMIT_START() do {} while(0) -#define _STARPU_TRACE_TASK_SUBMIT_END() do {} while(0) -#define 
_STARPU_TRACE_TASK_THROTTLE_START() do {} while(0) -#define _STARPU_TRACE_TASK_THROTTLE_END() do {} while(0) -#define _STARPU_TRACE_TASK_BUILD_START() do {} while(0) -#define _STARPU_TRACE_TASK_BUILD_END() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_DECODE_START() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_DECODE_END() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_PRE_START() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_PRE_END() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_POST_START() do {} while(0) -#define _STARPU_TRACE_TASK_MPI_POST_END() do {} while(0) -#define _STARPU_TRACE_TASK_WAIT_START(job) do {(void)(job);} while(0) -#define _STARPU_TRACE_TASK_WAIT_END() do {} while(0) -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_START() do {} while(0) -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL_END() do {} while(0) -#define _STARPU_TRACE_START_ALLOC(memnode, size, handle, is_prefetch) do {(void)(memnode); (void)(size); (void)(handle);} while(0) -#define _STARPU_TRACE_END_ALLOC(memnode, handle, r) do {(void)(memnode); (void)(handle); (void)(r);} while(0) -#define _STARPU_TRACE_START_ALLOC_REUSE(a, size, handle, is_prefetch) do {(void)(a); (void)(size); (void)(handle);} while(0) -#define _STARPU_TRACE_END_ALLOC_REUSE(a, handle, r) do {(void)(a); (void)(handle); (void)(r);} while(0) -#define _STARPU_TRACE_START_FREE(memnode, size, handle) do {(void)(memnode); (void)(size); (void)(handle);} while(0) -#define _STARPU_TRACE_END_FREE(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) -#define _STARPU_TRACE_START_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) -#define _STARPU_TRACE_END_WRITEBACK(memnode, handle) do {(void)(memnode); (void)(handle);} while(0) -#define _STARPU_TRACE_USED_MEM(memnode,used) do {(void)(memnode); (void)(used);} while (0) -#define _STARPU_TRACE_START_MEMRECLAIM(memnode,is_prefetch) do {(void)(memnode); (void)(is_prefetch);} while(0) -#define _STARPU_TRACE_END_MEMRECLAIM(memnode,is_prefetch) do 
{(void)(memnode); (void)(is_prefetch);} while(0) -#define _STARPU_TRACE_START_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) -#define _STARPU_TRACE_END_WRITEBACK_ASYNC(memnode) do {(void)(memnode);} while(0) -#define _STARPU_TRACE_START_PROGRESS(memnode) do {(void)(memnode);} while(0) -#define _STARPU_TRACE_END_PROGRESS(memnode) do {(void)(memnode);} while(0) -#define _STARPU_TRACE_USER_EVENT(code) do {(void)(code);} while(0) -#define _STARPU_TRACE_SET_PROFILING(status) do {(void)(status);} while(0) -#define _STARPU_TRACE_TASK_WAIT_FOR_ALL() do {} while(0) -#define _STARPU_TRACE_EVENT_ALWAYS(S) do {(void)(S);} while(0) -#define _STARPU_TRACE_EVENT(S) do {(void)(S);} while(0) -#define _STARPU_TRACE_EVENT_VERBOSE(S) do {(void)(S);} while(0) -#define _STARPU_TRACE_THREAD_EVENT(S) do {(void)(S);} while(0) -#define _STARPU_TRACE_LOCKING_MUTEX() do {} while(0) -#define _STARPU_TRACE_MUTEX_LOCKED() do {} while(0) -#define _STARPU_TRACE_UNLOCKING_MUTEX() do {} while(0) -#define _STARPU_TRACE_MUTEX_UNLOCKED() do {} while(0) -#define _STARPU_TRACE_TRYLOCK_MUTEX() do {} while(0) -#define _STARPU_TRACE_RDLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_RDLOCKED() do {} while(0) -#define _STARPU_TRACE_WRLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_WRLOCKED() do {} while(0) -#define _STARPU_TRACE_UNLOCKING_RWLOCK() do {} while(0) -#define _STARPU_TRACE_RWLOCK_UNLOCKED() do {} while(0) -#define _STARPU_TRACE_LOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) -#define _STARPU_TRACE_SPINLOCK_LOCKED(file, line) do {(void)(file); (void)(line);} while(0) -#define _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) -#define _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line) do {(void)(file); (void)(line);} while(0) -#define _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line) do {(void)(file); (void)(line);} while(0) -#define _STARPU_TRACE_COND_WAIT_BEGIN() do {} while(0) -#define _STARPU_TRACE_COND_WAIT_END() 
do {} while(0) -#define _STARPU_TRACE_BARRIER_WAIT_BEGIN() do {} while(0) -#define _STARPU_TRACE_BARRIER_WAIT_END() do {} while(0) -#define _STARPU_TRACE_MEMORY_FULL(size) do {(void)(size);} while(0) -#define _STARPU_TRACE_DATA_LOAD(workerid,size) do {(void)(workerid); (void)(size);} while(0) -#define _STARPU_TRACE_START_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) -#define _STARPU_TRACE_END_UNPARTITION(handle, memnode) do {(void)(handle); (void)(memnode);} while(0) -#define _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) -#define _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len) do {(void)(workerid); (void)(ntasks); (void)(exp_len);} while(0) -#define _STARPU_TRACE_HYPERVISOR_BEGIN() do {} while(0) -#define _STARPU_TRACE_HYPERVISOR_END() do {} while(0) -#define _STARPU_TRACE_SCHED_COMPONENT_NEW(component) do {(void)(component);} while (0) -#define _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent, child) do {(void)(parent); (void)(child);} while (0) -#define _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, prio) do {(void)(from); (void)(to); (void)(task); (void)(prio);} while (0) -#define _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task) do {(void)(from); (void)(to); (void)(task);} while (0) -#define _STARPU_TRACE_HANDLE_DATA_REGISTER(handle) do {(void)(handle);} while (0) -#define _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle) do {(void)(handle);} while (0) -#define _STARPU_TRACE_WORKER_START_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) -#define _STARPU_TRACE_WORKER_END_FETCH_INPUT(job, id) do {(void)(job); (void)(id);} while(0) -#define _STARPU_TRACE_DATA_STATE_INVALID(handle, node) do {(void)(handle); (void)(node);} while(0) -#define _STARPU_TRACE_DATA_STATE_OWNER(handle, node) do {(void)(handle); (void)(node);} while(0) -#define _STARPU_TRACE_DATA_STATE_SHARED(handle, node) do {(void)(handle); (void)(node);} while(0) 
-#define _STARPU_TRACE_DATA_REQUEST_CREATED(handle, orig, dest, prio, is_pre, req) do {(void)(handle); (void)(orig); (void)(dest); (void)(prio); (void)(is_pre); (void)(req); } while(0) -#define _STARPU_TRACE_PAPI_TASK_EVENT(event_id, task, value) do {(void)(event_id); (void)(task); (void)(value);} while(0) - -#ifdef STARPU_RECURSIVE_TASKS -#define _STARPU_TRACE_RECURSIVE_TASK_DEPS(a, b) do {(void)(a); (void)(b);} while(0) -#define _STARPU_TRACE_RECURSIVE_TASK(a) do {(void)(a);} while(0) -#endif - -#endif // STARPU_USE_FXT - -#pragma GCC visibility pop - -#endif // __FXT_H__ diff --git a/src/common/starpu_spinlock.c b/src/common/starpu_spinlock.c index 41c856d9e4..df98f73a3d 100644 --- a/src/common/starpu_spinlock.c +++ b/src/common/starpu_spinlock.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #if defined(STARPU_SPINLOCK_CHECK) diff --git a/src/common/starpu_spinlock.h b/src/common/starpu_spinlock.h index 1ab423960b..75e54d0b87 100644 --- a/src/common/starpu_spinlock.h +++ b/src/common/starpu_spinlock.h @@ -21,10 +21,11 @@ #include #include #include -#include #include #include +#include + #ifdef STARPU_SPINLOCK_CHECK /* We don't care about performance */ @@ -40,11 +41,11 @@ int _starpu_spin_destroy(struct _starpu_spinlock *lock); static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_LOCKING_SPINLOCK(file, line); + _starpu_trace_locking_spinlock(file, line); int ret = starpu_pthread_mutex_lock(&lock->errcheck_lock); STARPU_ASSERT(!ret); lock->last_taker = func; - _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + _starpu_trace_spinlock_locked(file, line); return ret; } @@ -55,23 +56,23 @@ static inline void _starpu_spin_checklocked(struct _starpu_spinlock *lock STARPU static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line 
STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); + _starpu_trace_trylock_spinlock(file, line); int ret = starpu_pthread_mutex_trylock(&lock->errcheck_lock); STARPU_ASSERT(!ret || (ret == EBUSY)); if (STARPU_LIKELY(!ret)) { lock->last_taker = func; - _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + _starpu_trace_spinlock_locked(file, line); } return ret; } static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); + _starpu_trace_unlocking_spinlock(file, line); int ret = starpu_pthread_mutex_unlock(&lock->errcheck_lock); STARPU_ASSERT(!ret); - _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); + _starpu_trace_spinlock_unlocked(file, line); return ret; } #else @@ -94,10 +95,10 @@ static inline int _starpu_spin_init(struct _starpu_spinlock *lock) static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_LOCKING_SPINLOCK(file, line); + _starpu_trace_locking_spinlock(file, line); int ret = starpu_pthread_spin_lock(&lock->lock); STARPU_ASSERT(!ret); - _STARPU_TRACE_SPINLOCK_LOCKED(file, line); + _starpu_trace_spinlock_locked(file, line); return ret; } @@ -105,20 +106,20 @@ static inline int __starpu_spin_lock(struct _starpu_spinlock *lock, const char * static inline int __starpu_spin_trylock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_TRYLOCK_SPINLOCK(file, line); + _starpu_trace_trylock_spinlock(file, line); int ret = starpu_pthread_spin_trylock(&lock->lock); STARPU_ASSERT(!ret || (ret == EBUSY)); if (STARPU_LIKELY(!ret)) - _STARPU_TRACE_SPINLOCK_LOCKED(file, line); +
_starpu_trace_spinlock_locked(file, line); return ret; } static inline int __starpu_spin_unlock(struct _starpu_spinlock *lock, const char *file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED, const char *func STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_UNLOCKING_SPINLOCK(file, line); + _starpu_trace_unlocking_spinlock(file, line); int ret = starpu_pthread_spin_unlock(&lock->lock); STARPU_ASSERT(!ret); - _STARPU_TRACE_SPINLOCK_UNLOCKED(file, line); + _starpu_trace_spinlock_unlocked(file, line); return ret; } #endif diff --git a/src/common/thread.c b/src/common/thread.c index 71ca2a048c..c8a64bb0cb 100644 --- a/src/common/thread.c +++ b/src/common/thread.c @@ -20,7 +20,7 @@ #include #endif #include -#include +#include #include #include @@ -226,7 +226,7 @@ int starpu_pthread_mutex_destroy(starpu_pthread_mutex_t *mutex) int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) { - _STARPU_TRACE_LOCKING_MUTEX(); + _starpu_trace_locking_mutex(); /* Note: this is actually safe, because simgrid only preempts within * simgrid functions */ @@ -256,14 +256,14 @@ int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) xbt_mutex_acquire(*mutex); #endif - _STARPU_TRACE_MUTEX_LOCKED(); + _starpu_trace_mutex_locked(); return 0; } int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) { - _STARPU_TRACE_UNLOCKING_MUTEX(); + _starpu_trace_unlocking_mutex(); #ifdef STARPU_HAVE_SIMGRID_MUTEX_H sg_mutex_unlock(*mutex); @@ -271,7 +271,7 @@ int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) xbt_mutex_release(*mutex); #endif - _STARPU_TRACE_MUTEX_UNLOCKED(); + _starpu_trace_mutex_unlocked(); return 0; } @@ -279,7 +279,7 @@ int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) { int ret; - _STARPU_TRACE_TRYLOCK_MUTEX(); + _starpu_trace_trylock_mutex(); #ifdef STARPU_HAVE_SIMGRID_MUTEX_H ret = sg_mutex_try_lock(*mutex); @@ -290,8 +290,10 @@ int 
starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) #endif ret = ret ? 0 : EBUSY; - _STARPU_TRACE_MUTEX_LOCKED(); - + if (!ret) + { + _starpu_trace_mutex_locked(); + } return ret; } @@ -468,7 +470,7 @@ int starpu_pthread_cond_broadcast(starpu_pthread_cond_t *cond) int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) { - _STARPU_TRACE_COND_WAIT_BEGIN(); + _starpu_trace_cond_wait_begin(); _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H @@ -477,7 +479,7 @@ int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t xbt_cond_wait(*cond, *mutex); #endif - _STARPU_TRACE_COND_WAIT_END(); + _starpu_trace_cond_wait_end(); return 0; } @@ -493,7 +495,7 @@ int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mu delta.tv_nsec = abstime->tv_nsec - now.tv_nsec; delay = (double) delta.tv_sec + (double) delta.tv_nsec / 1000000000.; - _STARPU_TRACE_COND_WAIT_BEGIN(); + _starpu_trace_cond_wait_begin(); _starpu_pthread_cond_auto_init(cond); #ifdef STARPU_HAVE_SIMGRID_COND_H @@ -502,7 +504,7 @@ int starpu_pthread_cond_timedwait(starpu_pthread_cond_t *cond, starpu_pthread_mu ret = xbt_cond_timedwait(*cond, *mutex, delay) ?
ETIMEDOUT : 0; #endif - _STARPU_TRACE_COND_WAIT_END(); + _starpu_trace_cond_wait_end(); return ret; #else @@ -536,11 +538,11 @@ int starpu_pthread_rwlock_destroy(starpu_pthread_rwlock_t *rwlock) int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_RDLOCKING_RWLOCK(); + _starpu_trace_rdlocking_rwlock(); int p_ret = starpu_pthread_mutex_lock(rwlock); - _STARPU_TRACE_RWLOCK_RDLOCKED(); + _starpu_trace_rwlock_rdlocked(); return p_ret; } @@ -550,18 +552,18 @@ int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) int p_ret = starpu_pthread_mutex_trylock(rwlock); if (!p_ret) - _STARPU_TRACE_RWLOCK_RDLOCKED(); + _starpu_trace_rwlock_rdlocked(); return p_ret; } int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_WRLOCKING_RWLOCK(); + _starpu_trace_wrlocking_rwlock(); int p_ret = starpu_pthread_mutex_lock(rwlock); - _STARPU_TRACE_RWLOCK_WRLOCKED(); + _starpu_trace_rwlock_wrlocked(); return p_ret; } @@ -571,7 +573,7 @@ int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) int p_ret = starpu_pthread_mutex_trylock(rwlock); if (!p_ret) - _STARPU_TRACE_RWLOCK_RDLOCKED(); + _starpu_trace_rwlock_wrlocked(); return p_ret; } @@ -579,11 +581,11 @@ int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_UNLOCKING_RWLOCK(); + _starpu_trace_unlocking_rwlock(); int p_ret = starpu_pthread_mutex_unlock(rwlock); - _STARPU_TRACE_RWLOCK_UNLOCKED(); + _starpu_trace_rwlock_unlocked(); return p_ret; } @@ -606,11 +608,11 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; - _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + _starpu_trace_barrier_wait_begin(); ret = sg_barrier_wait(*barrier); - _STARPU_TRACE_BARRIER_WAIT_END(); + _starpu_trace_barrier_wait_end(); return ret; } #elif defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) || defined(xbt_barrier_init) @@ -631,11 +633,11 @@ int
starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; - _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + _starpu_trace_barrier_wait_begin(); ret = xbt_barrier_wait(*barrier); - _STARPU_TRACE_BARRIER_WAIT_END(); + _starpu_trace_barrier_wait_end(); return ret; } #endif /* defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT) */ @@ -812,7 +814,7 @@ int starpu_pthread_barrier_destroy(starpu_pthread_barrier_t *barrier) int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret = 0; - _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + _starpu_trace_barrier_wait_begin(); starpu_pthread_mutex_lock(&barrier->mutex); barrier->done++; @@ -832,7 +834,7 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) starpu_pthread_mutex_unlock(&barrier->mutex); - _STARPU_TRACE_BARRIER_WAIT_END(); + _starpu_trace_barrier_wait_end(); return ret; } @@ -842,22 +844,22 @@ int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) #if !defined(STARPU_SIMGRID) && !defined(_MSC_VER) /* !STARPU_SIMGRID */ int starpu_pthread_mutex_lock(starpu_pthread_mutex_t *mutex) { - _STARPU_TRACE_LOCKING_MUTEX(); + _starpu_trace_locking_mutex(); int p_ret = pthread_mutex_lock(mutex); - _STARPU_TRACE_MUTEX_LOCKED(); + _starpu_trace_mutex_locked(); return p_ret; } int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) { - _STARPU_TRACE_UNLOCKING_MUTEX(); + _starpu_trace_unlocking_mutex(); int p_ret = pthread_mutex_unlock(mutex); - _STARPU_TRACE_MUTEX_UNLOCKED(); + _starpu_trace_mutex_unlocked(); return p_ret; } @@ -865,80 +867,80 @@ int starpu_pthread_mutex_unlock(starpu_pthread_mutex_t *mutex) int starpu_pthread_mutex_trylock(starpu_pthread_mutex_t *mutex) { int ret; - _STARPU_TRACE_TRYLOCK_MUTEX(); + _starpu_trace_trylock_mutex(); ret = pthread_mutex_trylock(mutex); if (!ret) - _STARPU_TRACE_MUTEX_LOCKED(); + _starpu_trace_mutex_locked(); return ret; } int starpu_pthread_cond_wait(starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex) { - 
_STARPU_TRACE_COND_WAIT_BEGIN(); + _starpu_trace_cond_wait_begin(); int p_ret = pthread_cond_wait(cond, mutex); - _STARPU_TRACE_COND_WAIT_END(); + _starpu_trace_cond_wait_end(); return p_ret; } int starpu_pthread_rwlock_rdlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_RDLOCKING_RWLOCK(); + _starpu_trace_rdlocking_rwlock(); int p_ret = pthread_rwlock_rdlock(rwlock); - _STARPU_TRACE_RWLOCK_RDLOCKED(); + _starpu_trace_rwlock_rdlocked(); return p_ret; } int starpu_pthread_rwlock_tryrdlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_RDLOCKING_RWLOCK(); + _starpu_trace_rdlocking_rwlock(); int p_ret = pthread_rwlock_tryrdlock(rwlock); if (!p_ret) - _STARPU_TRACE_RWLOCK_RDLOCKED(); + _starpu_trace_rwlock_rdlocked(); return p_ret; } int starpu_pthread_rwlock_wrlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_WRLOCKING_RWLOCK(); + _starpu_trace_wrlocking_rwlock(); int p_ret = pthread_rwlock_wrlock(rwlock); - _STARPU_TRACE_RWLOCK_WRLOCKED(); + _starpu_trace_rwlock_wrlocked(); return p_ret; } int starpu_pthread_rwlock_trywrlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_WRLOCKING_RWLOCK(); + _starpu_trace_wrlocking_rwlock(); int p_ret = pthread_rwlock_trywrlock(rwlock); if (!p_ret) - _STARPU_TRACE_RWLOCK_WRLOCKED(); + _starpu_trace_rwlock_wrlocked(); return p_ret; } int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) { - _STARPU_TRACE_UNLOCKING_RWLOCK(); + _starpu_trace_unlocking_rwlock(); int p_ret = pthread_rwlock_unlock(rwlock); - _STARPU_TRACE_RWLOCK_UNLOCKED(); + _starpu_trace_rwlock_unlocked(); return p_ret; } @@ -948,11 +950,11 @@ int starpu_pthread_rwlock_unlock(starpu_pthread_rwlock_t *rwlock) int starpu_pthread_barrier_wait(starpu_pthread_barrier_t *barrier) { int ret; - _STARPU_TRACE_BARRIER_WAIT_BEGIN(); + _starpu_trace_barrier_wait_begin(); ret = pthread_barrier_wait(barrier); - _STARPU_TRACE_BARRIER_WAIT_END(); + _starpu_trace_barrier_wait_end(); return ret; } diff --git a/src/common/utils.c b/src/common/utils.c index 
929c1faed8..9e7a494eec 100644 --- a/src/common/utils.c +++ b/src/common/utils.c @@ -26,6 +26,16 @@ #include #include +#ifdef STARPU_HAVE_WINDOWS +#include +#endif + +#ifdef __linux__ +#include /* for SYS_gettid */ +#elif defined(__FreeBSD__) +#include /* for thr_self() */ +#endif + #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -766,3 +776,31 @@ void starpu_display_bindings(void) _STARPU_DISP("hwloc not available to display bindings.\n"); #endif } + +long _starpu_gettid(void) +{ + /* TODO: test at configure whether __thread is available, and use that + * to cache the value. + * Don't use the TSD, this is getting called before we would have the + * time to allocate it. */ +#ifdef STARPU_SIMGRID +# ifdef HAVE_SG_ACTOR_SELF + return (uintptr_t) sg_actor_self(); +# else + return (uintptr_t) MSG_process_self(); +# endif +#else +#if defined(__linux__) + return syscall(SYS_gettid); +#elif defined(__FreeBSD__) + long tid; + thr_self(&tid); + return tid; +#elif defined(_WIN32) && !defined(__CYGWIN__) + return (long) GetCurrentThreadId(); +#else + return (long) starpu_pthread_self(); +#endif +#endif +} + diff --git a/src/common/utils.h b/src/common/utils.h index 6579978127..52c4a4e9a2 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -216,6 +216,8 @@ enum initialization { UNINITIALIZED = 0, CHANGING, INITIALIZED }; #pragma GCC visibility pop +long _starpu_gettid(void) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + #ifdef __cplusplus } #endif diff --git a/src/core/dependencies/cg.h b/src/core/dependencies/cg.h index 5a54a2ae01..828d9a488f 100644 --- a/src/core/dependencies/cg.h +++ b/src/core/dependencies/cg.h @@ -49,7 +49,7 @@ struct _starpu_cg_list /** Number of notifications to be waited for */ unsigned ndeps; /* how many deps ? */ unsigned ndeps_completed; /* how many deps are done ? 
*/ -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) /** Array of the notifications, size ndeps */ struct _starpu_cg **deps; /** Which ones have notified, size ndeps */ @@ -87,7 +87,7 @@ struct _starpu_cg /** number of remaining tags */ unsigned remaining; -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) unsigned ndeps; /** array of predecessors, size ndeps */ void **deps; diff --git a/src/core/dependencies/implicit_data_deps.c b/src/core/dependencies/implicit_data_deps.c index c87b06ec79..57ecd1e557 100644 --- a/src/core/dependencies/implicit_data_deps.c +++ b/src/core/dependencies/implicit_data_deps.c @@ -87,7 +87,7 @@ static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task || STARPU_AYU_EVENT ) && handle->last_submitted_ghost_sync_id_is_valid) { - _STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_sync_id, + _starpu_trace_ghost_task_deps(handle->last_submitted_ghost_sync_id, _starpu_get_job_associated_to_task(pre_sync_task)); _starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_sync_id, pre_sync_task); _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task); @@ -167,7 +167,7 @@ static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_tas while (ghost_accessors_id) { unsigned long id = ghost_accessors_id->id; - _STARPU_TRACE_GHOST_TASK_DEPS(id, + _starpu_trace_ghost_task_deps(id, _starpu_get_job_associated_to_task(pre_sync_task)); _starpu_add_ghost_dependency(handle, id, pre_sync_task); _STARPU_DEP_DEBUG("dep ID%lu -> %p\n", id, pre_sync_task); @@ -273,7 +273,7 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_ #endif ) { - _STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job); + _starpu_trace_ghost_task_deps(pre_sync_job->job_id, post_sync_job); _starpu_bound_task_dep(post_sync_job, pre_sync_job); } diff --git a/src/core/dependencies/tags.c 
b/src/core/dependencies/tags.c index c01b59dcf0..36bf26ed43 100644 --- a/src/core/dependencies/tags.c +++ b/src/core/dependencies/tags.c @@ -279,7 +279,7 @@ void _starpu_notify_tag_dependencies(struct _starpu_tag *tag) } tag->state = STARPU_DONE; - _STARPU_TRACE_TAG_DONE(tag); + _starpu_trace_tag_done(tag); _starpu_notify_cg_list(tag, &tag->tag_successors); @@ -321,7 +321,7 @@ void _starpu_notify_restart_tag_dependencies(struct _starpu_tag *tag) return; } - _STARPU_TRACE_TAG_DONE(tag); + _starpu_trace_tag_done(tag); tag->state = STARPU_BLOCKED; @@ -339,7 +339,7 @@ void starpu_tag_notify_restart_from_apps(starpu_tag_t id) void _starpu_tag_declare(starpu_tag_t id, struct _starpu_job *job) { - _STARPU_TRACE_TAG(id, job); + _starpu_trace_tag(&id, job); job->task->use_tag = 1; struct _starpu_tag *tag= gettag_struct(id); @@ -400,7 +400,7 @@ void starpu_tag_declare_deps_array(starpu_tag_t id, unsigned ndeps, starpu_tag_t /* id depends on dep_id * so cg should be among dep_id's successors*/ - _STARPU_TRACE_TAG_DEPS(id, dep_id); + _starpu_trace_tag_deps(&id, &dep_id); _starpu_bound_tag_dep(id, dep_id); struct _starpu_tag *tag_dep = gettag_struct(dep_id); STARPU_ASSERT(tag_dep != tag_child); @@ -436,7 +436,7 @@ void starpu_tag_declare_deps(starpu_tag_t id, unsigned ndeps, ...) 
/* id depends on dep_id * so cg should be among dep_id's successors*/ - _STARPU_TRACE_TAG_DEPS(id, dep_id); + _starpu_trace_tag_deps(&id, &dep_id); _starpu_bound_tag_dep(id, dep_id); struct _starpu_tag *tag_dep = gettag_struct(dep_id); STARPU_ASSERT(tag_dep != tag_child); diff --git a/src/core/dependencies/task_deps.c b/src/core/dependencies/task_deps.c index dc9d3bf047..7daf4e8eb8 100644 --- a/src/core/dependencies/task_deps.c +++ b/src/core/dependencies/task_deps.c @@ -33,7 +33,7 @@ static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j) cg->ntags = ntags; cg->remaining = ntags; -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) cg->ndeps = ntags; cg->deps = NULL; cg->done = NULL; @@ -42,7 +42,7 @@ static struct _starpu_cg *create_cg_task(unsigned ntags, struct _starpu_job *j) cg->succ.job = j; j->job_successors.ndeps++; -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) _STARPU_REALLOC(j->job_successors.deps, j->job_successors.ndeps * sizeof(j->job_successors.deps[0])); _STARPU_REALLOC(j->job_successors.done, j->job_successors.ndeps * sizeof(j->job_successors.done[0])); j->job_successors.deps[j->job_successors.ndeps-1] = cg; @@ -98,7 +98,7 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s struct _starpu_cg *cg = create_cg_task(ndeps, job); STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex); -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) _STARPU_MALLOC(cg->deps, ndeps * sizeof(cg->deps[0])); _STARPU_MALLOC(cg->done, ndeps * sizeof(cg->done[0])); #endif @@ -114,7 +114,7 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s dep_job = _starpu_get_job_associated_to_task(dep_task); STARPU_ASSERT_MSG(dep_task != task, "A task cannot be made to depend on itself"); -#ifdef STARPU_DEBUG +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) cg->deps[i] = dep_job; cg->done[i] = 0; #endif @@ 
-138,7 +138,7 @@ void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, s } STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex); - _STARPU_TRACE_TASK_DEPS(dep_job, job); + _starpu_trace_task_deps(dep_job, job); _starpu_bound_task_dep(job, dep_job); if (check) { diff --git a/src/core/jobs.c b/src/core/jobs.c index a0abbbd6df..bafbd6a23d 100644 --- a/src/core/jobs.c +++ b/src/core/jobs.c @@ -79,7 +79,7 @@ void _starpu_exclude_task_from_dag(struct starpu_task *task) struct _starpu_job *j = _starpu_get_job_associated_to_task(task); j->exclude_from_dag = 1; - _STARPU_TRACE_TASK_EXCLUDE_FROM_DAG(j); + _starpu_trace_task_exclude_from_dag(j); } /* create an internal struct _starpu_job structure to encapsulate the task */ @@ -101,7 +101,7 @@ struct _starpu_job* STARPU_ATTRIBUTE_MALLOC _starpu_job_create(struct starpu_tas job->task = task; if ( -#if defined(STARPU_DEBUG) +#if defined(STARPU_DEBUG) || defined(STARPU_PROF_TASKSTUBS) 1 #elif defined(STARPU_USE_FXT) fut_active @@ -325,7 +325,7 @@ void starpu_task_end_dep_release(struct starpu_task *t) if (current) { struct _starpu_job *jcurrent = _starpu_get_job_associated_to_task(current); - _STARPU_TRACE_TASK_END_DEP(jcurrent, j); + _starpu_trace_task_end_dep(jcurrent, j); } #endif @@ -391,9 +391,9 @@ void _starpu_handle_job_termination(struct _starpu_job *j) _starpu_set_current_task(task); - _STARPU_TRACE_START_CALLBACK(j); + _starpu_trace_start_callback(j); epilogue_callback(task->epilogue_callback_arg); - _STARPU_TRACE_END_CALLBACK(j); + _starpu_trace_end_callback(j); _starpu_set_current_task(current_task); @@ -569,9 +569,9 @@ void _starpu_handle_job_termination(struct _starpu_job *j) _starpu_set_current_task(task); - _STARPU_TRACE_START_CALLBACK(j); + _starpu_trace_start_callback(j); callback(task->callback_arg); - _STARPU_TRACE_END_CALLBACK(j); + _starpu_trace_end_callback(j); _starpu_set_current_task(current_task); @@ -589,7 +589,7 @@ void _starpu_handle_job_termination(struct _starpu_job *j) /* 
Note: For now, we keep the TASK_DONE trace event for continuation, * however we could add a specific event for stopped tasks if needed. */ - _STARPU_TRACE_TASK_DONE(j); + _starpu_trace_task_done(j); STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex); @@ -856,9 +856,9 @@ static int _starpu_turn_task_into_recursive_task(struct _starpu_job *j) void _starpu_recursive_task_execute(struct _starpu_job *j) { - _STARPU_TRACE_RECURSIVE_TASK(j); - _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); - _STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, 0, 0); + _starpu_trace_recursive_task(j); + _starpu_trace_task_name_line_color(j); + _starpu_trace_start_codelet_body(j, 0, NULL, 0, 0); STARPU_ASSERT_MSG(j->task->recursive_task_gen_dag_func!=NULL || (j->task->cl && j->task->cl->recursive_task_gen_dag_func!=NULL), "task->recursive_task_gen_dag_func MUST be defined\n"); @@ -873,7 +873,7 @@ void _starpu_recursive_task_execute(struct _starpu_job *j) else j->task->cl->recursive_task_gen_dag_func(j->task, j->task->recursive_task_gen_dag_func_arg); j->task->where = STARPU_NOWHERE; - _STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, 0, 0); + _starpu_trace_end_codelet_body(j, 0, NULL, 0, 0); } #endif @@ -940,7 +940,7 @@ unsigned _starpu_enforce_deps_and_schedule(struct _starpu_job *j) #ifdef STARPU_RECURSIVE_TASKS if (j->task->recursive_task_parent != 0) - _STARPU_TRACE_RECURSIVE_TASK_DEPS(j->task->recursive_task_parent, j); + _starpu_trace_recursive_task_deps(j->task->recursive_task_parent, j); #endif ret = _starpu_push_task(j); @@ -985,7 +985,7 @@ unsigned _starpu_enforce_deps_starting_from_task(struct _starpu_job *j) #ifdef STARPU_RECURSIVE_TASKS if (j->task->recursive_task_parent != 0) - _STARPU_TRACE_RECURSIVE_TASK_DEPS(j->task->recursive_task_parent, j); + _starpu_trace_recursive_task_deps(j->task->recursive_task_parent, j); #endif ret = _starpu_push_task(j); @@ -1026,7 +1026,7 @@ unsigned _starpu_take_deps_and_schedule(struct _starpu_job *j) #ifdef STARPU_RECURSIVE_TASKS if (j->task->recursive_task_parent != 0) - 
_STARPU_TRACE_RECURSIVE_TASK_DEPS(j->task->recursive_task_parent, j); + _starpu_trace_recursive_task_deps(j->task->recursive_task_parent, j); #endif /* And immediately push task */ diff --git a/src/core/jobs.h b/src/core/jobs.h index e4dc8fb878..3149a81258 100644 --- a/src/core/jobs.h +++ b/src/core/jobs.h @@ -34,7 +34,6 @@ #endif #include #include -#include #include #include #include @@ -219,6 +218,10 @@ struct _starpu_job unsigned is_recursive_task:1; #endif +#ifdef STARPU_PROF_TASKSTUBS + uintptr_t ps_task_timer; +#endif + #ifdef STARPU_NOSV nosv_task_type_t nosv_task_type; #endif diff --git a/src/core/sched_ctx.c b/src/core/sched_ctx.c index 9cd20ef410..79f8d04835 100644 --- a/src/core/sched_ctx.c +++ b/src/core/sched_ctx.c @@ -1012,9 +1012,9 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id) #ifdef STARPU_USE_SC_HYPERVISOR if (sched_ctx_id != 0 && sched_ctx_id != STARPU_NMAX_SCHED_CTXS && sched_ctx->perf_counters != NULL) { - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_delete_context(sched_ctx_id); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); } #endif //STARPU_USE_SC_HYPERVISOR @@ -2049,10 +2049,10 @@ void _starpu_sched_ctx_post_exec_task_cb(int workerid, struct starpu_task *task, if(_starpu_sched_ctx_allow_hypervisor(sched_ctx->id) || task->hypervisor_tag > 0) { - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_post_exec_task(task, data_size[task->sched_ctx][workerid], footprint, task->hypervisor_tag, flops[task->sched_ctx][workerid]); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); flops[task->sched_ctx][workerid] = 0.0; data_size[task->sched_ctx][workerid] = 0; } @@ -2066,9 +2066,9 @@ void starpu_sched_ctx_call_pushed_task_cb(int workerid, unsigned sched_ctx_id) if(sched_ctx != NULL && sched_ctx_id != _starpu_get_initial_sched_ctx()->id && sched_ctx_id != STARPU_NMAX_SCHED_CTXS && 
sched_ctx->perf_counters != NULL && _starpu_sched_ctx_allow_hypervisor(sched_ctx_id)) { - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_pushed_task(sched_ctx_id, workerid); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); } } #endif //STARPU_USE_SC_HYPERVISOR diff --git a/src/core/sched_ctx.h b/src/core/sched_ctx.h index 12d1742b8c..bf113566af 100644 --- a/src/core/sched_ctx.h +++ b/src/core/sched_ctx.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "sched_ctx_list.h" #ifdef STARPU_HAVE_HWLOC diff --git a/src/core/sched_policy.c b/src/core/sched_policy.c index 98c3c498ec..3107e98b00 100644 --- a/src/core/sched_policy.c +++ b/src/core/sched_policy.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -543,7 +544,7 @@ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int wo struct starpu_task *alias = starpu_task_dup(task); alias->destroy = 1; - _STARPU_TRACE_JOB_PUSH(alias, alias->priority); + _starpu_trace_job_push(alias, alias->priority); worker = _starpu_get_worker_struct(combined_workerid[j]); ret |= _starpu_push_local_task(worker, alias); } @@ -637,9 +638,9 @@ int _starpu_repush_task(struct _starpu_job *j) if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL && sched_ctx->perf_counters->notify_empty_ctx) { - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); } #endif return 0; @@ -654,7 +655,7 @@ int _starpu_repush_task(struct _starpu_job *j) * corresponding dependencies */ if (task->cl == NULL || task->where == STARPU_NOWHERE) { - _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); + _starpu_trace_task_name_line_color(j); if (!_starpu_perf_counter_paused() && !j->internal) { (void)STARPU_PERF_COUNTER_ADD64(& _starpu_task__g_current_ready__value, -1); @@ -674,8 +675,8 @@ 
int _starpu_repush_task(struct _starpu_job *j) { int worker_id = starpu_worker_get_id(); - _STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, worker_id, 0); - _STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, worker_id, 0); + _starpu_trace_start_codelet_body(j, 0, NULL, worker_id, 0); + _starpu_trace_end_codelet_body(j, 0, NULL, worker_id, 0); } if (task->cl && task->cl->specific_nodes) @@ -708,7 +709,7 @@ int _starpu_push_task_to_workers(struct starpu_task *task) { struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx); - _STARPU_TRACE_JOB_PUSH(task, task->priority); + _starpu_trace_job_push(task, task->priority); /* if the contexts still does not have workers put the task back to its place in the empty ctx list */ @@ -727,9 +728,9 @@ int _starpu_push_task_to_workers(struct starpu_task *task) if(sched_ctx->id != 0 && sched_ctx->perf_counters != NULL && sched_ctx->perf_counters->notify_empty_ctx) { - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); } #endif @@ -779,7 +780,7 @@ int _starpu_push_task_to_workers(struct starpu_task *task) if (job->task_size > 1) { alias = starpu_task_dup(task); - _STARPU_TRACE_JOB_PUSH(alias, alias->priority); + _starpu_trace_job_push(alias, alias->priority); alias->destroy = 1; } else @@ -839,7 +840,7 @@ int _starpu_push_task_to_workers(struct starpu_task *task) if(ret == -1) { _STARPU_MSG("repush task \n"); - _STARPU_TRACE_JOB_POP(task, task->priority); + _starpu_trace_job_pop(task, task->priority); ret = _starpu_push_task_to_workers(task); } } @@ -866,7 +867,7 @@ int _starpu_pop_task_end(struct starpu_task *task) { if (!task) return 0; - _STARPU_TRACE_JOB_POP(task, task->priority); + _starpu_trace_job_pop(task, task->priority); return 0; } @@ -1091,9 +1092,9 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker) struct 
starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters; if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) { -// _STARPU_TRACE_HYPERVISOR_BEGIN(); +// _starpu_trace_hypervisor_begin(); perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0); -// _STARPU_TRACE_HYPERVISOR_END(); +// _starpu_trace_hypervisor_end(); } } #endif //STARPU_USE_SC_HYPERVISOR @@ -1131,9 +1132,9 @@ struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker) if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id)) { -// _STARPU_TRACE_HYPERVISOR_BEGIN(); +// _starpu_trace_hypervisor_begin(); perf_counters->notify_poped_task(task->sched_ctx, worker->workerid); -// _STARPU_TRACE_HYPERVISOR_END(); +// _starpu_trace_hypervisor_end(); } #endif //STARPU_USE_SC_HYPERVISOR diff --git a/src/core/sched_policy.h b/src/core/sched_policy.h index 2aa0e47bc3..278a5257d9 100644 --- a/src/core/sched_policy.h +++ b/src/core/sched_policy.h @@ -25,17 +25,18 @@ #include #include #include +#include #include #pragma GCC visibility push(hidden) #define _STARPU_SCHED_BEGIN \ - _STARPU_TRACE_WORKER_SCHEDULING_PUSH; \ + _starpu_trace_worker_scheduling_push(); \ _SIMGRID_TIMER_BEGIN(_starpu_simgrid_sched_cost()) #define _STARPU_SCHED_END \ _SIMGRID_TIMER_END; \ - _STARPU_TRACE_WORKER_SCHEDULING_POP + _starpu_trace_worker_scheduling_pop() void _starpu_sched_init(void); diff --git a/src/core/task.c b/src/core/task.c index 9049a50e93..a4a975c493 100644 --- a/src/core/task.c +++ b/src/core/task.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -562,7 +561,7 @@ int starpu_task_wait(struct starpu_task *task) struct _starpu_job *j = _starpu_get_job_associated_to_task(task); - _STARPU_TRACE_TASK_WAIT_START(j); + _starpu_trace_task_wait_start(j); starpu_do_schedule(); 
_starpu_wait_job(j); @@ -573,7 +572,7 @@ int starpu_task_wait(struct starpu_task *task) _starpu_task_destroy(task); _starpu_perf_counter_update_global_sample(); - _STARPU_TRACE_TASK_WAIT_END(); + _starpu_trace_task_wait_end(); _STARPU_LOG_OUT(); return 0; } @@ -669,9 +668,9 @@ int _starpu_submit_job(struct _starpu_job *j, int nodeps) } } - _STARPU_TRACE_HYPERVISOR_BEGIN(); + _starpu_trace_hypervisor_begin(); sched_ctx->perf_counters->notify_submitted_job(j->task, j->footprint, data_size); - _STARPU_TRACE_HYPERVISOR_END(); + _starpu_trace_hypervisor_end(); } #endif//STARPU_USE_SC_HYPERVISOR @@ -960,7 +959,7 @@ static int _starpu_task_submit_head(struct starpu_task *task) { unsigned i; unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); - _STARPU_TRACE_UPDATE_TASK_CNT(0); + _starpu_trace_update_task_cnt(0); /* Check buffers */ if (task->dyn_handles == NULL) @@ -1105,13 +1104,13 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps) && limit_min_submitted_tasks < nsubmitted_tasks) { starpu_do_schedule(); - _STARPU_TRACE_TASK_THROTTLE_START(); + _starpu_trace_task_throttle_start(); starpu_task_wait_for_n_submitted(limit_min_submitted_tasks); - _STARPU_TRACE_TASK_THROTTLE_END(); + _starpu_trace_task_throttle_end(); } } - _STARPU_TRACE_TASK_SUBMIT_START(); + _starpu_trace_task_submit_start(); if (task->cl && !continuation) { @@ -1121,7 +1120,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps) ret = _starpu_task_submit_head(task); if (ret) { - _STARPU_TRACE_TASK_SUBMIT_END(); + _starpu_trace_task_submit_end(); return ret; } @@ -1134,10 +1133,10 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps) #endif task->iterations[0] = _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[0]; task->iterations[1] = _starpu_get_sched_ctx_struct(task->sched_ctx)->iterations[1]; - _STARPU_TRACE_TASK_SUBMIT(j, task->iterations[0], task->iterations[1]); - _STARPU_TRACE_TASK_COLOR(j); - _STARPU_TRACE_TASK_NAME(j); - _STARPU_TRACE_TASK_LINE(j); + 
_starpu_trace_task_submit(j, task->iterations[0], task->iterations[1]); + _starpu_trace_task_color(j); + _starpu_trace_task_name(j); + _starpu_trace_task_line(j); } /* If this is a continuation, we don't modify the implicit data dependencies detected earlier. */ @@ -1214,7 +1213,7 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps) _starpu_task_destroy(task); } - _STARPU_TRACE_TASK_SUBMIT_END(); + _starpu_trace_task_submit_end(); _STARPU_LOG_OUT(); return ret; } @@ -1438,9 +1437,9 @@ int starpu_task_wait_for_all(void) int _starpu_task_wait_for_all_in_ctx_and_return_nb_waited_tasks(unsigned sched_ctx) { - _STARPU_TRACE_TASK_WAIT_FOR_ALL_START(); + _starpu_trace_task_wait_for_all_start(); int ret = _starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx); - _STARPU_TRACE_TASK_WAIT_FOR_ALL_END(); + _starpu_trace_task_wait_for_all_end(); /* TODO: improve Temanejo into knowing about contexts ... */ STARPU_AYU_BARRIER(); return ret; diff --git a/src/core/task.h b/src/core/task.h index a0ab87245c..20085adf0c 100644 --- a/src/core/task.h +++ b/src/core/task.h @@ -24,6 +24,10 @@ #include #include +#ifdef BUILDING_STARPU +#include +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/src/core/topology.h b/src/core/topology.h index 7b47970be1..bcfb1da3d2 100644 --- a/src/core/topology.h +++ b/src/core/topology.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #pragma GCC visibility push(hidden) diff --git a/src/core/workers.c b/src/core/workers.c index 71e9bb1244..56b81ae1d8 100644 --- a/src/core/workers.c +++ b/src/core/workers.c @@ -35,8 +35,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -921,25 +921,15 @@ static void _starpu_worker_deinit(struct _starpu_worker *workerarg) _starpu_perf_counter_sample_exit(&workerarg->perf_counter_sample); } -#ifdef STARPU_USE_FXT -void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync) -{ - unsigned devid = 
worker->devid; - unsigned memnode = worker->memory_node; - _STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync); -} -#endif - void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED) { (void) archtype; int devid = worker->devid; (void) devid; -#ifdef STARPU_USE_FXT - _STARPU_TRACE_REGISTER_THREAD(worker->bindid); - _starpu_worker_start(worker, archtype, sync); -#endif + _starpu_trace_register_thread(worker->bindid); + _starpu_trace_worker_init_start(worker, archtype, sync); + _starpu_set_local_worker_key(worker); STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); @@ -1452,9 +1442,7 @@ void _starpu_handler(int sig) #ifdef STARPU_VERBOSE _STARPU_MSG("Catching signal '%d'\n", sig); #endif -#ifdef STARPU_USE_FXT - _starpu_fxt_dump_file(); -#endif + _starpu_trace_finalize(); if (sig == SIGINT) { void (*sig_act)(int) = act_sigint; @@ -1829,9 +1817,9 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv) _starpu_init_perfmodel(); -#ifdef STARPU_USE_FXT - _starpu_fxt_init_profiling(_starpu_config.conf.trace_buffer_size); -#endif + /* Initialize the profiling tool(s) */ + + _starpu_trace_initialize(); _starpu_open_debug_logfile(); @@ -1876,9 +1864,7 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv) STARPU_PTHREAD_COND_SIGNAL(&init_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex); -#ifdef STARPU_USE_FXT - _starpu_stop_fxt_profiling(); -#endif + _starpu_trace_finalize(); return ret; } @@ -1966,12 +1952,6 @@ int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv) fflush(stdout); } -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_init_end, 0, starpu_prof_tool_driver_cpu, &(_starpu_config.conf)); - pi.conf = &_starpu_config.conf; - starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end(&pi, NULL, NULL); -#endif - return 0; } @@ -2035,10 +2015,7 @@ 
static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig) STARPU_ASSERT(_starpu_ctx_change_list_empty(&worker->ctx_change_list)); } -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_terminate, 0, starpu_prof_tool_driver_cpu, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate(&pi, NULL, NULL); -#endif + _starpu_trace_finalize(); } /* Condition variable and mutex used to pause/resume. */ @@ -2226,8 +2203,6 @@ void starpu_shutdown(void) for (worker = 0; worker < _starpu_config.topology.nworkers; worker++) _starpu_worker_deinit(&_starpu_config.workers[worker]); - _starpu_prof_tool_unload(); - _starpu_profiling_terminate(); _starpu_disk_unregister(); @@ -2237,9 +2212,8 @@ void starpu_shutdown(void) #endif _starpu_destroy_topology(&_starpu_config); _starpu_initialized_combined_workers = 0; -#ifdef STARPU_USE_FXT - _starpu_stop_fxt_profiling(); -#endif + + _starpu_trace_finalize(); _starpu_data_interface_shutdown(); diff --git a/src/core/workers.h b/src/core/workers.h index dc4c320a7b..1c029258d7 100644 --- a/src/core/workers.h +++ b/src/core/workers.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/datawizard/coherency.c b/src/datawizard/coherency.c index d9c6074c98..4ecdfc9922 100644 --- a/src/datawizard/coherency.c +++ b/src/datawizard/coherency.c @@ -205,11 +205,11 @@ void _starpu_update_data_state(starpu_data_handle_t handle, /* The mapping node will be kept up to date */ continue; if (handle->per_node[node].state != STARPU_INVALID) - _STARPU_TRACE_DATA_STATE_INVALID(handle, node); + _starpu_trace_data_state_invalid(&handle, node); handle->per_node[node].state = STARPU_INVALID; } if (requesting_replicate->state != STARPU_OWNER) - _STARPU_TRACE_DATA_STATE_OWNER(handle, requesting_node); + _starpu_trace_data_state_owner(&handle, requesting_node); requesting_replicate->state = STARPU_OWNER; if 
(handle->home_node != -1 && handle->per_node[handle->home_node].state == STARPU_INVALID) /* Notify that this MC is now dirty */ @@ -228,12 +228,12 @@ void _starpu_update_data_state(starpu_data_handle_t handle, if (replicate->state != STARPU_INVALID) { if (replicate->state != STARPU_SHARED) - _STARPU_TRACE_DATA_STATE_SHARED(handle, node); + _starpu_trace_data_state_shared(&handle, node); replicate->state = STARPU_SHARED; } } if (requesting_replicate->state != STARPU_SHARED) - _STARPU_TRACE_DATA_STATE_SHARED(handle, requesting_node); + _starpu_trace_data_state_shared(&handle, requesting_node); requesting_replicate->state = STARPU_SHARED; } } @@ -1159,10 +1159,10 @@ int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, in worker->task_transferring = task; worker->nb_buffers_transferred = 0; if (worker->ntasks <= 1) - _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + _starpu_trace_worker_start_fetch_input(NULL, workerid); } else - _STARPU_TRACE_START_FETCH_INPUT(NULL); + _starpu_trace_start_fetch_input(NULL); int profiling = starpu_profiling_status_get(); if (profiling && task->profiling_info) @@ -1252,7 +1252,7 @@ int _starpu_fetch_task_input(struct starpu_task *task, struct _starpu_job *j, in return 0; enomem: - _STARPU_TRACE_END_FETCH_INPUT(NULL); + _starpu_trace_end_fetch_input(NULL); _STARPU_DISP("something went wrong with buffer %u\n", index); /* try to unreference all the input that were successfully taken */ @@ -1355,12 +1355,12 @@ void _starpu_fetch_task_input_tail(struct starpu_task *task, struct _starpu_job total_size += _starpu_data_get_size(handle); #endif } - _STARPU_TRACE_DATA_LOAD(workerid,total_size); + _starpu_trace_data_load(workerid,total_size); if (profiling && task->profiling_info) _starpu_clock_gettime(&task->profiling_info->acquire_data_end_time); - _STARPU_TRACE_END_FETCH_INPUT(NULL); + _starpu_trace_end_fetch_input(NULL); _starpu_clear_worker_status(worker, STATUS_INDEX_WAITING, NULL); } @@ -1435,9 +1435,9 @@ 
void __starpu_push_task_output(struct _starpu_job *j) /* Version for a driver running on a worker: we show the driver state in the trace */ void _starpu_push_task_output(struct _starpu_job *j) { - _STARPU_TRACE_START_PUSH_OUTPUT(NULL); + _starpu_trace_start_push_output(NULL); __starpu_push_task_output(j); - _STARPU_TRACE_END_PUSH_OUTPUT(NULL); + _starpu_trace_end_push_output(NULL); } struct fetch_nowhere_wrapper diff --git a/src/datawizard/coherency.h b/src/datawizard/coherency.h index 8002ecb01b..b22135b289 100644 --- a/src/datawizard/coherency.h +++ b/src/datawizard/coherency.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/datawizard/copy_driver.c b/src/datawizard/copy_driver.c index cdb234b4cd..4236ba173c 100644 --- a/src/datawizard/copy_driver.c +++ b/src/datawizard/copy_driver.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -359,11 +358,11 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d dst_replicate->initialized = 1; - _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch, handle); + _starpu_trace_start_driver_copy(src_node, dst_node, size, com_id, prefetch, &handle); int ret_copy = copy_data_1_to_1_generic(handle, src_replicate, dst_replicate, req); if (!req) /* Synchronous, this is already finished */ - _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id, prefetch); + _starpu_trace_end_driver_copy(src_node, dst_node, size, com_id, prefetch); return ret_copy; } @@ -373,13 +372,13 @@ int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT _starpu_driver_copy_data_1_to_1(starpu_d void starpu_interface_data_copy(unsigned src_node, unsigned dst_node, size_t size) { - _STARPU_TRACE_DATA_COPY(src_node, dst_node, size); + _starpu_trace_data_copy(src_node, dst_node, size); } void starpu_interface_start_driver_copy_async(unsigned src_node, unsigned dst_node, double *start) { *start = starpu_timing_now(); - 
_STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node); + _starpu_trace_start_driver_copy_async(src_node, dst_node); } void starpu_interface_start_driver_copy_async_devid(int src_dev, enum starpu_node_kind src_kind, @@ -397,7 +396,7 @@ void starpu_interface_start_driver_copy_async_devid(int src_dev, enum starpu_nod (void)dst_kind; #endif *start = starpu_timing_now(); - _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node); + _starpu_trace_start_driver_copy_async(src_node, dst_node); } void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node, double start) @@ -418,7 +417,7 @@ void starpu_interface_end_driver_copy_async(unsigned src_node, unsigned dst_node _STARPU_DISP("Warning: the submission of asynchronous transfer from %s to %s took a very long time (%f ms)\nFor proper asynchronous transfer overlapping, data registered to StarPU must be allocated with starpu_malloc() or pinned with starpu_memory_pin()\n", src_name, dst_name, elapsed / 1000.); } } - _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node); + _starpu_trace_end_driver_copy_async(src_node, dst_node); } void starpu_interface_end_driver_copy_async_devid(int src_dev, enum starpu_node_kind src_kind, @@ -457,7 +456,7 @@ void starpu_interface_end_driver_copy_async_devid(int src_dev, enum starpu_node_ _STARPU_DISP("Warning: the submission of asynchronous transfer from %s to %s took a very long time (%f ms)\nFor proper asynchronous transfer overlapping, data registered to StarPU must be allocated with starpu_malloc() or pinned with starpu_memory_pin()\n", src_name, dst_name, elapsed / 1000.); } } - _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node); + _starpu_trace_end_driver_copy_async(src_node, dst_node); } /* This can be used by interfaces to easily transfer a piece of data without diff --git a/src/datawizard/data_request.c b/src/datawizard/data_request.c index d631d21e93..6450143475 100644 --- a/src/datawizard/data_request.c +++ b/src/datawizard/data_request.c @@ -153,7 
+153,7 @@ struct _starpu_data_request *_starpu_create_data_request(starpu_data_handle_t ha _starpu_spin_init(&r->lock); - _STARPU_TRACE_DATA_REQUEST_CREATED(handle, src_replicate?src_replicate->memory_node:-1, dst_replicate?dst_replicate->memory_node:-1, prio, is_prefetch, r); + _starpu_trace_data_request_created(&handle, src_replicate?src_replicate->memory_node:-1, dst_replicate?dst_replicate->memory_node:-1, prio, is_prefetch, r); r->origin = origin; r->handle = handle; @@ -443,7 +443,7 @@ static void starpu_handle_data_request_completion(struct _starpu_data_request *r unsigned src_node = src_replicate->memory_node; unsigned dst_node = dst_replicate->memory_node; size_t size = _starpu_data_get_size(handle); - _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, r->com_id, r->prefetch); + _starpu_trace_end_driver_copy(src_node, dst_node, size, r->com_id, r->prefetch); } #endif diff --git a/src/datawizard/filters.c b/src/datawizard/filters.c index 0c81a895f9..97bd7bba0f 100644 --- a/src/datawizard/filters.c +++ b/src/datawizard/filters.c @@ -319,7 +319,7 @@ static void _starpu_data_partition(starpu_data_handle_t initial_handle, starpu_d * store it in the handle */ child->footprint = _starpu_compute_data_footprint(child); - _STARPU_TRACE_HANDLE_DATA_REGISTER(child); + _starpu_trace_handle_data_register(&child); } /* now let the header */ _starpu_spin_unlock(&initial_handle->header_lock); @@ -340,7 +340,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin unsigned node; unsigned sizes[root_handle->nchildren]; - _STARPU_TRACE_START_UNPARTITION(root_handle, gathering_node); + _starpu_trace_start_unpartition(&root_handle, gathering_node); STARPU_ASSERT_MSG(root_handle->nchildren != 0, "data %p is not partitioned, can not unpartition it", root_handle); @@ -564,7 +564,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin free(child_handle->switch_cl); } - 
_STARPU_TRACE_HANDLE_DATA_UNREGISTER(child_handle); + _starpu_trace_handle_data_unregister(&child_handle); } /* there is no child anymore */ @@ -578,7 +578,7 @@ void starpu_data_unpartition(starpu_data_handle_t root_handle, unsigned gatherin free(children); - _STARPU_TRACE_END_UNPARTITION(root_handle, gathering_node); + _starpu_trace_end_unpartition(&root_handle, gathering_node); } void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_data_filter *f) diff --git a/src/datawizard/interfaces/data_interface.c b/src/datawizard/interfaces/data_interface.c index 51c5860bd0..43f354aafe 100644 --- a/src/datawizard/interfaces/data_interface.c +++ b/src/datawizard/interfaces/data_interface.c @@ -458,7 +458,7 @@ void starpu_data_register(starpu_data_handle_t *handleptr, int home_node, void * _starpu_data_register_ops(ops); _starpu_register_new_data(handle, home_node, 0); - _STARPU_TRACE_HANDLE_DATA_REGISTER(handle); + _starpu_trace_handle_data_register(&handle); } void starpu_data_register_same(starpu_data_handle_t *handledst, starpu_data_handle_t handlesrc) @@ -892,7 +892,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere free(handle->switch_cl->dyn_nodes); free(handle->switch_cl); } - _STARPU_TRACE_HANDLE_DATA_UNREGISTER(handle); + _starpu_trace_handle_data_unregister(&handle); free(handle); (void)STARPU_ATOMIC_ADD(&nregistered, -1); } @@ -958,7 +958,7 @@ static void __starpu_data_deinitialize(starpu_data_handle_t handle) struct _starpu_data_replicate *local = &handle->per_node[node]; if (local->state != STARPU_INVALID) - _STARPU_TRACE_DATA_STATE_INVALID(handle, node); + _starpu_trace_data_state_invalid(&handle, node); local->state = STARPU_INVALID; local->initialized = 0; } @@ -1204,7 +1204,7 @@ size_t starpu_data_get_alloc_size(starpu_data_handle_t handle) void starpu_data_set_name(starpu_data_handle_t handle STARPU_ATTRIBUTE_UNUSED, const char *name STARPU_ATTRIBUTE_UNUSED) { - _STARPU_TRACE_DATA_NAME(handle, 
name); + _starpu_trace_data_name(&handle, name); } int starpu_data_get_home_node(starpu_data_handle_t handle) @@ -1224,7 +1224,7 @@ void starpu_data_set_coordinates_array(starpu_data_handle_t handle, unsigned dim for (i = 0; i < dimensions; i++) handle->coordinates[i] = dims[i]; - _STARPU_TRACE_DATA_COORDINATES(handle, dimensions, dims); + _starpu_trace_data_coordinates(&handle, dimensions, dims); } void starpu_data_set_coordinates(starpu_data_handle_t handle, unsigned dimensions, ...) diff --git a/src/datawizard/malloc.c b/src/datawizard/malloc.c index 813a02473d..74e9e118d7 100644 --- a/src/datawizard/malloc.c +++ b/src/datawizard/malloc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -205,9 +204,9 @@ int _starpu_malloc_flags_on_node(unsigned dst_node, void **A, size_t dim, int fl size_t freed; size_t reclaim = 2 * dim; _STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", (long)reclaim); - _STARPU_TRACE_START_MEMRECLAIM(dst_node,0); + _starpu_trace_start_memreclaim(dst_node,0); freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim, STARPU_FETCH); - _STARPU_TRACE_END_MEMRECLAIM(dst_node,0); + _starpu_trace_end_memreclaim(dst_node,0); if (freed < dim && !(flags & STARPU_MEMORY_WAIT)) { // We could not reclaim enough memory @@ -780,7 +779,7 @@ static uintptr_t _starpu_malloc_on_node(unsigned dst_node, size_t size, int flag if (addr == 0) { // Allocation failed, gives the memory back to the memory manager - _STARPU_TRACE_MEMORY_FULL(size); + _starpu_trace_memory_full(size); if (flags & STARPU_MALLOC_COUNT) starpu_memory_deallocate(dst_node, size); } diff --git a/src/datawizard/memalloc.c b/src/datawizard/memalloc.c index 06a593f74a..c69ef06c7a 100644 --- a/src/datawizard/memalloc.c +++ b/src/datawizard/memalloc.c @@ -341,7 +341,7 @@ static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_d /* some other node may have the copy */ if (src_replicate->state != STARPU_INVALID) - 
_STARPU_TRACE_DATA_STATE_INVALID(handle, src_node); + _starpu_trace_data_state_invalid(&handle, src_node); src_replicate->state = STARPU_INVALID; /* count the number of copies */ @@ -358,7 +358,7 @@ static int STARPU_ATTRIBUTE_WARN_UNUSED_RESULT transfer_subtree_to_node(starpu_d if (cnt == 1) { if (handle->per_node[last].state != STARPU_OWNER) - _STARPU_TRACE_DATA_STATE_OWNER(handle, last); + _starpu_trace_data_state_owner(&handle, last); handle->per_node[last].state = STARPU_OWNER; } @@ -434,9 +434,9 @@ static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node) data_interface = mc->chunk_interface; STARPU_ASSERT(data_interface); - _STARPU_TRACE_START_FREE(node, mc->size, handle); + _starpu_trace_start_free(node, mc->size, &handle); mc->ops->free_data_on_node(data_interface, node); - _STARPU_TRACE_END_FREE(node, handle); + _starpu_trace_end_free(node, &handle); if (handle) notify_handle_children(handle, replicate, node); @@ -666,12 +666,12 @@ static size_t try_to_throw_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node if (handle->per_node[node].state == STARPU_OWNER) _starpu_memory_handle_stats_invalidated(handle, node); #endif - _STARPU_TRACE_START_WRITEBACK(node, handle); + _starpu_trace_start_writeback(node, &handle); /* Note: this may need to allocate data etc. * and thus release the header lock, take * mc_lock, etc. 
*/ res = transfer_subtree_to_node(handle, node, target); - _STARPU_TRACE_END_WRITEBACK(node, handle); + _starpu_trace_end_writeback(node, &handle); #ifdef STARPU_MEMORY_STATS _starpu_memory_handle_stats_loaded_owner(handle, target); #endif @@ -1233,7 +1233,7 @@ void starpu_memchunk_tidy(unsigned node) /* _STARPU_DEBUG("%d not clean: %d %d\n", node, node_struct->mc_clean_nb, node_struct->mc_nb); */ - _STARPU_TRACE_START_WRITEBACK_ASYNC(node); + _starpu_trace_start_writeback_async(node); _starpu_spin_lock(&node_struct->mc_lock); for (mc = node_struct->mc_dirty_head; @@ -1378,7 +1378,7 @@ void starpu_memchunk_tidy(unsigned node) _starpu_spin_unlock(&handle->header_lock); } _starpu_spin_unlock(&node_struct->mc_lock); - _STARPU_TRACE_END_WRITEBACK_ASYNC(node); + _starpu_trace_end_writeback_async(node); } total = starpu_memory_get_total(node); @@ -1418,9 +1418,9 @@ void starpu_memchunk_tidy(unsigned node) } } - _STARPU_TRACE_START_MEMRECLAIM(node,2); + _starpu_trace_start_memreclaim(node,2); free_potentially_in_use_mc(node, 0, amount, STARPU_PREFETCH); - _STARPU_TRACE_END_MEMRECLAIM(node,2); + _starpu_trace_end_memreclaim(node,2); out: (void) STARPU_ATOMIC_ADD(&node_struct->tidying, -1); } @@ -1587,16 +1587,16 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st #ifdef STARPU_USE_ALLOCATION_CACHE if (!prefetch_oom) - _STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size, handle, is_prefetch); + _starpu_trace_start_alloc_reuse(dst_node, data_size, &handle, is_prefetch); if (try_to_find_reusable_mc(dst_node, handle, replicate, footprint)) { _starpu_allocation_cache_hit(dst_node); if (!prefetch_oom) - _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 1); + _starpu_trace_end_alloc_reuse(dst_node, &handle, 1); return data_size; } if (!prefetch_oom) - _STARPU_TRACE_END_ALLOC_REUSE(dst_node, handle, 0); + _starpu_trace_end_alloc_reuse(dst_node, &handle, 0); #endif /* If this is RAM and pinned this will be slow @@ -1624,11 +1624,11 @@ static 
starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st do { if (!prefetch_oom) - _STARPU_TRACE_START_ALLOC(dst_node, data_size, handle, is_prefetch); + _starpu_trace_start_alloc(dst_node, data_size, &handle, is_prefetch); allocated_memory = handle->ops->allocate_data_on_node(data_interface, dst_node); if (!prefetch_oom) - _STARPU_TRACE_END_ALLOC(dst_node, handle, allocated_memory); + _starpu_trace_end_alloc(dst_node, &handle, allocated_memory); if (allocated_memory == -ENOMEM) { @@ -1673,9 +1673,9 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st told_reclaiming = 1; } /* That was not enough, we have to really reclaim */ - _STARPU_TRACE_START_MEMRECLAIM(dst_node,is_prefetch); + _starpu_trace_start_memreclaim(dst_node,is_prefetch); freed = _starpu_memory_reclaim_generic(dst_node, 0, reclaim, is_prefetch); - _STARPU_TRACE_END_MEMRECLAIM(dst_node,is_prefetch); + _starpu_trace_end_memreclaim(dst_node,is_prefetch); if (!freed && is_prefetch >= STARPU_FETCH) { @@ -1727,9 +1727,9 @@ static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, st else if (replicate->allocated) { /* Argl, somebody allocated it in between already, drop this one */ - _STARPU_TRACE_START_FREE(dst_node, data_size, handle); + _starpu_trace_start_free(dst_node, data_size, &handle); handle->ops->free_data_on_node(data_interface, dst_node); - _STARPU_TRACE_END_FREE(dst_node, handle); + _starpu_trace_end_free(dst_node, &handle); allocated_memory = 0; } else diff --git a/src/datawizard/memory_manager.c b/src/datawizard/memory_manager.c index 699b0cc7d1..a9e8e973b4 100644 --- a/src/datawizard/memory_manager.c +++ b/src/datawizard/memory_manager.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -101,7 +100,7 @@ int starpu_memory_allocate(unsigned node, size_t size, int flags) /* And take it */ node_struct->used_size += size; - _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + 
_starpu_trace_used_mem(node, node_struct->used_size); ret = 0; } else if (flags & STARPU_MEMORY_OVERFLOW @@ -109,7 +108,7 @@ int starpu_memory_allocate(unsigned node, size_t size, int flags) || node_struct->used_size + size <= node_struct->global_size) { node_struct->used_size += size; - _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + _starpu_trace_used_mem(node, node_struct->used_size); ret = 0; } else @@ -126,7 +125,7 @@ void starpu_memory_deallocate(unsigned node, size_t size) STARPU_PTHREAD_MUTEX_LOCK(&node_struct->lock_nodes); node_struct->used_size -= size; - _STARPU_TRACE_USED_MEM(node, node_struct->used_size); + _starpu_trace_used_mem(node, node_struct->used_size); /* If there's now room for waiters, wake them */ if (node_struct->waiting_size && diff --git a/src/datawizard/memory_nodes.c b/src/datawizard/memory_nodes.c index 6f80fb6e01..af8398afb3 100644 --- a/src/datawizard/memory_nodes.c +++ b/src/datawizard/memory_nodes.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -120,7 +120,7 @@ unsigned _starpu_memory_node_register(enum starpu_node_kind kind, int devid) STARPU_ASSERT_MSG(node < STARPU_MAXNODES,"Too many nodes (%u) for maximum %d. 
Use configure option --enable-maxnodes=xxx to update the maximum number of nodes.", node + 1, STARPU_MAXNODES); _starpu_descr.nodes[node] = kind; - _STARPU_TRACE_NEW_MEM_NODE(node); + _starpu_trace_new_mem_node(node); _starpu_descr.devid[node] = devid; _starpu_descr.node_ops[node] = node_ops; diff --git a/src/datawizard/user_interactions.c b/src/datawizard/user_interactions.c index 78f520e727..65b96736fe 100644 --- a/src/datawizard/user_interactions.c +++ b/src/datawizard/user_interactions.c @@ -669,7 +669,7 @@ static void _starpu_data_wont_use(void *data) unsigned node; starpu_data_handle_t handle = data; - _STARPU_TRACE_DATA_DOING_WONT_USE(handle); + _starpu_trace_data_doing_wont_use(&handle); _starpu_spin_lock(&handle->header_lock); for (node = 0; node < STARPU_MAXNODES; node++) @@ -742,7 +742,7 @@ void starpu_data_wont_use(starpu_data_handle_t handle) return; } - _STARPU_TRACE_DATA_WONT_USE(handle); + _starpu_trace_data_wont_use(&handle); starpu_data_acquire_on_node_cb_sequential_consistency_quick(handle, STARPU_ACQUIRE_NO_NODE_LOCK_ALL, STARPU_R, _starpu_data_wont_use, handle, 1, 1); } diff --git a/src/debug/traces/starpu_fxt.h b/src/debug/traces/starpu_fxt.h index b648192105..a1a745b2db 100644 --- a/src/debug/traces/starpu_fxt.h +++ b/src/debug/traces/starpu_fxt.h @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include "../mpi/src/starpu_mpi_fxt.h" #include diff --git a/src/drivers/cpu/driver_cpu.c b/src/drivers/cpu/driver_cpu.c index 18664849c0..83255293d2 100644 --- a/src/drivers/cpu/driver_cpu.c +++ b/src/drivers/cpu/driver_cpu.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #ifdef STARPU_HAVE_HWLOC #include @@ -177,14 +177,6 @@ static int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker) { int devid = cpu_worker->devid; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); 
- starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); - - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); -#endif - _starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1); snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid); snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid); @@ -198,7 +190,8 @@ static int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker) STARPU_ASSERT(status == 0); } #endif - _STARPU_TRACE_WORKER_INIT_END(cpu_worker->workerid); + int rc = _starpu_trace_worker_init_end(cpu_worker, STARPU_CPU_WORKER); + (void) rc; STARPU_PTHREAD_MUTEX_LOCK_SCHED(&cpu_worker->sched_mutex); cpu_worker->status = STATUS_UNKNOWN; @@ -210,16 +203,15 @@ static int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker) STARPU_PTHREAD_COND_SIGNAL(&cpu_worker->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&cpu_worker->mutex); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, -1, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); -#endif return 0; } static int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker) { - _STARPU_TRACE_WORKER_DEINIT_START; + int rc; + + rc = _starpu_trace_worker_deinit_start(); + (void) rc; unsigned memnode = cpu_worker->memory_node; _starpu_datawizard_handle_all_pending_node_data_requests(memnode); @@ -238,13 +230,9 @@ static int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker) } #endif cpu_worker->worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER); -#ifdef STARPU_PROF_TOOL - int workerid = cpu_worker->workerid; - struct starpu_prof_tool_info pi = 
_starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); -#endif + rc = _starpu_trace_worker_deinit_end(cpu_worker->workerid, STARPU_CPU_WORKER); + (void) rc; return 0; } @@ -384,10 +372,7 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_ int profiling = starpu_profiling_status_get(); struct starpu_task *task = j->task; struct starpu_codelet *cl = task->cl; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; - int devid = cpu_args->devid; -#endif + int rc; STARPU_ASSERT(cl); @@ -415,13 +400,8 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_ STARPU_ASSERT_MSG(func, "when STARPU_CPU is defined in 'where', cpu_func or cpu_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec(&pi, NULL, NULL); -#endif + rc = _starpu_trace_start_executing( j, worker_task, cpu_args, func); + #ifdef STARPU_SIMGRID if (cl->flags & STARPU_CODELET_SIMGRID_EXECUTE) func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); @@ -465,13 +445,9 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_ _starpu_profiling_papi_task_stop_counters(task); #endif #endif -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - 
starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_EXECUTING(j); + rc = _starpu_trace_end_executing(j, cpu_args); + (void) rc; + } if (is_parallel_task && cl->type == STARPU_FORKJOIN) /* rebind to single CPU */ @@ -479,29 +455,19 @@ static int execute_job_on_cpu(struct _starpu_job *j, struct starpu_task *worker_ } else { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); -#endif - _STARPU_TRACE_START_EXECUTING(j); + rc = _starpu_trace_start_executing(j, worker_task, cpu_args, func); + (void) rc; } if (is_parallel_task) { - _STARPU_TRACE_START_PARALLEL_SYNC(j); + _starpu_trace_start_parallel_sync(j); STARPU_PTHREAD_BARRIER_WAIT(&j->after_work_barrier); - _STARPU_TRACE_END_PARALLEL_SYNC(j); + _starpu_trace_end_parallel_sync(j); if (rank != 0) { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_cpu_exec, devid, worker_task->workerid, starpu_prof_tool_driver_cpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_EXECUTING(j); + rc = _starpu_trace_end_executing(j, cpu_args); + (void)rc; } } @@ -638,13 +604,11 @@ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) { unsigned memnode = cpu_worker->memory_node; int workerid = cpu_worker->workerid; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif int res; struct _starpu_job *j; struct starpu_task *task = NULL, *pending_task; int rank = 0; + int rc; #ifdef STARPU_SIMGRID starpu_pthread_wait_reset(&cpu_worker->wait); @@ 
-656,11 +620,9 @@ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) { int ret; STARPU_RMB(); - _STARPU_TRACE_END_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + rc = _starpu_trace_end_progress(memnode, cpu_worker); + (void) rc; + j = _starpu_get_job_associated_to_task(pending_task); _starpu_fetch_task_input_tail(pending_task, j, cpu_worker); @@ -668,11 +630,9 @@ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) cpu_worker->task_transferring = NULL; ret = _starpu_cpu_driver_execute_task(cpu_worker, pending_task, j); - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + rc = _starpu_trace_start_progress(memnode, cpu_worker); + (void) rc; + return ret; } @@ -722,14 +682,9 @@ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) _starpu_push_task_to_workers(task); return 0; } + rc = _starpu_trace_end_progress(memnode, cpu_worker); + (void) rc; -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); /* Get the rank in case it is a parallel task */ if (j->task_size > 1) { @@ 
-758,46 +713,33 @@ static int _starpu_cpu_driver_run_once(struct _starpu_worker *cpu_worker) else { int ret = _starpu_cpu_driver_execute_task(cpu_worker, task, j); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + rc = _starpu_trace_end_transfer(memnode, cpu_worker); + (void) rc; return ret; } -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, cpu_worker->workerid, starpu_prof_tool_driver_cpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + rc = _starpu_trace_end_transfer(memnode, cpu_worker); + (void) rc; + return 0; } static void *_starpu_cpu_worker(void *arg) { struct _starpu_worker *worker = arg; + unsigned memnode = worker->memory_node; + int rc; _starpu_cpu_driver_init(worker); - _STARPU_TRACE_START_PROGRESS(worker->memory_node); -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + rc = _starpu_trace_start_transfer(memnode, worker); + (void) rc; + while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cpu_driver_run_once(worker); } - _STARPU_TRACE_END_PROGRESS(worker->memory_node); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, 
worker->workerid, worker->workerid, starpu_prof_tool_driver_cpu, worker->memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + rc = _starpu_trace_end_transfer(memnode, worker); + (void) rc; _starpu_cpu_driver_deinit(worker); return NULL; diff --git a/src/drivers/cuda/driver_cuda.c b/src/drivers/cuda/driver_cuda.c index ba77f90409..db1a68e0da 100644 --- a/src/drivers/cuda/driver_cuda.c +++ b/src/drivers/cuda/driver_cuda.c @@ -50,7 +50,8 @@ #include #include #include -#include +#include +#include #ifdef STARPU_SIMGRID #include @@ -1010,31 +1011,18 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) struct _starpu_worker *worker0 = &worker_set->workers[0]; int lastdevid = -1; unsigned i; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif _starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0); _starpu_set_local_worker_set_key(worker_set); -#ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) - _starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0); -#endif - + _starpu_trace_worker_init_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0); for (i = 0; i < worker_set->nworkers; i++) { worker = &worker_set->workers[i]; unsigned devid = worker->devid; unsigned memnode = worker->memory_node; -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); -#endif - if ((int) devid == lastdevid) { #ifdef STARPU_SIMGRID @@ -1108,11 +1096,8 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) #endif init_worker_context(workerid, 
worker->devid); - _STARPU_TRACE_WORKER_INIT_END(workerid); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); -#endif + int rc = _starpu_trace_worker_init_end(worker, STARPU_CUDA_WORKER); + (void) rc; } { char thread_name[16]; @@ -1141,7 +1126,7 @@ static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) struct _starpu_worker_set *worker_set = worker->set; int lastdevid = -1; unsigned i; - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); for (i = 0; i < worker_set->nworkers; i++) { @@ -1185,15 +1170,10 @@ static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) unsigned memnode = worker->memory_node; deinit_worker_context(workerid, worker->devid); - -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); -#endif + _starpu_trace_worker_deinit_end(workerid, STARPU_CUDA_WORKER); } worker_set->workers[0].worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); return 0; } @@ -2152,9 +2132,6 @@ static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *work struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); -#if !defined(STARPU_SIMGRID) && defined(STARPU_PROF_TOOL) - struct starpu_prof_tool_info pi; -#endif STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; @@ -2180,7 +2157,8 @@ static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *work if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); + _starpu_trace_start_executing(j, task, worker, func); + #ifdef STARPU_SIMGRID int async = 
task->cl->cuda_flags[j->nimpl] & STARPU_CUDA_ASYNC; unsigned workerid = worker->workerid; @@ -2210,24 +2188,11 @@ static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *work } #endif -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); -#endif func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); -#endif - #endif - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, worker); } } @@ -2335,9 +2300,6 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) struct _starpu_worker *worker0 = &worker_set->workers[0]; struct starpu_task *tasks[worker_set->nworkers], *task; struct _starpu_job *j; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif int i, res; int idle_tasks, idle_transfers; @@ -2372,13 +2334,7 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); - _STARPU_TRACE_END_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - 
starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(memnode, worker); j = _starpu_get_job_associated_to_task(task); _starpu_set_local_worker_key(worker); @@ -2397,13 +2353,7 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) { execute_job_on_cuda(task, worker); } - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(memnode, worker); } /* Then test for termination of queued tasks */ @@ -2432,13 +2382,7 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) else #endif /* !STARPU_SIMGRID */ { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); /* Asynchronous task completed! */ _starpu_set_local_worker_key(worker); finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker); @@ -2461,24 +2405,17 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) * flushing the pipeline, we can now at * last execute it. 
*/ - _STARPU_TRACE_EVENT("sync_task"); + _starpu_trace_event("sync_task"); execute_job_on_cuda(task, worker); - _STARPU_TRACE_EVENT("end_sync_task"); + _starpu_trace_event("end_sync_task"); worker->pipeline_stuck = 0; } } else /* Data for next task didn't have time to finish transferring :/ */ - _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + _starpu_trace_worker_start_fetch_input(NULL, workerid); } - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif - + _starpu_trace_start_progress(memnode, worker); } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) @@ -2526,24 +2463,12 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) } /* Fetch data asynchronously */ -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - 
starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif - // _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); + // _starpu_trace_end_progress(memnode, worker); } return 0; @@ -2553,19 +2478,12 @@ void *_starpu_cuda_worker(void *_arg) { struct _starpu_worker *worker = _arg; struct _starpu_worker_set* worker_set = worker->set; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif unsigned i; _starpu_cuda_driver_init(worker); for (i = 0; i < worker_set->nworkers; i++) { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); + _starpu_trace_start_progress(worker_set->workers[i].memory_node, &worker_set->workers[i]); } while (_starpu_machine_is_running()) { @@ -2574,12 +2492,7 @@ void *_starpu_cuda_worker(void *_arg) } for (i = 0; i < worker_set->nworkers; i++) { - _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - + _starpu_trace_end_progress(worker_set->workers[i].memory_node, &worker_set->workers[i]); } _starpu_cuda_driver_deinit(worker); diff --git a/src/drivers/cuda/driver_cuda0.c b/src/drivers/cuda/driver_cuda0.c index c68c1098e4..36e209f475 100644 --- a/src/drivers/cuda/driver_cuda0.c +++ b/src/drivers/cuda/driver_cuda0.c @@ -326,7 +326,7 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) snprintf(worker->short_name,
sizeof(worker->short_name), "CUDA %u", devid); _STARPU_DEBUG("cuda (%s) dev id %u thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); - _STARPU_TRACE_WORKER_INIT_END(workerid); + _starpu_trace_worker_init_end(worker, STARPU_CUDA_WORKER); { char thread_name[16]; @@ -346,7 +346,7 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) { - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); unsigned devid = worker->devid; unsigned memnode = worker->memory_node; @@ -364,7 +364,7 @@ static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) deinit_device_context(devid); worker->worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); + _starpu_trace_worker_deinit_end(worker->workerid, STARPU_CUDA_WORKER); return 0; } @@ -617,9 +617,9 @@ static int start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *worke if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); + _starpu_trace_start_executing(j, task, worker, func); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, worker); } return 0; diff --git a/src/drivers/cuda/driver_cuda1.c b/src/drivers/cuda/driver_cuda1.c index adbc576638..d6004416f2 100644 --- a/src/drivers/cuda/driver_cuda1.c +++ b/src/drivers/cuda/driver_cuda1.c @@ -478,7 +478,7 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) init_worker_context(workerid, worker->devid); - _STARPU_TRACE_WORKER_INIT_END(workerid); + _starpu_trace_worker_init_end(worker, STARPU_CUDA_WORKER); { char thread_name[16]; @@ -498,7 +498,7 @@ static int _starpu_cuda_driver_init(struct _starpu_worker *worker) static int _starpu_cuda_driver_deinit(struct _starpu_worker *worker) { - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); unsigned devid = worker->devid; unsigned memnode = worker->memory_node; @@ -520,7 +520,7 @@ static int _starpu_cuda_driver_deinit(struct
_starpu_worker *worker) deinit_worker_context(workerid, worker->devid); worker->worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER); + _starpu_trace_worker_deinit_end(workerid, STARPU_CUDA_WORKER); return 0; } @@ -1095,9 +1095,9 @@ static void start_job_on_cuda(struct _starpu_job *j, struct _starpu_worker *work if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); + _starpu_trace_start_executing(j, task, worker, func); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, worker); } } @@ -1186,7 +1186,7 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); j = _starpu_get_job_associated_to_task(task); _starpu_fetch_task_input_tail(task, j, worker); @@ -1194,7 +1194,7 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) worker->task_transferring = NULL; execute_job_on_cuda(task, worker); - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); } /* Then test for termination of queued tasks */ @@ -1216,10 +1216,10 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) } else { - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); /* Asynchronous task completed! 
*/ finish_job_on_cuda(_starpu_get_job_associated_to_task(task), worker); - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); } if (worker->ntasks < 1) idle_tasks++; @@ -1261,11 +1261,11 @@ static int _starpu_cuda_driver_run_once(struct _starpu_worker *worker) worker->current_task = task; /* Fetch data asynchronously */ - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); return 0; } @@ -1275,13 +1275,13 @@ void *_starpu_cuda_worker(void *_arg) struct _starpu_worker *worker = _arg; _starpu_cuda_driver_init(worker); - _STARPU_TRACE_START_PROGRESS(worker->memory_node); + _starpu_trace_start_progress(worker->memory_node, worker); while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_cuda_driver_run_once(worker); } - _STARPU_TRACE_END_PROGRESS(worker->memory_node); + _starpu_trace_end_progress(worker->memory_node, worker); _starpu_cuda_driver_deinit(worker); return NULL; diff --git a/src/drivers/driver_common/driver_common.c b/src/drivers/driver_common/driver_common.c index 2b341b3764..6616125c0e 100644 --- a/src/drivers/driver_common/driver_common.c +++ b/src/drivers/driver_common/driver_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -156,13 +157,13 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job } } } - _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); - _STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, &sched_ctx->perf_arch, workerid, rank); + _starpu_trace_task_name_line_color(j); + _starpu_trace_start_codelet_body(j, j->nimpl, &sched_ctx->perf_arch, workerid, rank); } else { - _STARPU_TRACE_TASK_NAME_LINE_COLOR(j); - _STARPU_TRACE_START_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); + _starpu_trace_task_name_line_color(j); +
_starpu_trace_start_codelet_body(j, j->nimpl, perf_arch, workerid, rank); } _starpu_sched_ctx_unlock_read(sched_ctx->id); _STARPU_TASK_BREAK_ON(task, exec); @@ -183,12 +184,12 @@ void _starpu_driver_end_job(struct _starpu_worker *worker, struct _starpu_job *j if (!sched_ctx->sched_policy) { _starpu_perfmodel_create_comb_if_needed(&(sched_ctx->perf_arch)); - _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, &(sched_ctx->perf_arch), workerid, rank); + _starpu_trace_end_codelet_body(j, j->nimpl, &(sched_ctx->perf_arch), workerid, rank); } else { _starpu_perfmodel_create_comb_if_needed(perf_arch); - _STARPU_TRACE_END_CODELET_BODY(j, j->nimpl, perf_arch, workerid, rank); + _starpu_trace_end_codelet_body(j, j->nimpl, perf_arch, workerid, rank); } if (cl && cl->model && cl->model->benchmarking) @@ -362,7 +363,7 @@ static void _starpu_worker_set_status_scheduling(int workerid) if (!(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING)) { if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) - _STARPU_TRACE_WORKER_SCHEDULING_START; + _starpu_trace_worker_scheduling_start(); _starpu_worker_add_status(workerid, STATUS_INDEX_SCHEDULING); } } @@ -371,7 +372,7 @@ static void _starpu_worker_set_status_scheduling_done(int workerid) { STARPU_ASSERT(_starpu_worker_get_status(workerid) & STATUS_SCHEDULING); if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) - _STARPU_TRACE_WORKER_SCHEDULING_END; + _starpu_trace_worker_scheduling_end(); _starpu_worker_clear_status(workerid, STATUS_INDEX_SCHEDULING); } @@ -379,7 +380,7 @@ static void _starpu_worker_set_status_sleeping(int workerid) { if (!(_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) { - _STARPU_TRACE_WORKER_SLEEP_START; + _starpu_trace_worker_sleep_start(); _starpu_worker_add_status(workerid, STATUS_INDEX_SLEEPING); } } @@ -388,7 +389,7 @@ static void _starpu_worker_set_status_wakeup(int workerid) { if ((_starpu_worker_get_status(workerid) & STATUS_SLEEPING)) { - _STARPU_TRACE_WORKER_SLEEP_END; + 
_starpu_trace_worker_sleep_end(); _starpu_worker_clear_status(workerid, STATUS_INDEX_SLEEPING); } } diff --git a/src/drivers/hip/driver_hip.c b/src/drivers/hip/driver_hip.c index 3cf6539094..7049b92b8a 100644 --- a/src/drivers/hip/driver_hip.c +++ b/src/drivers/hip/driver_hip.c @@ -38,7 +38,8 @@ #include #include #include -#include +#include +#include #if HAVE_DECL_HWLOC_HIP_GET_DEVICE_OSDEV_BY_INDEX #include @@ -662,10 +663,6 @@ int _starpu_hip_driver_init(struct _starpu_worker *worker) _starpu_driver_start(worker0, STARPU_HIP_WORKER, 0); _starpu_set_local_worker_key(worker); -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif - #ifdef STARPU_USE_FXT for (i = 1; i < worker_set->nworkers; i++) _starpu_worker_start(&worker_set->workers[i], STARPU_HIP_WORKER, 0); @@ -677,12 +674,7 @@ int _starpu_hip_driver_init(struct _starpu_worker *worker) unsigned devid = worker->devid; unsigned memnode = worker->memory_node; -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); -#endif + _starpu_trace_worker_init_start(worker, STARPU_HIP_WORKER, 0); if ((int) devid == lastdevid) { @@ -742,11 +734,8 @@ int _starpu_hip_driver_init(struct _starpu_worker *worker) #endif init_worker_context(workerid, worker->devid); - _STARPU_TRACE_WORKER_INIT_END(workerid); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_gpu, 0, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); -#endif + int rc = 
_starpu_trace_worker_init_end(worker, STARPU_HIP_WORKER); + (void) rc; } { char thread_name[16]; @@ -775,7 +764,7 @@ int _starpu_hip_driver_deinit(struct _starpu_worker *worker) struct _starpu_worker_set *worker_set = worker->set; int lastdevid = -1; unsigned i; - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); for (i = 0; i < worker_set->nworkers; i++) { @@ -819,15 +808,10 @@ int _starpu_hip_driver_deinit(struct _starpu_worker *worker) unsigned memnode = worker->memory_node; deinit_worker_context(workerid, worker->devid); - -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); -#endif + _starpu_trace_worker_deinit_end(workerid, STARPU_HIP_WORKER); } worker_set->workers[0].worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_HIP_WORKER); return 0; } @@ -1381,9 +1365,6 @@ static void start_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worke struct starpu_task *task = j->task; int profiling = starpu_profiling_status_get(); -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif STARPU_ASSERT(task); struct starpu_codelet *cl = task->cl; @@ -1409,19 +1390,9 @@ static void start_job_on_hip(struct _starpu_job *j, struct _starpu_worker *worke if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); -#endif - + _starpu_trace_start_executing(j, task, worker, func); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); - -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, 
worker->devid, worker->workerid, starpu_prof_tool_driver_gpu, -1, (void*)func); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, worker); } } @@ -1497,9 +1468,6 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) struct starpu_task *tasks[worker_set->nworkers]; struct starpu_task *task; struct _starpu_job *j; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif int i, res; int idle_tasks, idle_transfers; @@ -1530,11 +1498,7 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); - _STARPU_TRACE_END_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(memnode, worker); j = _starpu_get_job_associated_to_task(task); _starpu_set_local_worker_key(worker); @@ -1552,11 +1516,7 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) { execute_job_on_hip(task, worker); } - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, workerid, starpu_prof_tool_driver_gpu, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(memnode, worker); } /* Then test for termination of queued tasks */ @@ -1581,11 +1541,7 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) } else { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, 
workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); /* Asynchronous task completed! */ _starpu_set_local_worker_key(worker); finish_job_on_hip(_starpu_get_job_associated_to_task(task), worker); @@ -1608,22 +1564,17 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) * flushing the pipeline, we can now at * last execute it. */ - _STARPU_TRACE_EVENT("sync_task"); + _starpu_trace_event("sync_task"); execute_job_on_hip(task, worker); - _STARPU_TRACE_EVENT("end_sync_task"); + _starpu_trace_event("end_sync_task"); worker->pipeline_stuck = 0; } } else /* Data for next task didn't have time to finish transferring :/ */ - _STARPU_TRACE_WORKER_START_FETCH_INPUT(NULL, workerid); + _starpu_trace_worker_start_fetch_input(NULL, workerid); } - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif - + _starpu_trace_start_progress(memnode, worker); } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) @@ -1666,19 +1617,11 @@ int _starpu_hip_driver_run_once(struct _starpu_worker *worker) } /* Fetch data asynchronously */ -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); _starpu_set_local_worker_key(worker); res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); - _STARPU_TRACE_START_PROGRESS(memnode); 
-#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_gpu, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(memnode, worker); } return 0; @@ -1689,19 +1632,12 @@ void *_starpu_hip_worker(void *_arg) struct _starpu_worker *worker = _arg; struct _starpu_worker_set* worker_set = worker->set; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif unsigned i; _starpu_hip_driver_init(worker); for (i = 0; i < worker_set->nworkers; i++) { -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_START_PROGRESS(worker_set->workers[i].memory_node); + _starpu_trace_start_progress(worker_set->workers[i].memory_node, &worker_set->workers[i]); } while (_starpu_machine_is_running()) { @@ -1710,11 +1646,7 @@ void *_starpu_hip_worker(void *_arg) } for (i = 0; i < worker_set->nworkers; i++) { - _STARPU_TRACE_END_PROGRESS(worker_set->workers[i].memory_node); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker_set->workers[i].workerid, worker_set->workers[i].workerid, starpu_prof_tool_driver_gpu, worker_set->workers[i].memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(worker_set->workers[i].memory_node, &worker_set->workers[i]); } _starpu_hip_driver_deinit(worker); diff --git a/src/drivers/max/driver_max_fpga.c b/src/drivers/max/driver_max_fpga.c index 7b6d7601e8..d7c3d386aa 100644 --- a/src/drivers/max/driver_max_fpga.c +++ b/src/drivers/max/driver_max_fpga.c @@ 
-328,7 +328,7 @@ static int _starpu_max_fpga_driver_init(struct _starpu_worker *worker) snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid); starpu_pthread_setname(worker->short_name); - _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + _starpu_trace_worker_init_end(worker, STARPU_MAX_FPGA_WORKER); /* tell the main thread that we are ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); @@ -341,7 +341,7 @@ static int _starpu_max_fpga_driver_init(struct _starpu_worker *worker) static int _starpu_max_fpga_driver_deinit(struct _starpu_worker *fpga_worker) { - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); unsigned memnode = fpga_worker->memory_node; _starpu_datawizard_handle_all_pending_node_data_requests(memnode); @@ -352,7 +352,7 @@ static int _starpu_max_fpga_driver_deinit(struct _starpu_worker *fpga_worker) _starpu_free_all_automatically_allocated_buffers(memnode); fpga_worker->worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_MAX_FPGA_WORKER); + _starpu_trace_worker_deinit_end(fpga_worker->workerid, STARPU_MAX_FPGA_WORKER); return 0; } @@ -561,9 +561,9 @@ static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker STARPU_ASSERT_MSG(func, "when STARPU_MAX_FPGA is defined in 'where', fpga_func or max_fpga_funcs has to be defined"); if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); + _starpu_trace_start_executing(j, task, fpga_args, func); func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, fpga_args); } } @@ -581,13 +581,13 @@ int _starpu_max_fpga_driver_run_once(struct _starpu_worker *fpga_worker) unsigned memnode = fpga_worker->memory_node; int workerid = fpga_worker->workerid; - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, fpga_worker); _starpu_datawizard_progress(1); if (memnode != STARPU_MAIN_RAM) { _starpu_datawizard_progress(1); } - _STARPU_TRACE_END_PROGRESS(memnode); + 
_starpu_trace_end_progress(memnode, fpga_worker); struct _starpu_job *j; struct starpu_task *task; @@ -683,16 +683,16 @@ int _starpu_max_fpga_driver_run_once(struct _starpu_worker *fpga_worker) void *_starpu_max_fpga_worker(void *_arg) { struct _starpu_worker* worker = _arg; - unsigned memnode = worker->memory_node; + unsigned memnode = worker->memory_node; _starpu_max_fpga_driver_init(worker); - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_max_fpga_driver_run_once(worker); } - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); _starpu_max_fpga_driver_deinit(worker); return NULL; diff --git a/src/drivers/max/driver_max_fpga.h b/src/drivers/max/driver_max_fpga.h index a760b7d641..5f8b9f4a5e 100644 --- a/src/drivers/max/driver_max_fpga.h +++ b/src/drivers/max/driver_max_fpga.h @@ -28,7 +28,6 @@ #include #include #include -#include void _starpu_max_fpga_preinit(void); diff --git a/src/drivers/mp_common/source_common.c b/src/drivers/mp_common/source_common.c index d04626f715..c9faa3d8a5 100644 --- a/src/drivers/mp_common/source_common.c +++ b/src/drivers/mp_common/source_common.c @@ -247,7 +247,7 @@ static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node) struct mp_message * message = mp_message_list_pop_back(&node->message_queue); /* Release mutex during handle */ stopped_progress = 1; - _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(node)); + _starpu_trace_end_progress(mp_node_memory_node(node), NULL/* TODO: worker */); STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); _starpu_src_common_handle_async(node, message->buffer, message->size, message->type, 1); free(message->buffer); @@ -257,7 +257,7 @@ static void _starpu_src_common_handle_stored_async(struct _starpu_mp_node *node) } STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex); if (stopped_progress) - 
_STARPU_TRACE_START_PROGRESS(mp_node_memory_node(node)); + _starpu_trace_start_progress(mp_node_memory_node(node), NULL/* TODO: worker */); } /* Store a message if is asynchronous @@ -1158,7 +1158,7 @@ static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * STARPU_RMB(); struct _starpu_job * j = _starpu_get_job_associated_to_task(task); - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, &worker_set->workers[i]); _starpu_set_local_worker_key(&worker_set->workers[i]); _starpu_fetch_task_input_tail(task, j, &worker_set->workers[i]); /* Reset it */ @@ -1182,7 +1182,7 @@ static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * STARPU_ASSERT(0); } - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, &worker_set->workers[i]); } } @@ -1198,13 +1198,13 @@ static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * while(mp_node->nt_recv_is_ready(mp_node)) { stopped_progress = 1; - _STARPU_TRACE_END_PROGRESS(mp_node_memory_node(mp_node)); + _starpu_trace_end_progress(mp_node_memory_node(mp_node), NULL /* TODO: worker */); _starpu_src_common_recv_async(mp_node); /* Mutex is unlock in _starpu_src_common_recv_async */ STARPU_PTHREAD_MUTEX_LOCK(&mp_node->connection_mutex); } if (stopped_progress) - _STARPU_TRACE_START_PROGRESS(mp_node_memory_node(mp_node)); + _starpu_trace_start_progress(mp_node_memory_node(mp_node), NULL /* TODO: worker */); STARPU_PTHREAD_MUTEX_UNLOCK(&mp_node->connection_mutex); @@ -1224,11 +1224,11 @@ static void _starpu_src_common_worker_internal_work(struct _starpu_worker_set * if(tasks[i] != NULL) { struct _starpu_worker *worker = &worker_set->workers[i]; - _STARPU_TRACE_END_PROGRESS(worker->memory_node); + _starpu_trace_end_progress(worker->memory_node, worker); _starpu_set_local_worker_key(worker); int ret = _starpu_fetch_task_input(tasks[i], _starpu_get_job_associated_to_task(tasks[i]), 1); STARPU_ASSERT(!ret); - 
_STARPU_TRACE_START_PROGRESS(worker->memory_node); + _starpu_trace_start_progress(worker->memory_node, worker); } } @@ -1276,7 +1276,7 @@ void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int STARPU_PTHREAD_COND_SIGNAL(&device_worker_set->ready_cond); STARPU_PTHREAD_MUTEX_UNLOCK(&device_worker_set->mutex); - _STARPU_TRACE_START_PROGRESS(memnode[device]); + _starpu_trace_start_progress(memnode[device], NULL /* TODO: worker */); } /*main loop*/ @@ -1292,7 +1292,7 @@ void _starpu_src_common_workers_set(struct _starpu_worker_set * worker_set, int for (device = 0; device < ndevices; device++) { - _STARPU_TRACE_END_PROGRESS(memnode[device]); + _starpu_trace_end_progress(memnode[device], NULL /* TODO: worker */); _starpu_datawizard_handle_all_pending_node_data_requests(memnode[device]); } diff --git a/src/drivers/mpi/driver_mpi_source.c b/src/drivers/mpi/driver_mpi_source.c index 50d4ef4433..d130bbeb8c 100644 --- a/src/drivers/mpi/driver_mpi_source.c +++ b/src/drivers/mpi/driver_mpi_source.c @@ -314,7 +314,7 @@ void *_starpu_mpi_src_worker(void *arg) for (i = 0; i < worker_set->nworkers; i++) { struct _starpu_worker *worker = &worker_set->workers[i]; - _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + _starpu_trace_worker_init_end(worker->workerid); } _starpu_src_common_init_switch_env(workersetnum); diff --git a/src/drivers/opencl/driver_opencl.c b/src/drivers/opencl/driver_opencl.c index 98b2bfda0c..11464638e8 100644 --- a/src/drivers/opencl/driver_opencl.c +++ b/src/drivers/opencl/driver_opencl.c @@ -37,7 +37,8 @@ #include #include #include -#include +#include +#include #if defined(STARPU_HAVE_HWLOC) && defined(STARPU_USE_OPENCL) #include @@ -569,14 +570,7 @@ static int _starpu_opencl_driver_init(struct _starpu_worker *worker) { int devid = worker->devid; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, starpu_prof_tool_driver_ocl, 
worker->memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); -#endif - + _starpu_trace_worker_init_start(worker, STARPU_OPENCL_WORKER, 0); _starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0); _starpu_opencl_init_context(devid); @@ -620,12 +614,7 @@ static int _starpu_opencl_driver_init(struct _starpu_worker *worker) _STARPU_DEBUG("OpenCL (%s) dev id %d thread is ready to run on CPU %d !\n", devname, devid, worker->bindid); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_end, devid, worker->workerid, starpu_prof_tool_driver_ocl, 0, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_end(&pi, NULL, NULL); -#endif - - _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + _starpu_trace_worker_init_end(worker, STARPU_OPENCL_WORKER ); /* tell the main thread that this one is ready */ STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex); @@ -639,7 +628,7 @@ static int _starpu_opencl_driver_init(struct _starpu_worker *worker) static int _starpu_opencl_driver_deinit(struct _starpu_worker *worker) { - _STARPU_TRACE_WORKER_DEINIT_START; + _starpu_trace_worker_deinit_start(); unsigned memnode = worker->memory_node; @@ -656,11 +645,7 @@ static int _starpu_opencl_driver_deinit(struct _starpu_worker *worker) _starpu_opencl_deinit_context(devid); worker->worker_is_initialized = 0; - _STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER); -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); -#endif + 
_starpu_trace_worker_deinit_end(worker->workerid, STARPU_OPENCL_WORKER); return 0; } @@ -1355,7 +1340,8 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker if (_starpu_get_disable_kernels() <= 0) { - _STARPU_TRACE_START_EXECUTING(j); + _starpu_trace_start_executing(j, task, worker, func); + #ifdef STARPU_SIMGRID double length = NAN; double energy = NAN; @@ -1407,24 +1393,13 @@ static int _starpu_opencl_start_job(struct _starpu_job *j, struct _starpu_worker async ? &task_finished[worker->devid][pipeline_idx] : NULL); } #else -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); -#endif + func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker->devid, worker->workerid, starpu_prof_tool_driver_ocl, -1, (void*)func); - pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); -#endif cl_command_queue queue; starpu_opencl_get_queue(worker->devid, &queue); #endif - _STARPU_TRACE_END_EXECUTING(j); + _starpu_trace_end_executing(j, worker); } return 0; } @@ -1533,9 +1508,6 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) struct _starpu_job *j; struct starpu_task *task; int res; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif int idle_tasks, idle_transfers; @@ -1553,13 +1525,7 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) if (task && worker->nb_buffers_transferred == worker->nb_buffers_totransfer) { STARPU_RMB(); -#ifdef 
STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_END_PROGRESS(memnode); + _starpu_trace_end_progress(memnode, worker); j = _starpu_get_job_associated_to_task(task); _starpu_fetch_task_input_tail(task, j, worker); @@ -1576,13 +1542,7 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) } _starpu_opencl_execute_job(task, worker); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif - _STARPU_TRACE_START_PROGRESS(memnode); + _starpu_trace_start_progress(memnode, worker); } /* Then poll for completed jobs */ @@ -1611,13 +1571,7 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) } else { - _STARPU_TRACE_END_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(memnode, worker); #ifndef STARPU_SIMGRID err = clReleaseEvent(task_events[worker->devid][worker->first_task]); _STARPU_OPENCL_CHECK_AND_REPORT_ERROR(err); @@ -1640,19 +1594,13 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) else { /* A synchronous task, we have finished flushing the pipeline, we 
can now at last execute it. */ - _STARPU_TRACE_EVENT("sync_task"); + _starpu_trace_event("sync_task"); _starpu_opencl_execute_job(task, worker); - _STARPU_TRACE_EVENT("end_sync_task"); + _starpu_trace_event("end_sync_task"); worker->pipeline_stuck = 0; } } - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(memnode, worker); } } if (!worker->pipeline_length || worker->ntasks < worker->pipeline_length) @@ -1698,24 +1646,12 @@ static int _starpu_opencl_driver_run_once(struct _starpu_worker *worker) return 0; } - _STARPU_TRACE_END_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(memnode, worker); /* Fetch data asynchronously */ res = _starpu_fetch_task_input(task, j, 1); STARPU_ASSERT(res == 0); - _STARPU_TRACE_START_PROGRESS(memnode); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, memnode, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(memnode, worker); return 0; } @@ -1725,26 +1661,15 @@ void *_starpu_opencl_worker(void *_arg) struct 
_starpu_worker* worker = _arg; _starpu_opencl_driver_init(worker); - _STARPU_TRACE_START_PROGRESS(worker->memory_node); -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_start_progress(worker->memory_node, worker); + while (_starpu_machine_is_running()) { _starpu_may_pause(); _starpu_opencl_driver_run_once(worker); } _starpu_opencl_driver_deinit(worker); - _STARPU_TRACE_END_PROGRESS(worker->memory_node); -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, starpu_prof_tool_driver_ocl, worker->memory_node, NULL); - /* pi.model_name = _starpu_job_get_model_name(j); - pi.task_name = _starpu_job_get_task_name(j); */ - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); -#endif + _starpu_trace_end_progress(worker->memory_node, worker); return NULL; } diff --git a/src/drivers/tcpip/driver_tcpip_source.c b/src/drivers/tcpip/driver_tcpip_source.c index efa45c65f2..563b02c17e 100644 --- a/src/drivers/tcpip/driver_tcpip_source.c +++ b/src/drivers/tcpip/driver_tcpip_source.c @@ -307,7 +307,7 @@ void *_starpu_tcpip_src_worker(void *arg) for (i = 0; i < worker_set->nworkers; i++) { struct _starpu_worker *worker = &worker_set->workers[i]; - _STARPU_TRACE_WORKER_INIT_END(worker->workerid); + _starpu_trace_worker_init_end(worker, STARPU_TCPIP_MS_WORKER); /* worker + arch type: the call shape the OpenCL driver uses */ } _starpu_src_common_init_switch_env(workersetnum); diff --git a/src/profiling/callbacks.c b/src/profiling/callbacks/callbacks.c similarity index 99% rename from src/profiling/callbacks.c rename to src/profiling/callbacks/callbacks.c index 3f9255e0cb..f8989d3a84 
100644 --- a/src/profiling/callbacks.c +++ b/src/profiling/callbacks/callbacks.c @@ -15,7 +15,7 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ -#include +#include #include #ifdef HAVE_DLOPEN #include diff --git a/src/profiling/callbacks.h b/src/profiling/callbacks/callbacks.h similarity index 100% rename from src/profiling/callbacks.h rename to src/profiling/callbacks/callbacks.h diff --git a/src/common/fxt.c b/src/profiling/fxt/fxt.c similarity index 92% rename from src/common/fxt.c rename to src/profiling/fxt/fxt.c index fd0b382c8d..275dd8f2d9 100644 --- a/src/common/fxt.c +++ b/src/profiling/fxt/fxt.c @@ -26,20 +26,10 @@ unsigned long _starpu_job_cnt = 0; #ifdef STARPU_USE_FXT -#include +#include #include #include -#ifdef STARPU_HAVE_WINDOWS -#include -#endif - -#ifdef __linux__ -#include /* for SYS_gettid */ -#elif defined(__FreeBSD__) -#include /* for thr_self() */ -#endif - /* By default, record all events but the VERBOSE_EXTRA ones, which are very costly: */ #define KEYMASKALL_DEFAULT FUT_KEYMASKALL & (~_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & (~_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA) @@ -80,33 +70,6 @@ uint64_t fut_getstamp(void) } #endif -long _starpu_gettid(void) -{ - /* TODO: test at configure whether __thread is available, and use that - * to cache the value. - * Don't use the TSD, this is getting called before we would have the - * time to allocate it. 
*/ -#ifdef STARPU_SIMGRID -# ifdef HAVE_SG_ACTOR_SELF - return (uintptr_t) sg_actor_self(); -# else - return (uintptr_t) MSG_process_self(); -# endif -#else -#if defined(__linux__) - return syscall(SYS_gettid); -#elif defined(__FreeBSD__) - long tid; - thr_self(&tid); - return tid; -#elif defined(_WIN32) && !defined(__CYGWIN__) - return (long) GetCurrentThreadId(); -#else - return (long) starpu_pthread_self(); -#endif -#endif -} - static void _starpu_profile_set_tracefile(void) { char *user; @@ -255,13 +218,13 @@ void starpu_fxt_start_profiling() { unsigned threadid = _starpu_gettid(); fut_keychange(FUT_ENABLE, _starpu_profile_get_user_keymask(), threadid); - _STARPU_TRACE_META("start_profiling"); + _starpu_trace_meta("start_profiling"); } void starpu_fxt_stop_profiling() { unsigned threadid = _starpu_gettid(); - _STARPU_TRACE_META("stop_profiling"); + _starpu_trace_meta("stop_profiling"); fut_keychange(FUT_SETMASK, _STARPU_FUT_KEYMASK_META, threadid); } @@ -275,7 +238,6 @@ void _starpu_fxt_flush_callback() { _STARPU_MSG("FxT is flushing trace to disk ! 
This can impact performance.\n"); _STARPU_MSG("Maybe you should increase the value of STARPU_TRACE_BUFFER_SIZE ?\n"); - starpu_fxt_trace_user_event_string("fxt flush"); } #endif @@ -484,7 +446,7 @@ void starpu_fxt_stop_profiling() void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT - _STARPU_TRACE_USER_EVENT(code); + _starpu_trace_user_event(code); #endif } @@ -492,13 +454,13 @@ void starpu_fxt_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED) void starpu_fxt_trace_user_meta_string(const char *s STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT - _STARPU_TRACE_META(s); + _starpu_trace_meta(s); #endif } void starpu_fxt_trace_user_event_string(const char *s STARPU_ATTRIBUTE_UNUSED) { #ifdef STARPU_USE_FXT - _STARPU_TRACE_EVENT(s); + _starpu_trace_event(s); #endif } diff --git a/src/profiling/fxt/fxt.h b/src/profiling/fxt/fxt.h new file mode 100644 index 0000000000..c212ceffe2 --- /dev/null +++ b/src/profiling/fxt/fxt.h @@ -0,0 +1,817 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2008-2024 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria + * Copyright (C) 2018,2020 Federal University of Rio Grande do Sul (UFRGS) + * Copyright (C) 2013-2013 Joris Pablo + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef __FXT_H__ +#define __FXT_H__ + + +/** @file */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* ou _BSD_SOURCE ou _SVID_SOURCE */ +#endif + +#include +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#ifdef STARPU_USE_FXT +#include +#include +#endif + +#pragma GCC visibility push(hidden) + +/* some key to identify the worker kind */ +#define _STARPU_FUT_WORKER_KEY(kind) (kind + 0x100) +#define _STARPU_FUT_KEY_WORKER(key) (key - 0x100) + +#define _STARPU_FUT_WORKER_INIT_START 0x5100 +#define _STARPU_FUT_WORKER_INIT_END 0x5101 + +#define _STARPU_FUT_START_CODELET_BODY 0x5102 +#define _STARPU_FUT_END_CODELET_BODY 0x5103 + +#define _STARPU_FUT_JOB_PUSH 0x5104 +#define _STARPU_FUT_JOB_POP 0x5105 + +#define _STARPU_FUT_UPDATE_TASK_CNT 0x5106 + +#define _STARPU_FUT_START_FETCH_INPUT_ON_TID 0x5107 +#define _STARPU_FUT_END_FETCH_INPUT_ON_TID 0x5108 +#define _STARPU_FUT_START_PUSH_OUTPUT_ON_TID 0x5109 +#define _STARPU_FUT_END_PUSH_OUTPUT_ON_TID 0x5110 + +#define _STARPU_FUT_TAG 0x5111 +#define _STARPU_FUT_TAG_DEPS 0x5112 + +#define _STARPU_FUT_TASK_DEPS 0x5113 + +#define _STARPU_FUT_DATA_COPY 0x5114 +#define _STARPU_FUT_WORK_STEALING 0x5115 + +#define _STARPU_FUT_WORKER_DEINIT_START 0x5116 +#define _STARPU_FUT_WORKER_DEINIT_END 0x5117 + +#define _STARPU_FUT_WORKER_SLEEP_START 0x5118 +#define _STARPU_FUT_WORKER_SLEEP_END 0x5119 + +#define _STARPU_FUT_TASK_SUBMIT 0x511a +#define _STARPU_FUT_CODELET_DATA_HANDLE 0x511b + +#define _STARPU_FUT_MODEL_NAME 0x511c + +#define _STARPU_FUT_DATA_NAME 0x511d +#define _STARPU_FUT_DATA_COORDINATES 0x511e +#define _STARPU_FUT_HANDLE_DATA_UNREGISTER 0x511f + +#define _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS 0x5120 + +#define _STARPU_FUT_NEW_MEM_NODE 0x5122 + +#define _STARPU_FUT_START_CALLBACK 0x5123 +#define _STARPU_FUT_END_CALLBACK 0x5124 + +#define _STARPU_FUT_TASK_DONE 0x5125 +#define _STARPU_FUT_TAG_DONE 0x5126 + +#define _STARPU_FUT_START_ALLOC 0x5127 +#define 
_STARPU_FUT_END_ALLOC 0x5128 + +#define _STARPU_FUT_START_ALLOC_REUSE 0x5129 +#define _STARPU_FUT_END_ALLOC_REUSE 0x5130 + +#define _STARPU_FUT_USED_MEM 0x512a + +#define _STARPU_FUT_TASK_NAME 0x512b + +#define _STARPU_FUT_DATA_WONT_USE 0x512c + +#define _STARPU_FUT_TASK_COLOR 0x512d + +#define _STARPU_FUT_DATA_DOING_WONT_USE 0x512e + +#define _STARPU_FUT_TASK_LINE 0x512f + +#define _STARPU_FUT_START_MEMRECLAIM 0x5131 +#define _STARPU_FUT_END_MEMRECLAIM 0x5132 + +#define _STARPU_FUT_START_DRIVER_COPY 0x5133 +#define _STARPU_FUT_END_DRIVER_COPY 0x5134 + +#define _STARPU_FUT_START_DRIVER_COPY_ASYNC 0x5135 +#define _STARPU_FUT_END_DRIVER_COPY_ASYNC 0x5136 + +#define _STARPU_FUT_START_PROGRESS_ON_TID 0x5137 +#define _STARPU_FUT_END_PROGRESS_ON_TID 0x5138 + +#define _STARPU_FUT_USER_EVENT 0x5139 + +#define _STARPU_FUT_SET_PROFILING 0x513a + +#define _STARPU_FUT_TASK_WAIT_FOR_ALL 0x513b + +#define _STARPU_FUT_EVENT 0x513c +#define _STARPU_FUT_THREAD_EVENT 0x513d + +#define _STARPU_FUT_CODELET_DETAILS 0x513e +#define _STARPU_FUT_CODELET_DATA 0x513f + +#define _STARPU_FUT_LOCKING_MUTEX 0x5140 +#define _STARPU_FUT_MUTEX_LOCKED 0x5141 + +#define _STARPU_FUT_UNLOCKING_MUTEX 0x5142 +#define _STARPU_FUT_MUTEX_UNLOCKED 0x5143 + +#define _STARPU_FUT_TRYLOCK_MUTEX 0x5144 + +#define _STARPU_FUT_RDLOCKING_RWLOCK 0x5145 +#define _STARPU_FUT_RWLOCK_RDLOCKED 0x5146 + +#define _STARPU_FUT_WRLOCKING_RWLOCK 0x5147 +#define _STARPU_FUT_RWLOCK_WRLOCKED 0x5148 + +#define _STARPU_FUT_UNLOCKING_RWLOCK 0x5149 +#define _STARPU_FUT_RWLOCK_UNLOCKED 0x514a + +#define _STARPU_FUT_LOCKING_SPINLOCK 0x514b +#define _STARPU_FUT_SPINLOCK_LOCKED 0x514c + +#define _STARPU_FUT_UNLOCKING_SPINLOCK 0x514d +#define _STARPU_FUT_SPINLOCK_UNLOCKED 0x514e + +#define _STARPU_FUT_TRYLOCK_SPINLOCK 0x514f + +#define _STARPU_FUT_COND_WAIT_BEGIN 0x5150 +#define _STARPU_FUT_COND_WAIT_END 0x5151 + +#define _STARPU_FUT_MEMORY_FULL 0x5152 + +#define _STARPU_FUT_DATA_LOAD 0x5153 + +#define _STARPU_FUT_START_UNPARTITION_ON_TID 
0x5154 +#define _STARPU_FUT_END_UNPARTITION_ON_TID 0x5155 + +#define _STARPU_FUT_START_FREE 0x5156 +#define _STARPU_FUT_END_FREE 0x5157 + +#define _STARPU_FUT_START_WRITEBACK 0x5158 +#define _STARPU_FUT_END_WRITEBACK 0x5159 + +#define _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO 0x515a +#define _STARPU_FUT_SCHED_COMPONENT_POP_PRIO 0x515b + +#define _STARPU_FUT_START_WRITEBACK_ASYNC 0x515c +#define _STARPU_FUT_END_WRITEBACK_ASYNC 0x515d + +#define _STARPU_FUT_HYPERVISOR_BEGIN 0x5160 +#define _STARPU_FUT_HYPERVISOR_END 0x5161 + +#define _STARPU_FUT_BARRIER_WAIT_BEGIN 0x5162 +#define _STARPU_FUT_BARRIER_WAIT_END 0x5163 + +#define _STARPU_FUT_WORKER_SCHEDULING_START 0x5164 +#define _STARPU_FUT_WORKER_SCHEDULING_END 0x5165 +#define _STARPU_FUT_WORKER_SCHEDULING_PUSH 0x5166 +#define _STARPU_FUT_WORKER_SCHEDULING_POP 0x5167 + +#define _STARPU_FUT_START_EXECUTING 0x5168 +#define _STARPU_FUT_END_EXECUTING 0x5169 + +#define _STARPU_FUT_SCHED_COMPONENT_NEW 0x516a +#define _STARPU_FUT_SCHED_COMPONENT_CONNECT 0x516b +#define _STARPU_FUT_SCHED_COMPONENT_PUSH 0x516c +#define _STARPU_FUT_SCHED_COMPONENT_PULL 0x516d + +#define _STARPU_FUT_TASK_SUBMIT_START 0x516e +#define _STARPU_FUT_TASK_SUBMIT_END 0x516f + +#define _STARPU_FUT_TASK_BUILD_START 0x5170 +#define _STARPU_FUT_TASK_BUILD_END 0x5171 + +#define _STARPU_FUT_TASK_MPI_DECODE_START 0x5172 +#define _STARPU_FUT_TASK_MPI_DECODE_END 0x5173 + +#define _STARPU_FUT_TASK_MPI_PRE_START 0x5174 +#define _STARPU_FUT_TASK_MPI_PRE_END 0x5175 + +#define _STARPU_FUT_TASK_MPI_POST_START 0x5176 +#define _STARPU_FUT_TASK_MPI_POST_END 0x5177 + +#define _STARPU_FUT_TASK_WAIT_START 0x5178 +#define _STARPU_FUT_TASK_WAIT_END 0x5179 + +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_START 0x517a +#define _STARPU_FUT_TASK_WAIT_FOR_ALL_END 0x517b + +#define _STARPU_FUT_HANDLE_DATA_REGISTER 0x517c + +#define _STARPU_FUT_START_FETCH_INPUT 0x517e +#define _STARPU_FUT_END_FETCH_INPUT 0x517f + +#define _STARPU_FUT_TASK_THROTTLE_START 0x5180 +#define 
_STARPU_FUT_TASK_THROTTLE_END 0x5181 + +#define _STARPU_FUT_DATA_STATE_INVALID 0x5182 +#define _STARPU_FUT_DATA_STATE_OWNER 0x5183 +#define _STARPU_FUT_DATA_STATE_SHARED 0x5184 + +#define _STARPU_FUT_DATA_REQUEST_CREATED 0x5185 +#define _STARPU_FUT_PAPI_TASK_EVENT_VALUE 0x5186 +#define _STARPU_FUT_TASK_EXCLUDE_FROM_DAG 0x5187 + +#define _STARPU_FUT_TASK_END_DEP 0x5188 + +#ifdef STARPU_RECURSIVE_TASKS +#define _STARPU_FUT_RECURSIVE_TASK 0x5189 +#endif + +#define _STARPU_FUT_START_PARALLEL_SYNC 0x518a +#define _STARPU_FUT_END_PARALLEL_SYNC 0x518b + +/* Predefined FUT key masks */ +#define _STARPU_FUT_KEYMASK_META FUT_KEYMASK0 +#define _STARPU_FUT_KEYMASK_USER FUT_KEYMASK1 +#define _STARPU_FUT_KEYMASK_TASK FUT_KEYMASK2 +#define _STARPU_FUT_KEYMASK_TASK_VERBOSE FUT_KEYMASK3 +#define _STARPU_FUT_KEYMASK_DATA FUT_KEYMASK4 +#define _STARPU_FUT_KEYMASK_DATA_VERBOSE FUT_KEYMASK5 +#define _STARPU_FUT_KEYMASK_WORKER FUT_KEYMASK6 +#define _STARPU_FUT_KEYMASK_WORKER_VERBOSE FUT_KEYMASK7 +#define _STARPU_FUT_KEYMASK_DSM FUT_KEYMASK8 +#define _STARPU_FUT_KEYMASK_DSM_VERBOSE FUT_KEYMASK9 +#define _STARPU_FUT_KEYMASK_SCHED FUT_KEYMASK10 +#define _STARPU_FUT_KEYMASK_SCHED_VERBOSE FUT_KEYMASK11 +#define _STARPU_FUT_KEYMASK_LOCK FUT_KEYMASK12 +#define _STARPU_FUT_KEYMASK_LOCK_VERBOSE FUT_KEYMASK13 +#define _STARPU_FUT_KEYMASK_EVENT FUT_KEYMASK14 +#define _STARPU_FUT_KEYMASK_EVENT_VERBOSE FUT_KEYMASK15 +#define _STARPU_FUT_KEYMASK_MPI FUT_KEYMASK16 +#define _STARPU_FUT_KEYMASK_MPI_VERBOSE FUT_KEYMASK17 +#define _STARPU_FUT_KEYMASK_HYP FUT_KEYMASK18 +#define _STARPU_FUT_KEYMASK_HYP_VERBOSE FUT_KEYMASK19 +#define _STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA FUT_KEYMASK20 +#define _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA FUT_KEYMASK21 +/* When doing modifications to keymasks: + * - also adapt _starpu_profile_get_user_keymask() in src/profiling/fxt/fxt.c + * - adapt KEYMASKALL_DEFAULT in src/profiling/fxt/fxt.c + * - adapt the documentation in 501_environment_variable.doxy and/or + * 
380_offline_performance_tools.doxy */ + +extern unsigned long _starpu_job_cnt; + +static inline unsigned long _starpu_fxt_get_job_id(void) +{ + unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_job_cnt, 1); + STARPU_ASSERT_MSG(ret != 0, "Oops, job_id wrapped! There are too many tasks for tracking them for profiling"); + return ret; +} + +#ifdef STARPU_USE_FXT + +/* Some versions of FxT do not include the declaration of the function */ +#ifdef HAVE_ENABLE_FUT_FLUSH +#if !HAVE_DECL_ENABLE_FUT_FLUSH +void enable_fut_flush(); +#endif +#endif +#ifdef HAVE_FUT_SET_FILENAME +#if !HAVE_DECL_FUT_SET_FILENAME +void fut_set_filename(char *filename); +#endif +#endif + +extern int _starpu_fxt_started STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern int _starpu_fxt_willstart STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern starpu_pthread_mutex_t _starpu_fxt_started_mutex STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; +extern starpu_pthread_cond_t _starpu_fxt_started_cond STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Wait until FXT is started (or not). Returns whether FXT was started */ +static inline int _starpu_fxt_wait_initialisation() +{ + STARPU_PTHREAD_MUTEX_LOCK(&_starpu_fxt_started_mutex); + while (_starpu_fxt_willstart && !_starpu_fxt_started) + STARPU_PTHREAD_COND_WAIT(&_starpu_fxt_started_cond, &_starpu_fxt_started_mutex); + STARPU_PTHREAD_MUTEX_UNLOCK(&_starpu_fxt_started_mutex); + + return _starpu_fxt_started; +} + +extern unsigned long _starpu_submit_order; + +static inline unsigned long _starpu_fxt_get_submit_order(void) +{ + unsigned long ret = STARPU_ATOMIC_ADDL(&_starpu_submit_order, 1); + STARPU_ASSERT_MSG(ret != 0, "Oops, submit_order wrapped! There are too many tasks for tracking them for profiling"); /* test the value this call obtained, as _starpu_fxt_get_job_id does; re-reading the shared counter is racy and can miss the wrap */ + return ret; +} + +int _starpu_generate_paje_trace_read_option(const char *option, struct starpu_fxt_options *options) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Initialize the FxT library. 
*/ +void _starpu_fxt_init_profiling(uint64_t trace_buffer_size); + +/** Stop the FxT library, and generate the trace file. */ +void _starpu_stop_fxt_profiling(void); + +/** In case we use MPI, tell the profiling system how many processes are used. */ +void _starpu_profiling_set_mpi_worldsize(int worldsize) STARPU_ATTRIBUTE_VISIBILITY_DEFAULT; + +/** Generate the trace file. Used when catching signals SIGINT and SIGSEGV */ +void _starpu_fxt_dump_file(void); + +#ifdef FUT_NEEDS_COMMIT +#define _STARPU_FUT_COMMIT(size) fut_commitstampedbuffer(size) +#else +#define _STARPU_FUT_COMMIT(size) do { } while (0) +#endif + +#ifdef FUT_RAW_ALWAYS_PROBE1STR +#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) FUT_RAW_ALWAYS_PROBE1STR(CODE, P1, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str) \ +do { \ + if(STARPU_UNLIKELY(fut_active)) { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... */ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 1)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 1 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ + }} while (0) +#endif + +#ifdef FUT_FULL_PROBE1STR +#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) FUT_FULL_PROBE1STR(CODE, P1, str) +#else +/** Sometimes we need something a little more specific than the wrappers from + * FxT: these macros record an event made of 1 to 7 numbers followed + * by a string. 
*/ +#define _STARPU_FUT_FULL_PROBE1STR(KEYMASK, CODE, P1, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE1STR(CODE, P1, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE2STR +#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) FUT_RAW_ALWAYS_PROBE2STR(CODE, P1, P2, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... */ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 2)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 2 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE2STR +#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) FUT_FULL_PROBE2STR(CODE, P1, P2, str) +#else +#define _STARPU_FUT_FULL_PROBE2STR(KEYMASK, CODE, P1, P2, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE2STR(CODE, P1, P2, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE3STR +#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) FUT_RAW_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 3)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 3 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE3STR +#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) FUT_FULL_PROBE3STR(CODE, P1, P2, P3, str) +#else +#define _STARPU_FUT_FULL_PROBE3STR(KEYMASK, CODE, P1, P2, P3, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE3STR(CODE, P1, P2, P3, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE4STR +#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) FUT_RAW_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 4)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 4 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE4STR +#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) FUT_FULL_PROBE4STR(CODE, P1, P2, P3, P4, str) +#else +#define _STARPU_FUT_FULL_PROBE4STR(KEYMASK, CODE, P1, P2, P3, P4, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE4STR(CODE, P1, P2, P3, P4, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE5STR +#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) FUT_RAW_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 5)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 5 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE5STR +#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) FUT_FULL_PROBE5STR(CODE, P1, P2, P3, P4, P5, str) +#else +#define _STARPU_FUT_FULL_PROBE5STR(KEYMASK, CODE, P1, P2, P3, P4, P5, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE5STR(CODE, P1, P2, P3, P4, P5, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE6STR +#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) FUT_RAW_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 6)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 6 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + *(futargs++) = (unsigned long)(P6); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE6STR +#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) FUT_FULL_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str) +#else +#define _STARPU_FUT_FULL_PROBE6STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE6STR(CODE, P1, P2, P3, P4, P5, P6, str); \ + } \ +} while (0) +#endif + +#ifdef FUT_ALWAYS_PROBE7STR +#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_RAW_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) +#else +#define _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) \ +do { \ + /* No more than FXT_MAX_PARAMS args are allowed */ \ + /* we add a \0 just in case ... 
*/ \ + size_t len = STARPU_MIN(strlen(str)+1, (FXT_MAX_PARAMS - 7)*sizeof(unsigned long));\ + unsigned nbargs_str = (len + sizeof(unsigned long) - 1)/(sizeof(unsigned long));\ + unsigned nbargs = 7 + nbargs_str; \ + size_t total_len = FUT_SIZE(nbargs); \ + unsigned long *futargs = \ + fut_getstampedbuffer(FUT_CODE(CODE, nbargs), total_len);\ + *(futargs++) = (unsigned long)(P1); \ + *(futargs++) = (unsigned long)(P2); \ + *(futargs++) = (unsigned long)(P3); \ + *(futargs++) = (unsigned long)(P4); \ + *(futargs++) = (unsigned long)(P5); \ + *(futargs++) = (unsigned long)(P6); \ + *(futargs++) = (unsigned long)(P7); \ + snprintf((char *)futargs, len, "%s", str); \ + ((char *)futargs)[len - 1] = '\0'; \ + _STARPU_FUT_COMMIT(total_len); \ +} while (0) +#endif + +#ifdef FUT_FULL_PROBE7STR +#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) FUT_FULL_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str) +#else +#define _STARPU_FUT_FULL_PROBE7STR(KEYMASK, CODE, P1, P2, P3, P4, P5, P6, P7, str) \ +do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + _STARPU_FUT_ALWAYS_PROBE7STR(CODE, P1, P2, P3, P4, P5, P6, P7, str); \ + } \ +} while (0) +#endif + +#ifndef FUT_RAW_PROBE7 +#define FUT_RAW_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + if(STARPU_UNLIKELY(fut_active)) { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(7)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ + _STARPU_FUT_COMMIT(FUT_SIZE(7)); \ + } \ + } while (0) +#endif + +#ifndef FUT_RAW_ALWAYS_PROBE1 +#define FUT_RAW_ALWAYS_PROBE1(CODE,P1) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(1)); \ + *(__args++)=(unsigned long)(P1); \ + fut_commitstampedbuffer(FUT_SIZE(1)); \ + } while (0) +#endif +#define 
FUT_DO_ALWAYS_PROBE1(CODE,P1) do { \ + FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE2 +#define FUT_RAW_ALWAYS_PROBE2(CODE,P1,P2) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(2)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2); \ + fut_commitstampedbuffer(FUT_SIZE(2)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE2(CODE,P1,P2) do { \ + FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE3 +#define FUT_RAW_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(3)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3); \ + fut_commitstampedbuffer(FUT_SIZE(3)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE3(CODE,P1,P2,P3) do { \ + FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE4 +#define FUT_RAW_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(4)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4); \ + fut_commitstampedbuffer(FUT_SIZE(4)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE4(CODE,P1,P2,P3,P4) do { \ + FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE5 +#define FUT_RAW_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(5)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5); \ + fut_commitstampedbuffer(FUT_SIZE(5)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE5(CODE,P1,P2,P3,P4,P5) do { \ + 
FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE6 +#define FUT_RAW_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(6)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6); \ + fut_commitstampedbuffer(FUT_SIZE(6)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE6(CODE,P1,P2,P3,P4,P5,P6) do { \ + FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE7 +#define FUT_RAW_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(7)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7); \ + fut_commitstampedbuffer(FUT_SIZE(7)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE7(CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ +} while (0) + +#ifndef FUT_RAW_ALWAYS_PROBE8 +#define FUT_RAW_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(8)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8); \ + fut_commitstampedbuffer(FUT_SIZE(8)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE8(CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ +} while (0) + +#ifndef 
FUT_RAW_ALWAYS_PROBE9 +#define FUT_RAW_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + unsigned long *__args __attribute__((unused))= \ + fut_getstampedbuffer(CODE, \ + FUT_SIZE(9)); \ + *(__args++)=(unsigned long)(P1);*(__args++)=(unsigned long)(P2);*(__args++)=(unsigned long)(P3);*(__args++)=(unsigned long)(P4);*(__args++)=(unsigned long)(P5);*(__args++)=(unsigned long)(P6);*(__args++)=(unsigned long)(P7);*(__args++)=(unsigned long)(P8);*(__args++)=(unsigned long)(P9); \ + fut_commitstampedbuffer(FUT_SIZE(9)); \ + } while (0) +#endif +#define FUT_DO_ALWAYS_PROBE9(CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ +} while (0) + +/* full probes */ +#ifndef FUT_FULL_PROBE0 +#define FUT_FULL_PROBE0(KEYMASK,CODE) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE0(FUT_CODE(CODE, 0)); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE1 +#define FUT_FULL_PROBE1(KEYMASK,CODE,P1) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE1(FUT_CODE(CODE, 1),P1); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE2 +#define FUT_FULL_PROBE2(KEYMASK,CODE,P1,P2) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE2(FUT_CODE(CODE, 2),P1,P2); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE3 +#define FUT_FULL_PROBE3(KEYMASK,CODE,P1,P2,P3) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE3(FUT_CODE(CODE, 3),P1,P2,P3); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE4 +#define FUT_FULL_PROBE4(KEYMASK,CODE,P1,P2,P3,P4) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE4(FUT_CODE(CODE, 4),P1,P2,P3,P4); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE5 +#define FUT_FULL_PROBE5(KEYMASK,CODE,P1,P2,P3,P4,P5) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE5(FUT_CODE(CODE, 5),P1,P2,P3,P4,P5); \ + } \ +} while(0) +#endif + +#ifndef 
FUT_FULL_PROBE6 +#define FUT_FULL_PROBE6(KEYMASK,CODE,P1,P2,P3,P4,P5,P6) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE6(FUT_CODE(CODE, 6),P1,P2,P3,P4,P5,P6); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE7 +#define FUT_FULL_PROBE7(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7) do { \ + if (STARPU_UNLIKELY(KEYMASK & fut_active)) { \ + FUT_RAW_ALWAYS_PROBE7(FUT_CODE(CODE, 7),P1,P2,P3,P4,P5,P6,P7); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE8 +#define FUT_FULL_PROBE8(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8) do { \ + if(KEYMASK & fut_active) { \ + FUT_RAW_ALWAYS_PROBE8(FUT_CODE(CODE, 8),P1,P2,P3,P4,P5,P6,P7,P8); \ + } \ +} while(0) +#endif + +#ifndef FUT_FULL_PROBE9 +#define FUT_FULL_PROBE9(KEYMASK,CODE,P1,P2,P3,P4,P5,P6,P7,P8,P9) do { \ + if(KEYMASK & fut_active) { \ + FUT_RAW_ALWAYS_PROBE9(FUT_CODE(CODE, 9),P1,P2,P3,P4,P5,P6,P7,P8,P9); \ + } \ +} while(0) +#endif + + + + + + + + +/* TODO: the following macros are never called + * -> shall we remove them ? + */ + +#endif // STARPU_USE_FXT + +#pragma GCC visibility pop + +#endif // __FXT_H__ diff --git a/src/profiling/profiling.c b/src/profiling/profiling.c index 554745a9db..d58e769e30 100644 --- a/src/profiling/profiling.c +++ b/src/profiling/profiling.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #ifdef STARPU_PAPI @@ -116,7 +116,7 @@ int starpu_profiling_status_set(int status) _starpu_profiling = status; ANNOTATE_HAPPENS_BEFORE(&_starpu_profiling); - _STARPU_TRACE_SET_PROFILING(status); + _starpu_trace_set_profiling(status); /* If we enable profiling, we reset the counters. 
*/ if (status == STARPU_PROFILING_ENABLE) @@ -240,7 +240,7 @@ void _starpu_profiling_papi_task_stop_counters(struct starpu_task *task) PAPI_stop(profiling_info->papi_event_set, profiling_info->papi_values); for(i=0; ipapi_values[i]); + _starpu_trace_papi_task_event(papi_events[i], task, profiling_info->papi_values[i]); } PAPI_cleanup_eventset(profiling_info->papi_event_set); PAPI_destroy_eventset(&profiling_info->papi_event_set); diff --git a/src/profiling/starpu_tracing.c b/src/profiling/starpu_tracing.c new file mode 100644 index 0000000000..2c3a00b9e2 --- /dev/null +++ b/src/profiling/starpu_tracing.c @@ -0,0 +1,2313 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria, Télécom SudParis + * Copyright (C) 2023-2025 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "starpu_tracing.h" + +extern struct _starpu_machine_config _starpu_config; + +int _starpu_trace_initialize() +{ +#ifdef STARPU_USE_FXT + _starpu_fxt_init_profiling(_starpu_config.conf.trace_buffer_size); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_init) + { + struct starpu_prof_tool_info pi; + + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_init, 0, 0, starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_init(&pi, NULL, NULL); + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + TASKTIMER_INITIALIZE(); +#endif + return 0; +} + +int _starpu_trace_finalize() +{ +#ifdef STARPU_USE_FXT + _starpu_stop_fxt_profiling(); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate) + { + struct starpu_prof_tool_info pi; + pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_terminate, 0, starpu_prof_tool_driver_cpu, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_terminate(&pi, NULL, NULL); + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + TASKTIMER_FINALIZE(); +#endif + + return 0; +} + +int _starpu_trace_initialize_begin() +{ +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin) + { + struct starpu_prof_tool_info pi; + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_init_begin, 0, 0, starpu_prof_tool_driver_cpu, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_begin(&pi, NULL, NULL); + } +#endif + return 0; +} + + +/** + * A new memory node is registered. + * \p nodeid is the id of the new node. + */ +int _starpu_trace_new_mem_node(int nodeid STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, _starpu_gettid()); +#endif + return 0; +} + +/** + * A new worker thread is registered. 
+ * \p is the bind id the driver bound to (logical index). + */ +int _starpu_trace_register_thread(int bindid STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, bindid, _starpu_gettid()); +#endif + return 0; +} + +int _starpu_trace_worker_initialize() +{ + return 0; +} + +int _starpu_trace_worker_finalize() +{ + return 0; +} + +/** + * A worker has started its shutdown process. + */ +int _starpu_trace_worker_deinit_start() +{ +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, _starpu_gettid()); +#endif + + return 0; +} + +/** + * A worker has completed its shutdown process. + * \p workerkind is the worker id shut down. + */ +int _starpu_trace_worker_deinit_end(unsigned workerid, enum starpu_worker_archtype workerkind) +{ +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit) + { + enum starpu_prof_tool_driver_type drivertype; + switch(workerkind) + { + case STARPU_CPU_WORKER: drivertype = starpu_prof_tool_driver_cpu; break; + case STARPU_CUDA_WORKER: drivertype = starpu_prof_tool_driver_gpu; break; + case STARPU_OPENCL_WORKER: drivertype = starpu_prof_tool_driver_ocl; break; + case STARPU_HIP_WORKER: drivertype = starpu_prof_tool_driver_hip; break; + default: drivertype = starpu_prof_tool_driver_cpu; break; + } + + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_deinit, workerid, workerid, drivertype, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_deinit(&pi, NULL, NULL); + } +#endif + return 0; +} + +int _starpu_trace_start_executing(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *worker, void* func) +{ +#ifdef STARPU_USE_FXT + /** + * The 
execution of the job starts at the device driver level. + * \p job is the job instance. + */ + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_EXECUTING, _starpu_gettid(), (j)->job_id); +#endif + +#ifdef STARPU_PROF_TOOL + starpu_prof_tool_cb_func callback = NULL; + /* First, we need to find if the worker is a cpu, a gpu, etc. */ + switch(worker->arch) + { + case STARPU_CPU_WORKER: + callback = starpu_prof_tool_callbacks.starpu_prof_tool_event_start_cpu_exec; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + callback = starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec; + break; + default: + callback = NULL; + } + + if(callback) + { + struct starpu_prof_tool_info pi; + int devid = worker->devid; + + enum starpu_prof_tool_event event_type = starpu_prof_tool_event_start_cpu_exec; + enum starpu_prof_tool_driver_type driver_type = starpu_prof_tool_driver_cpu; + switch(worker->arch) + { + case STARPU_CPU_WORKER: + event_type = starpu_prof_tool_event_start_cpu_exec; + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + event_type = starpu_prof_tool_event_start_gpu_exec; + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + + pi = _starpu_prof_tool_get_info(event_type, devid, worker_task->workerid, driver_type, -1, func); + pi.model_name = _starpu_job_get_model_name(j); + pi.task_name = _starpu_job_get_task_name(j); + + callback(&pi, NULL, NULL); + out: + ; + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + + unsigned long tid = j->job_id; + char* name = NULL; + /* a timer should have been created when the task was submitted */ + if(NULL == j->ps_task_timer) + { +/* j-> job_successors: list of all the completion groups that depend on the job */ + tasktimer_argument_value_t args[1]; + args[0].type = TASKTIMER_LONG_INTEGER_TYPE; + args[0].l_value = tid; + + uint64_t* parents = NULL; + 
uint64_t myguid = tid; + + if(NULL != j->task->name) + { + name = j->task->name; + } + else + { + asprintf(&name, "%s %p", "UNRESOLVED ADDR", func); + // TODO memory leak here + } + + TASKTIMER_CREATE(func, name, myguid, parents, j->job_successors.ndeps, tt); + j->ps_task_timer = tt; + + for (int i = 0; i < j->job_successors.ndeps; i++) + { + TASKTIMER_ADD_PARENTS(j->ps_task_timer, ((struct _starpu_job*)(j->job_successors.deps[i]->deps))->job_id, 1); + } + + } + + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU;/* tmp until I find what to put here */ + resource.device_id = 0; + resource.instance_id = _starpu_gettid; + + TASKTIMER_START(j->ps_task_timer, &resource); + +// if(NULL != name) free(name); + #endif + return 0; +} + +/** + * The execution of the job has been completed at the device driver level. + * \p job is the job instance. + */ +int _starpu_trace_end_executing(struct _starpu_job *job, struct _starpu_worker *worker) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_EXECUTING, _starpu_gettid(), (job)->job_id); +#endif + +#ifdef STARPU_PROF_TOOL + starpu_prof_tool_cb_func callback = NULL; + /* First, we need to find if the worker is a cpu, a gpu, etc. */ + switch(worker->arch) + { + case STARPU_CPU_WORKER: + callback = starpu_prof_tool_callbacks.starpu_prof_tool_event_end_cpu_exec; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + callback = starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec; + break; + default: + goto out; + } + + if(callback) + { + struct starpu_task *worker_task = job->task; + struct starpu_codelet *cl = worker_task->cl; + // crash here + void* func = _starpu_task_get_cpu_nth_implementation(cl, job->nimpl); + + struct starpu_prof_tool_info pi; +// int devid = cpu_args->devid; + // how do I get this? 
In the driver it is cpu_args->devid + int devid = -1; + enum starpu_prof_tool_driver_type driver_type; + enum starpu_prof_tool_event event_type; + + switch(worker->arch) + { + case STARPU_CPU_WORKER: + event_type = starpu_prof_tool_event_end_cpu_exec; + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + event_type = starpu_prof_tool_event_end_gpu_exec; + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + + pi = _starpu_prof_tool_get_info(event_type, devid, worker->workerid, driver_type, -1, func); + pi.model_name = _starpu_job_get_model_name(job); + pi.task_name = _starpu_job_get_task_name(job); + + callback(&pi, NULL, NULL); + } + out: + ; +#endif + +#ifdef STARPU_PROF_TASKSTUBS + TASKTIMER_STOP(job->ps_task_timer); + #endif + + return 0; +} + +/** + * The execution of a codelet implementation routine of a task instance has been started. + * \p job is the job instance. + * \p nimpl is the routine implementation number in the codelet routines list for the worker architecture. + * \p perf_arch is the performance model structure for the codelet on the worker architecture. + * \p workerid is the id of the worker. + * \p rank is the instance rank in a parallel team of workers in the case of a parallel task, or 0 for a sequential task. + */ +int _starpu_trace_start_codelet_body(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED, int nimpl STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, int rank STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA|_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & fut_active)) + { + int mem_node = workerid == -1 ? 
-1 : (int)starpu_worker_get_memory_node(workerid); + int codelet_null = job->task->cl == NULL; + int nowhere = (job->task->where == STARPU_NOWHERE) || (job->task->cl != NULL && job->task->cl->where == STARPU_NOWHERE); + enum starpu_node_kind kind = workerid == -1 ? STARPU_UNUSED : starpu_worker_get_memory_node_kind(starpu_worker_get_type(workerid)); + FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, job->job_id, (job->task)->sched_ctx, workerid, mem_node, _starpu_gettid(), (codelet_null == 1 || nowhere == 1)); + if (rank == 0 && job->task->cl) + { + const int __nbuffers = STARPU_TASK_GET_NBUFFERS(job->task); + char __buf[FXT_MAX_PARAMS*sizeof(long)]; + int __i; + for (__i = 0; __i < __nbuffers; __i++) + { + starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE(job->task, __i); + void *__interface = _STARPU_TASK_GET_INTERFACES(job->task)[__i]; + if (__interface && __handle->ops->describe) + { + __handle->ops->describe(__interface, __buf, sizeof(__buf)); + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, _starpu_gettid(), __buf); + } + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, job->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE(job->task, __i)); + /* Regarding the memory location: + * - if the data interface doesn't provide to_pointer operation, NULL will be returned + * and the location will be -1, which is fine; + * - we have to check whether the memory is on an actual NUMA node (and not on GPU + * memory, for instance); + * - looking at memory location before executing the task isn't the best choice: + * the page can be not allocated yet. A solution would be to get the memory + * location at the end of the task, but there is no FxT probe where we iterate over + * handles, after task execution. 
+ * */ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA, _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS, job->job_id, (__i), kind == STARPU_CPU_RAM && starpu_task_get_current_data_node(__i) >= 0 ? starpu_get_memory_location_bitmap(starpu_data_handle_to_pointer(__handle, (unsigned) starpu_task_get_current_data_node(__i)), starpu_data_get_size(__handle)) : -1); + } + } + if (!(codelet_null == 1 || nowhere == 1)) + { + const size_t __job_size = (perf_arch == NULL) ? 0 : _starpu_job_get_data_size(job->task->cl?job->task->cl->model:NULL, perf_arch, nimpl, job); + const uint32_t __job_hash = (perf_arch == NULL) ? 0 : _starpu_compute_buffers_footprint(job->task->cl?job->task->cl->model:NULL, perf_arch, nimpl, job); + FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, (job->task)->sched_ctx, __job_size, __job_hash, job->task->flops / 1000 / (job->task->cl && job->task->cl->type != STARPU_SEQ ? job->task_size : 1), job->task->tag_id, workerid, (job->job_id), _starpu_gettid()); + } + } + +#endif + + return 0; +} + +/** + * The execution of a codelet implementation routine of a task instance has been completed. + * \p job is the job instance. + * \p nimpl is the routine implementation number in the codelet routines list for the worker architecture. + * \p perf_arch is the performance model structure for the codelet on the worker architecture. + * \p workerid is the id of the worker. + * \p rank is the instance rank in a parallel team of workers in the case of a parallel task, or 0 for a sequential task. + */ +int _starpu_trace_end_codelet_body(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED, unsigned nimpl STARPU_ATTRIBUTE_UNUSED, struct starpu_perfmodel_arch* perf_arch STARPU_ATTRIBUTE_UNUSED, int workerid STARPU_ATTRIBUTE_UNUSED, int rank STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) + { + const size_t job_size = (perf_arch == NULL) ? 
0 : _starpu_job_get_data_size(job->task->cl?job->task->cl->model:NULL, perf_arch, nimpl, job); + const uint32_t job_hash = (perf_arch == NULL) ? 0 : _starpu_compute_buffers_footprint(job->task->cl?job->task->cl->model:NULL, perf_arch, nimpl, job); + char _archname[32]=""; + if (perf_arch) starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0); + int nowhere = (job->task->where == STARPU_NOWHERE) || (job->task->cl != NULL && job->task->cl->where == STARPU_NOWHERE); + int codelet_null = job->task->cl == NULL; + _STARPU_FUT_FULL_PROBE6STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, job->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), (codelet_null == 1 || nowhere == 1), _archname); + } +#endif + + return 0; +} + +/** + * A parallel team member of a parallel task has completed the task execution for its rank and enters the ending team synchronization barrier. + * \p job is the job instance. + */ +int _starpu_trace_start_parallel_sync(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); +#endif + return 0; +} + +/** + * A parallel team member of a parallel task has crossed the ending team synchronization barrier. + * \p job is the job instance. + */ +int _starpu_trace_end_parallel_sync(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PARALLEL_SYNC, _starpu_gettid(), (job)->job_id); +#endif + return 0; +} + +/** + * The execution of a user callback associated to a task has been started. + * \p job is the job instance. 
+ */ +int _starpu_trace_start_callback(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_CALLBACK, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * The execution of a user callback associated to a task has been completed. + * \p job is the job instance. + */ +int _starpu_trace_end_callback(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_CALLBACK, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task is pushed to a worker. + * \p task is the task instance. + * \p prio is the priority of the task instance. + */ +int _starpu_trace_job_push(struct starpu_task* task STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_PUSH, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task is poped from a queue. + * \p task is the task instance. + * \p prio is the priority of the task instance. + */ +int _starpu_trace_job_pop(struct starpu_task* task STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_JOB_POP, _starpu_get_job_associated_to_task(task)->job_id, prio, _starpu_gettid()); +#endif + return 0; +} + +/** + * Obsolete? Used only once with counter=0. + */ +int _starpu_trace_update_task_cnt(int counter STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_UPDATE_TASK_CNT, counter, _starpu_gettid()); +#endif + return 0; +} + +/** + * A synchronous data transfer has started to serve a task input dependence. + * \p job is the job instance. 
+ */ +int _starpu_trace_start_fetch_input(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_FETCH_INPUT_ON_TID, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * A synchronous data transfer has completed serving a task input dependence. + * \p job is the job instance. + */ +int _starpu_trace_end_fetch_input(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_FETCH_INPUT_ON_TID, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * A data transfer has started to serve a task output dependence. + * \p job is the job instance. + */ +int _starpu_trace_start_push_output(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * A data transfer has completed serving a task output dependence. + * \p job is the job instance. + */ +int _starpu_trace_end_push_output(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_PUSH_OUTPUT_ON_TID, job, _starpu_gettid()); +#endif + return 0; +} + +/** + * An asynchronous data transfer has completed serving a task input dependence. + * \p job is the job instance. + * \p id is the worker id. + * Note: This trace event does not seem to be used. + */ +int _starpu_trace_worker_end_fetch_input(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED, int id STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_FETCH_INPUT, job, id); +#endif + return 0; +} + +/** + * An asynchronous data transfer has started to serve a task input dependence. + * \p job is the job instance. It is NULL in every occurrence. + * \p id is the worker id. 
+ */ +int _starpu_trace_worker_start_fetch_input(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED, int id STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_FETCH_INPUT, job, id); +#endif + return 0; +} + +/** + * A task is associated with a dependence tag. + * \p tag is the tag id. + * \p job is the job instance. + */ +int _starpu_trace_tag(starpu_tag_t* tag STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG, tag, (job)->job_id); +#endif + return 0; +} + +/** + * A dependence is declared between two tags. + * \p tag_child is the successor dependence tag id. + * \p tag_parent is the predecessor dependence tag id. + */ +int _starpu_trace_tag_deps(starpu_tag_t* tag_child STARPU_ATTRIBUTE_UNUSED, starpu_tag_t* tag_parent STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DEPS, tag_child, tag_parent); +#endif + return 0; +} + +/** + * A dependence is declared between two tasks. + * \p job_prev is the predecessor job. + * \p job_succ is the successor job. + */ +int _starpu_trace_task_deps(struct _starpu_job *job_prev STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job_succ STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id, (job_succ)->task->type, 1, "task"); +#endif + +#ifdef STARPU_PROF_TASKSTUBS + /* looks like the succ is the current task */ + +#if 0 + TASKTIMER_ADD_PARENTS(job_succ->ps_task_timer, job_prev->job_id, 1); +#endif +#endif + return 0; +} + +/** + * An end dependence between a predecessor task and successor task whose completion had been deferred has now been resolved. + * \p job_prev is the predecessor job. + * \p job_succ is the successor job with deferred completion. 
+ */ +int _starpu_trace_task_end_dep(struct _starpu_job *job_prev STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job_succ STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* FUT_DO_PROBE2 takes no keymask argument, unlike the FUT_FULL_PROBE* calls used elsewhere in this file. */ + FUT_DO_PROBE2(_STARPU_FUT_TASK_END_DEP, (job_prev)->job_id, (job_succ)->job_id); +#endif + return 0; +} + +/** + * A dependence edge between a defunct task and a successor task has been detected. + * \p ghost_prev_id is the predecessor ghost id. + * \p job_succ is the successor job. + * \return always 0. + */ +int _starpu_trace_ghost_task_deps(unsigned ghost_prev_id STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job_succ STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "ghost"); +#endif + return 0; +} + +/** + * Placeholder for a bubble task dependence event: no probe is emitted, the FxT section is empty. + * \p prev_id and \p job_succ are currently ignored. + * \return always 0. + */ +int _starpu_trace_bubble_task_deps(unsigned long prev_id STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job_succ STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + +#endif + return 0; +} + +/** + * A dependence edge towards a successor job is recorded with the "recursive_task" label. + * The event is only emitted when both STARPU_RECURSIVE_TASKS and STARPU_USE_FXT are defined. + * \p prev_id is the id recorded as the predecessor. + * \p job_succ is the successor job. + * \return always 0. + */ +int _starpu_trace_recursive_task_deps(unsigned long prev_id STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *job_succ STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_RECURSIVE_TASKS +#ifdef STARPU_USE_FXT + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_DEPS, (prev_id), (job_succ)->job_id, (job_succ)->task->type, 1, "recursive_task"); +#endif +#endif + return 0; +} + +/** + * The recursive-task attributes of a job (is_recursive_task flag and recursive_task_parent) are recorded. + * The event is only emitted when both STARPU_RECURSIVE_TASKS and STARPU_USE_FXT are defined. + * \p job is the corresponding job. + * \return always 0. + */ +int _starpu_trace_recursive_task(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_RECURSIVE_TASKS +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) + { + unsigned int is_recursive_task=(job)->is_recursive_task; + unsigned long recursive_task_parent=(job)->task->recursive_task_parent; + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_RECURSIVE_TASK, (job)->job_id, is_recursive_task, recursive_task_parent); + } +#endif +#endif + return 0; +} + +/** + * A task is marked to be ignored in debugging tools. + * \p job is the job to be ignored. 
+ */ +int _starpu_trace_task_exclude_from_dag(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* The current value of the job's exclude_from_dag field is recorded, whatever it is. */ + unsigned exclude_from_dag = (job)->exclude_from_dag; + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_EXCLUDE_FROM_DAG, (job)->job_id, (long unsigned)exclude_from_dag); +#endif + return 0; +} + +/** + * A task is assigned a name, line and color metadata. + * Convenience wrapper that emits the color, name and line events, in that order. + * \p job is the corresponding job. + * \return always 0. + */ +int _starpu_trace_task_name_line_color(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _starpu_trace_task_color(job); + _starpu_trace_task_name(job); + _starpu_trace_task_line(job); +#endif + return 0; +} + +/** + * A task is assigned a line metadata. + * Nothing is recorded when the task has no file information. + * \p job is the corresponding job. + * \return always 0. + */ +int _starpu_trace_task_line(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if ((job)->task->file) + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_LINE, (job)->job_id, (job)->task->line, (job)->task->file); +#endif + return 0; +} + +/** + * Placeholder for a bubble trace event: no probe is emitted, the FxT section is empty. + * \p job is currently ignored. + * \return always 0. + */ +int _starpu_trace_bubble(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + +#endif + return 0; +} + +/** + * A task is assigned a name metadata. + * \p job is the corresponding job. 
+ */ +int _starpu_trace_task_name(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* The name lookups are only performed when the TASK keymask bit is set in fut_active. */ + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) + { + const char *model_name = _starpu_job_get_model_name((job)); + const char *name = _starpu_job_get_task_name((job)); + if (name) + { + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), name); + } + else + { + /* A task without a name is recorded as "unknown". */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_NAME, (job)->job_id, _starpu_gettid(), "unknown"); + } + /* The model name is a separate, optional event. */ + if (model_name) + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_MODEL_NAME, (job)->job_id, _starpu_gettid(), model_name); + } +#endif + return 0; +} + +/** + * A task is assigned a color metadata. + * The task's own color takes precedence; otherwise the codelet color is used, if any. + * Nothing is recorded when both are 0. + * \p job is the corresponding job. + * \return always 0. + */ +int _starpu_trace_task_color(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) + { + if ((job)->task->color != 0) + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->color); + else if ((job)->task->cl && (job)->task->cl->color != 0) + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_COLOR, (job)->job_id, (job)->task->cl->color); + } +#endif + return 0; +} + +/** + * A task has completed its codelet routine execution and its epilogue steps. + * \p job is the completed job. + * \return always 0. + */ +int _starpu_trace_task_done(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_DONE, (job)->job_id, _starpu_gettid()); +#endif + return 0; +} + +/** + * A dependence tag is releasing its dependences. + * \p tag is the done tag. 
+ */ +int _starpu_trace_tag_done(struct _starpu_tag* tag STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK) & fut_active)) + { + struct _starpu_job *job = (tag)->job; + /* Despite its name, model_name holds the result of _starpu_job_get_task_name(). */ + const char *model_name = _starpu_job_get_task_name((job)); + if (model_name) + { + /* Third probe argument 1: a name string follows. */ + _STARPU_FUT_FULL_PROBE3STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 1, model_name); + } + else + { + /* Third probe argument 0: no name string is attached. */ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TAG_DONE, (tag)->id, _starpu_gettid(), 0); + } + } +#endif + return 0; +} + +/** + * A data handle is assigned a name metadata. + * \p handle is the corresponding data handle. + * \p name is the name to be assigned to the data handle. + * \return always 0. + */ +int _starpu_trace_data_name(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, const char* name STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_NAME, handle, name); +#endif + return 0; +} + +/** + * A data handle is assigned coordinates metadata. + * \p handle is the corresponding data handle. + * \p dim is the number of dimensions for the coordinates. + * \p v is the array of coordinates. 
+ */ +int _starpu_trace_data_coordinates(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned dim STARPU_ATTRIBUTE_UNUSED, int v[] STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* One probe arity per dimension count. The default case covers every dim outside 1..4 and always reads v[0]..v[4]; NOTE(review): confirm callers never pass dim == 0 or an array shorter than 5 entries there. */ + switch (dim) + { + case 1: FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0]); break; + case 2: FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1]); break; + case 3: FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2]); break; + case 4: FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3]); break; + default: FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_DATA_COORDINATES, handle, dim, v[0], v[1], v[2], v[3], v[4]); break; + } +#endif + return 0; +} + +/** + * A copy of data has been performed from one memory node to another memory node. + * \p src_node is the source node of the copy. + * \p dst_node is the destination node of the copy. + * \p size is the length of the copy in bytes. + * \return always 0. + */ +int _starpu_trace_data_copy(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_COPY, src_node, dst_node, size); +#endif + return 0; +} + +/** + * A data handle has been marked as eligible for cache eviction. + * \p handle is the corresponding data handle. + * \return always 0. + */ +int _starpu_trace_data_wont_use(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* Besides the handle, the probe records the values of _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id() and the calling thread id. */ + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_DATA_WONT_USE, handle, _starpu_fxt_get_submit_order(), _starpu_fxt_get_job_id(), _starpu_gettid()); +#endif + return 0; +} + +/** + * A data handle cache eviction mark is being processed. + * \p handle is the corresponding data handle. 
+ */ +int _starpu_trace_data_doing_wont_use(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_DOING_WONT_USE, handle); +#endif + return 0; +} + +/** + * A data copy has been started by a device driver between two nodes. + * \p src_node is the source node. + * \p dst_node is the destination node. + * \p size is the data copy length in bytes. + * \p com_id is the communication id. + * \p prefetch is the prefetch level. + * \p handle is the corresponding data handle. + * \return always 0. + */ +int _starpu_trace_start_driver_copy(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, unsigned long com_id STARPU_ATTRIBUTE_UNUSED, enum starpu_is_prefetch prefetch STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch, handle); +#endif + return 0; +} + +/** + * A synchronous data copy has been completed by a device driver between two nodes. + * Unlike _starpu_trace_start_driver_copy(), this function takes no data handle. + * \p src_node is the source node. + * \p dst_node is the destination node. + * \p size is the data copy length in bytes. + * \p com_id is the communication id. + * \p prefetch is the prefetch level. + * \return always 0. + */ +int _starpu_trace_end_driver_copy(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, unsigned long com_id STARPU_ATTRIBUTE_UNUSED,enum starpu_is_prefetch prefetch STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id, prefetch); +#endif + return 0; +} + +/** + * An asynchronous data copy has been started by a device driver between two nodes. + * \p src_node is the source node. + * \p dst_node is the destination node. 
+ */ +int _starpu_trace_start_driver_copy_async(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node); +#endif + return 0; +} + +/** + * An asynchronous data copy has been completed by a device driver between two nodes. + * \p src_node is the source node. + * \p dst_node is the destination node. + * \return always 0. + */ +int _starpu_trace_end_driver_copy_async(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node); +#endif + return 0; +} + +/** + * A task has been stolen by an idle worker from a victim worker. + * \p empty_q is the workerid of the idle thief worker. + * \p victim_q is the workerid of the victim worker. + * \return always 0. + */ +int _starpu_trace_work_stealing(unsigned empty_q STARPU_ATTRIBUTE_UNUSED, unsigned victim_q STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_SCHED_VERBOSE, _STARPU_FUT_WORK_STEALING, empty_q, victim_q); +#endif + return 0; +} + +/** + * A worker has started electing a new task to execute. + * The event is recorded for the calling thread. + * \return always 0. + */ +int _starpu_trace_worker_scheduling_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A worker has completed electing a new task to execute. 
+ */ +int _starpu_trace_worker_scheduling_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A WORKER_SCHEDULING_PUSH event is recorded for the calling thread. + * NOTE(review): presumably marks the entry into a nested scheduling section -- confirm against the trace consumer. + * \return always 0. + */ +int _starpu_trace_worker_scheduling_push(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_PUSH, _starpu_gettid()); +#endif + return 0; +} + +/** + * A WORKER_SCHEDULING_POP event is recorded for the calling thread. + * NOTE(review): presumably marks the exit from a nested scheduling section -- confirm against the trace consumer. + * \return always 0. + */ +int _starpu_trace_worker_scheduling_pop(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_WORKER_SCHEDULING_POP, _starpu_gettid()); +#endif + return 0; +} + +/** + * An idle worker has fallen asleep. + * \return always 0. + */ +int _starpu_trace_worker_sleep_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * An idle worker has woken up. + * \return always 0. + */ +int _starpu_trace_worker_sleep_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_WORKER_SLEEP_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A new task has been submitted. + * \p job is the new job being submitted. + * \p iter is the optional outermost iteration number metadata in which the task submission occurs. + * \p subiter is the optional innermost iteration number metadata in which the task submission occurs. 
+ */ +int _starpu_trace_task_submit(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED, long iter STARPU_ATTRIBUTE_UNUSED, long subiter STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* Tasks flagged no_submitorder are recorded with submit order 0. */ + FUT_FULL_PROBE7(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_SUBMIT, (job)->job_id, iter, subiter, (job)->task->no_submitorder?0:_starpu_fxt_get_submit_order(), (job)->task->priority, (job)->task->type, _starpu_gettid()); +#endif + +#ifdef STARPU_PROF_TASKSTUBS +// unsigned long starpu_task_get_job_id(struct starpu_task *task); + + unsigned long tid = job->job_id; + + char* name = NULL; + if(NULL == job->ps_task_timer) + { + void* func = NULL; +/* j-> job_successors: list of all the completion groups that depend on the job */ + tasktimer_argument_value_t args[1]; + args[0].type = TASKTIMER_LONG_INTEGER_TYPE; + args[0].l_value = tid; + + uint64_t* parents = NULL; + uint64_t myguid = tid; + + if(NULL != job->task->name) + { + name = job->task->name; + } + else + { + name = ""; + } + + /* TODO update the address later? */ + TASKTIMER_CREATE(func, name, myguid, parents, 0, tt); + job->ps_task_timer = tt; + } + + for (int i = 0; i < job->job_successors.ndeps; i++) + { + TASKTIMER_ADD_PARENTS(job->ps_task_timer, ((struct _starpu_job*)(job->job_successors.deps[i]->deps))->job_id, 1); + } + +#endif + + return 0; +} + +/** + * A task submission process has been started. + * The event is recorded for the calling thread. + * \return always 0. + */ +int _starpu_trace_task_submit_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task submission process has been completed. + * The event is recorded for the calling thread. + * \return always 0. + */ +int _starpu_trace_task_submit_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_SUBMIT_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task submission throttling process has been engaged, task submission will be blocked until the throttling process gets disengaged. 
+ */ +int _starpu_trace_task_throttle_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task submission throttling process has been disengaged. + * \return always 0. + */ +int _starpu_trace_task_throttle_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_THROTTLE_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task building operation has been started. + * \return always 0. + */ +int _starpu_trace_task_build_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A task building operation has been completed. + * \return always 0. + */ +int _starpu_trace_task_build_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_BUILD_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI task decoding operation has been started. + * \return always 0. + */ +int _starpu_trace_task_mpi_decode_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI task decoding operation has been completed. + * \return always 0. + */ +int _starpu_trace_task_mpi_decode_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_DECODE_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI pre-task communication phase has been started. + * \return always 0. + */ +int _starpu_trace_task_mpi_pre_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI pre-task communication phase has been completed. 
+ */ +int _starpu_trace_task_mpi_pre_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_PRE_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI post-task communication phase has been started. + * \return always 0. + */ +int _starpu_trace_task_mpi_post_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A StarPU-MPI post-task communication phase has been completed. + * \return always 0. + */ +int _starpu_trace_task_mpi_post_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI_VERBOSE, _STARPU_FUT_TASK_MPI_POST_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A wait operation on a specific task has been started. + * \p job is the task being waited for completion. + * \return always 0. + */ +int _starpu_trace_task_wait_start(struct _starpu_job *job STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_START, (job)->job_id, _starpu_gettid()); +#endif + return 0; +} + +/** + * A wait operation on a specific task has been completed. + * Only the calling thread id is recorded, not the job that was waited for. + * \return always 0. + */ +int _starpu_trace_task_wait_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A wait operation on all submitted tasks has been started. + * \return always 0. + */ +int _starpu_trace_task_wait_for_all_start(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_START, _starpu_gettid()); +#endif + return 0; +} + +/** + * A wait operation on all submitted tasks has been completed. + * \return always 0. + */ +int _starpu_trace_task_wait_for_all_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_TASK_WAIT_FOR_ALL_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A fresh memory allocation operation has been started. 
+ * \p memnode is the memory node on which the allocation is requested. + * \p size is the size in bytes of the allocation request. + * \p handle is the corresponding data handle. + * \p is_prefetch is a boolean indicating whether the operation is speculative or performed by necessity. + */ +int _starpu_trace_start_alloc(unsigned memnode STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* The calling thread id is recorded right after the memory node, before the caller-supplied values. */ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_ALLOC, memnode, _starpu_gettid(), size, handle, is_prefetch); +#endif + return 0; +} + +/** + * A fresh memory allocation operation has been completed. + * \p memnode is the memory node on which the allocation is requested. + * \p handle is the corresponding data handle. + * \p r is the size of the memory allocated. + * \return always 0. + */ +int _starpu_trace_end_alloc(unsigned memnode STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, starpu_ssize_t r STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_ALLOC, memnode, _starpu_gettid(), handle, r); +#endif + return 0; +} + +/** + * A fresh or cached memory allocation operation has been started. + * \p memnode is the memory node on which the allocation is requested. + * \p size is the size in bytes of the allocation request. + * \p handle is the corresponding data handle. + * \p is_prefetch is a boolean indicating whether the operation is speculative or performed by necessity. 
+ */ +int _starpu_trace_start_alloc_reuse(unsigned memnode STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* Same payload layout as the START_ALLOC event, but recorded under the DSM_VERBOSE keymask. */ + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_ALLOC_REUSE, memnode, _starpu_gettid(), size, handle, is_prefetch); +#endif + return 0; +} + +/** + * A fresh or cached memory allocation operation has been completed. + * \p memnode is the memory node on which the allocation is requested. + * \p handle is the corresponding data handle. + * \p r is the size of the memory allocated. + * \return always 0. + */ +int _starpu_trace_end_alloc_reuse(unsigned memnode STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, starpu_ssize_t r STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_ALLOC_REUSE, memnode, _starpu_gettid(), handle, r); +#endif + return 0; +} + +/** + * A memory free operation has been started. + * \p memnode is the memory node on which the allocation is requested. + * \p size is the size of the memory allocated. + * \p handle is the corresponding data handle. + * \return always 0. + */ +int _starpu_trace_start_free(unsigned memnode STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_FREE, memnode, _starpu_gettid(), size, handle); +#endif + return 0; +} + +/** + * A memory free operation has been completed. + * \p memnode is the memory node on which the allocation is requested. + * \p handle is the corresponding data handle. 
+ */ +int _starpu_trace_end_free(unsigned memnode STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_FREE, memnode, _starpu_gettid(), handle); +#endif + return 0; +} + +/** + * A synchronous cache writeback data transfer has been started. + * \p memnode is the destination node. + * \p handle is the corresponding data handle. + * \return always 0. + */ +int _starpu_trace_start_writeback(unsigned memnode STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK, memnode, _starpu_gettid(), handle); +#endif + return 0; +} + +/** + * A synchronous cache writeback data transfer has been completed. + * \p memnode is the destination node. + * \p handle is the corresponding data handle. + * \return always 0. + */ +int _starpu_trace_end_writeback(unsigned memnode STARPU_ATTRIBUTE_UNUSED, starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK, memnode, _starpu_gettid(), handle); +#endif + return 0; +} + +/** + * The memory usage statistics on a memory node has been updated. + * \p memnode is the corresponding memory node. + * \p used is the updated amount of memory used on the memory node, in bytes. + * \return always 0. + */ +int _starpu_trace_used_mem(unsigned memnode STARPU_ATTRIBUTE_UNUSED, size_t used STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* Here the thread id is the last probe argument, unlike the allocation and writeback events where it follows the memory node. */ + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_USED_MEM, memnode, used, _starpu_gettid()); +#endif + return 0; +} + +/** + * A memory reclaiming process has been started. + * \p memnode is the destination node. + * \p is_prefetch is a boolean indicating whether the operation is speculative or performed by necessity. 
+ */ +int _starpu_trace_start_memreclaim(unsigned memnode STARPU_ATTRIBUTE_UNUSED,enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); +#endif + return 0; +} + +/** + * A memory reclaiming process has been completed. + * \p memnode is the destination node. + * \p is_prefetch is a boolean indicating whether the operation is speculative or performed by necessity. + * \return always 0. + */ +int _starpu_trace_end_memreclaim(unsigned memnode STARPU_ATTRIBUTE_UNUSED, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_MEMRECLAIM, memnode, is_prefetch, _starpu_gettid()); +#endif + return 0; +} + +/** + * An asynchronous cache writeback data transfer has been started. + * \p memnode is the destination node. + * \return always 0. + */ +int _starpu_trace_start_writeback_async(unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_START_WRITEBACK_ASYNC, memnode, _starpu_gettid()); +#endif + return 0; +} + +/** + * An asynchronous cache writeback data transfer has been completed. + * \p memnode is the destination node. + * \return always 0. + */ +int _starpu_trace_end_writeback_async(unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_END_WRITEBACK_ASYNC, memnode, _starpu_gettid()); +#endif + return 0; +} + +/** + * A PAPI task event has been collected. + * \p event_id is the PAPI event id. + * \p task is the corresponding task. + * \p value is the value collected from the PAPI event. 
+ */ +int _starpu_trace_papi_task_event(int event_id STARPU_ATTRIBUTE_UNUSED, struct starpu_task* task STARPU_ATTRIBUTE_UNUSED, long long int value STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + /* The task is resolved to its job so that the job id is what gets recorded. */ + FUT_DO_PROBE3(_STARPU_FUT_PAPI_TASK_EVENT_VALUE, event_id, _starpu_get_job_associated_to_task(task)->job_id, value); +#endif + return 0; +} + +/* We skip these events because they are called so often that they cause FxT to + * fail and make the overall trace unreadable anyway. + * NOTE(review): the probes below are not skipped in this file, they are emitted + * under _STARPU_FUT_KEYMASK_WORKER_VERBOSE -- confirm whether this remark is still accurate. */ +/** + * A data transfer progression phase has been started for a memory node. + * \p memnode is the corresponding memory node. + * \p worker is the worker driving the progression; it is only read when STARPU_PROF_TOOL is defined. + * \return always 0. + */ +int _starpu_trace_start_progress(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_START_PROGRESS_ON_TID, memnode, _starpu_gettid()); +#endif + +#ifdef STARPU_PROF_TOOL + /* The profiling-tool callback is optional: nothing is done when it is not registered. */ + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer) + { + struct starpu_prof_tool_info pi; + enum starpu_prof_tool_driver_type driver_type; + /* CPU workers map to the cpu driver type, CUDA/HIP/OpenCL workers to the gpu one; any other architecture skips the callback. */ + switch(worker->arch) + { + case STARPU_CPU_WORKER: + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + +// pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, cpu_worker->nb_buffers_totransfer, cpu_worker->nb_buffers_transferred); + // we can pass more info here + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, driver_type, memnode, 0, 0); + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); + out: + ; + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + //TASKTIMER_DATA_TRANSFER_RESUME(100); /* TODO */ +#endif + +return 0; +} + +/** + * A data transfer progression phase has 
been completed for a memory node. + * \p memnode is the corresponding memory node. + * \p worker is the worker driving the progression; it is only read when STARPU_PROF_TOOL is defined. + * \return always 0. + */ +int _starpu_trace_end_progress(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_WORKER_VERBOSE, _STARPU_FUT_END_PROGRESS_ON_TID, memnode, _starpu_gettid()); +#endif + +#ifdef STARPU_PROF_TOOL + /* The profiling-tool callback is optional: nothing is done when it is not registered. */ + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer) + { + struct starpu_prof_tool_info pi; + enum starpu_prof_tool_driver_type driver_type; + /* CPU workers map to the cpu driver type, CUDA/HIP/OpenCL workers to the gpu one; any other architecture skips the callback. */ + switch(worker->arch) + { + case STARPU_CPU_WORKER: + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + + /* The info record must carry the end_transfer event so that it matches the callback it is handed to. */ + // we can pass more info here + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, driver_type, memnode, 0, 0); + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); + out: + ; + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + //TASKTIMER_DATA_TRANSFER_STOP(100); /* TODO */ +#endif + + return 0; +} + +/** + * A user-defined event has occurred. + * \p code is the user-defined event code + * \return always 0. + */ +int _starpu_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_USER, _STARPU_FUT_USER_EVENT, code, _starpu_gettid()); +#endif + return 0; +} + +/** + * A trace meta-event has been recorded. + * \p S is the trace meta-event string. 
+ */ +int _starpu_trace_meta(const char* S STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT +#ifdef FUT_DO_ALWAYS_PROBESTR + /* String events are only compiled in when FxT provides FUT_DO_ALWAYS_PROBESTR. */ + FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_EVENT,S); +#endif +#endif + return 0; +} + +/** + * The profiling status has been updated. + * \p status is the new profiling status. + * \return always 0. + */ +int _starpu_trace_set_profiling(int status STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_SET_PROFILING, status, _starpu_gettid()); +#endif + return 0; +} + +/** + * Obsolete? Does not seem to be used anymore. + * \return always 0. + */ +int _starpu_trace_task_wait_for_all(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE0(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_TASK_WAIT_FOR_ALL); +#endif + return 0; +} + +/** + * An unconditional event has been recorded. + * The event bypasses the keymask but is only emitted once _starpu_fxt_started is set. + * \p S is the event string. + * \return always 0. + */ +int _starpu_trace_event_always(const char* S STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT +#ifdef FUT_DO_ALWAYS_PROBESTR + /* Braces keep the whole probe macro expansion under the guard. */ + if (_starpu_fxt_started) + { + FUT_DO_ALWAYS_PROBESTR(_STARPU_FUT_EVENT,S); + } +#endif +#endif + return 0; +} + +/** + * A default verbosity level event has been recorded. + * \p S is the event string. + * \return always 0. + */ +int _starpu_trace_event(const char* S STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT +#ifdef FUT_DO_ALWAYS_PROBESTR + FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT, _STARPU_FUT_EVENT,S); +#endif +#endif + return 0; +} + +/** + * A verbose level event has been recorded. + * \p S is the event string. + * \return always 0. + */ +int _starpu_trace_event_verbose(const char* S STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT +#ifdef FUT_DO_ALWAYS_PROBESTR + FUT_FULL_PROBESTR(_STARPU_FUT_KEYMASK_EVENT_VERBOSE, _STARPU_FUT_EVENT,S); +#endif +#endif + return 0; +} + +/** + * Obsolete? Does not seem to be used anymore. 
+ */ +int _starpu_trace_thread_event(const char* S STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _STARPU_FUT_FULL_PROBE1STR(_STARPU_FUT_KEYMASK_WORKER, _STARPU_FUT_THREAD_EVENT, _starpu_gettid(), S); +#endif + return 0; +} + +/** + * A scheduling context hypervisor operation has been started. + * \return always 0. + */ +int _starpu_trace_hypervisor_begin(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_BEGIN, _starpu_gettid()); +#endif + return 0; +} + +/** + * A scheduling context hypervisor operation has been completed. + * \return always 0. + */ +int _starpu_trace_hypervisor_end(void) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_HYP, _STARPU_FUT_HYPERVISOR_END, _starpu_gettid()); +#endif + return 0; +} + +/** + * A mutex lock operation has been started. + * Only emitted when both STARPU_FXT_LOCK_TRACES and STARPU_USE_FXT are defined. + * Note: __FILE__ and __LINE__ expand inside this function, so the recorded + * location is this source file, not the caller's. + * \return always 0. + */ +int _starpu_trace_locking_mutex(void) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__,'/'); + /* strrchr() returns NULL when the path has no '/': fall back to the whole path. */ + file = file ? file + 1 : __FILE__; + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_LOCKING_MUTEX,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A mutex lock operation has been completed. + * Only emitted when both STARPU_FXT_LOCK_TRACES and STARPU_USE_FXT are defined. + * \return always 0. + */ +int _starpu_trace_mutex_locked(void) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__,'/'); + /* strrchr() returns NULL when the path has no '/': fall back to the whole path. */ + file = file ? file + 1 : __FILE__; + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_LOCKED,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A mutex unlock operation has been started. + * Only emitted when both STARPU_FXT_LOCK_TRACES and STARPU_USE_FXT are defined. + * \return always 0. + */ +int _starpu_trace_unlocking_mutex(void) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__,'/'); + /* strrchr() returns NULL when the path has no '/': fall back to the whole path. */ + file = file ? file + 1 : __FILE__; + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_MUTEX,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A mutex unlock operation has been completed. 
+ */ +int _starpu_trace_mutex_unlocked() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_MUTEX_UNLOCKED,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A mutex trylock operation has been attempted. + */ +int _starpu_trace_trylock_mutex() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_TRYLOCK_MUTEX,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock read-lock operation has been started. + */ +int _starpu_trace_rdlocking_rwlock() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_RDLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock read-lock operation has been completed. + */ +int _starpu_trace_rwlock_rdlocked() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_RDLOCKED,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock write-lock operation has been started. + */ +int _starpu_trace_wrlocking_rwlock() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_WRLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock write-lock operation has been completed.
+ */ +int _starpu_trace_rwlock_wrlocked() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_WRLOCKED,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock unlock operation has been started. + */ +int _starpu_trace_unlocking_rwlock() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_UNLOCKING_RWLOCK,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A rwlock unlock operation has been completed. + */ +int _starpu_trace_rwlock_unlocked() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_RWLOCK_UNLOCKED,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + + +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT +#define _STARPU_TRACE_SPINLOCK_CONDITION (starpu_worker_get_type(starpu_worker_get_id()) == STARPU_CUDA_WORKER) /* no trailing ';': the macro is expanded inside if (...) */ +#endif +#endif + +/** + * A spin-lock lock operation has been completed. + */ +int _starpu_trace_spinlock_locked(const char* file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + if (_STARPU_TRACE_SPINLOCK_CONDITION) + { + const char *xfile = strrchr(file, '/'); + xfile = xfile ? xfile + 1 : file; /* file may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_LOCKED,line,_starpu_gettid(),xfile); + } +#endif +#endif + return 0; +} + +/** + * A spin-lock lock operation has been started.
+ */ +int _starpu_trace_locking_spinlock(const char* file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + if (_STARPU_TRACE_SPINLOCK_CONDITION) + { + const char *xfile = strrchr(file, '/'); + xfile = xfile ? xfile + 1 : file; /* file may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_LOCKING_SPINLOCK,line,_starpu_gettid(),xfile); + } +#endif +#endif + return 0; +} + +/** + * A spin-lock unlock operation has been started. + */ +int _starpu_trace_unlocking_spinlock(const char* file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + if (_STARPU_TRACE_SPINLOCK_CONDITION) + { + const char *xfile = strrchr(file, '/'); + xfile = xfile ? xfile + 1 : file; /* file may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_UNLOCKING_SPINLOCK,line,_starpu_gettid(),xfile); + } +#endif +#endif + return 0; +} + +/** + * A spin-lock unlock operation has been completed. + */ +int _starpu_trace_spinlock_unlocked(const char* file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + if (_STARPU_TRACE_SPINLOCK_CONDITION) + { + const char *xfile = strrchr(file, '/'); + xfile = xfile ? xfile + 1 : file; /* file may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_SPINLOCK_UNLOCKED,line,_starpu_gettid(),xfile); + } +#endif +#endif + return 0; +} + +/** + * A spin-lock trylock operation has been attempted. + */ +int _starpu_trace_trylock_spinlock(const char* file STARPU_ATTRIBUTE_UNUSED, int line STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + if (_STARPU_TRACE_SPINLOCK_CONDITION) + { + const char *xfile = strrchr(file, '/'); + xfile = xfile ? xfile + 1 : file; /* file may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK_VERBOSE, _STARPU_FUT_TRYLOCK_SPINLOCK,line,_starpu_gettid(),xfile); + } +#endif +#endif + return 0; +} + +/** + * A wait operation on a condition has been started.
+ */ +int _starpu_trace_cond_wait_begin() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A wait operation on a condition has been completed. + */ +int _starpu_trace_cond_wait_end() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_COND_WAIT_END,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A thread has entered a barrier. + */ +int _starpu_trace_barrier_wait_begin() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_BEGIN,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +/** + * A thread has left a barrier.
+ */ +int _starpu_trace_barrier_wait_end() +{ +#ifdef STARPU_FXT_LOCK_TRACES +#ifdef STARPU_USE_FXT + const char *file = strrchr(__FILE__, '/'); + file = file ? file + 1 : __FILE__; /* __FILE__ (this file, not the caller) may contain no '/' */ + _STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_LOCK, _STARPU_FUT_BARRIER_WAIT_END,__LINE__,_starpu_gettid(),file); +#endif +#endif + return 0; +} + +int _starpu_trace_data_load(int workerid STARPU_ATTRIBUTE_UNUSED, size_t size STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size, _starpu_gettid()); +#endif + return 0; +} + +int _starpu_trace_start_unpartition(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); +#endif + return 0; +} + +int _starpu_trace_end_unpartition(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned memnode STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_END_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle); +#endif + return 0; +} + +int _starpu_trace_sched_component_push_prio(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED, unsigned ntasks STARPU_ATTRIBUTE_UNUSED, double exp_len STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if (fut_active) + { + int workerid = STARPU_NMAXWORKERS + 1; + if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) + workerid = starpu_sched_component_worker_get_workerid(component->children[0]); + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); + } +#endif + return 0; +} + +int _starpu_trace_sched_component_pop_prio(struct starpu_sched_component * component STARPU_ATTRIBUTE_UNUSED, unsigned ntasks STARPU_ATTRIBUTE_UNUSED, double exp_len STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if
(fut_active) + { + int workerid = STARPU_NMAXWORKERS + 1; + if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) + workerid = starpu_sched_component_worker_get_workerid(component->children[0]); + FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_POP_PRIO, _starpu_gettid(), workerid, ntasks, exp_len); + } +#endif + return 0; +} + +int _starpu_trace_sched_component_new(struct starpu_sched_component* component STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if (STARPU_UNLIKELY(fut_active)) _STARPU_FUT_ALWAYS_PROBE1STR(_STARPU_FUT_SCHED_COMPONENT_NEW, component, (component)->name); +#endif + return 0; +} + +int _starpu_trace_sched_component_connect(struct starpu_sched_component* parent STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component* child STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if (STARPU_UNLIKELY(fut_active)) FUT_RAW_ALWAYS_PROBE2(FUT_CODE(_STARPU_FUT_SCHED_COMPONENT_CONNECT,2), parent, child); +#endif + return 0; +} + +int _starpu_trace_sched_component_push(struct starpu_sched_component* from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component* to STARPU_ATTRIBUTE_UNUSED, struct starpu_task* task STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PUSH, _starpu_gettid(), from, to, task, prio); +#endif + return 0; +} + +int _starpu_trace_sched_component_pull(struct starpu_sched_component* from STARPU_ATTRIBUTE_UNUSED, struct starpu_sched_component* to STARPU_ATTRIBUTE_UNUSED, struct starpu_task* task STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_SCHED, _STARPU_FUT_SCHED_COMPONENT_PULL, _starpu_gettid(), from, to, task, (task)->priority); +#endif + return 0; +} + +int _starpu_trace_handle_data_register(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_META) & 
fut_active)) + { + const size_t __data_size = (*handle)->ops->get_size((*handle)); + const starpu_ssize_t __max_data_size = _starpu_data_get_max_size((*handle)); + char __buf[(FXT_MAX_PARAMS-4)*sizeof(long)]; + void *__interface = (*handle)->per_node[0].data_interface; + if ((*handle)->ops->describe) + (*handle)->ops->describe(__interface, __buf, sizeof(__buf)); + else + __buf[0] = 0; + _STARPU_FUT_FULL_PROBE4STR(_STARPU_FUT_KEYMASK_META, _STARPU_FUT_HANDLE_DATA_REGISTER, (*handle), __data_size, __max_data_size, (*handle)->home_node, __buf); + } +#endif + return 0; +} + +int _starpu_trace_handle_data_unregister(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_HANDLE_DATA_UNREGISTER, handle); +#endif + return 0; +} + +//Coherency Data Traces +int _starpu_trace_data_state_invalid(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_INVALID, handle, node); +#endif + return 0; +} + +int _starpu_trace_data_state_owner(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_OWNER, handle, node); +#endif + return 0; +} + +int _starpu_trace_data_state_shared(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, unsigned node STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_STATE_SHARED, handle, node); +#endif + return 0; +} + +int _starpu_trace_data_request_created(starpu_data_handle_t *handle STARPU_ATTRIBUTE_UNUSED, int orig STARPU_ATTRIBUTE_UNUSED, int dest STARPU_ATTRIBUTE_UNUSED, int prio STARPU_ATTRIBUTE_UNUSED, enum starpu_is_prefetch is_prefetch STARPU_ATTRIBUTE_UNUSED, struct _starpu_data_request *req STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef 
STARPU_USE_FXT + FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_DSM_VERBOSE, _STARPU_FUT_DATA_REQUEST_CREATED, orig, dest, prio, handle, is_prefetch, req); +#endif + return 0; +} + +int _starpu_trace_memory_full(size_t size STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid()); +#endif + return 0; +} + +int _starpu_trace_start_transfer(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _starpu_trace_start_progress(memnode, worker); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer) + { + struct starpu_prof_tool_info pi; + enum starpu_prof_tool_driver_type driver_type; + switch(worker->arch) + { + case STARPU_CPU_WORKER: + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_start_transfer, worker->workerid, worker->workerid, driver_type, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + + starpu_prof_tool_callbacks.starpu_prof_tool_event_start_transfer(&pi, NULL, NULL); + out: + ; + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS + uint64_t myguid = 0;// //new_guid(); TODO + + tasktimer_execution_space_t source_info, dest_info; /* TODO will set that later */ + tasktimer_execution_space_p sip = &source_info; + tasktimer_execution_space_p dip = &dest_info; + source_info.type = TASKTIMER_DEVICE_CPU; + source_info.device_id = 0; + source_info.instance_id = 0; + dest_info.type = TASKTIMER_DEVICE_CPU; + dest_info.device_id = 0; + dest_info.instance_id = 0; + + char *source = (char *) &memnode, *dest = (char *) &memnode;/* both are pointers (dest used to be a plain char); TODO will set that later */ + + // TASKTIMER_DATA_TRANSFER_START(myguid, sip, "source", (void*)source, dip, "dest", (void*)dest);
+#endif +return 0; +} + +int _starpu_trace_end_transfer(unsigned memnode STARPU_ATTRIBUTE_UNUSED, struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED) +{ +#ifdef STARPU_USE_FXT + _starpu_trace_end_progress(memnode, worker); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer) + { + struct starpu_prof_tool_info pi; + enum starpu_prof_tool_driver_type driver_type; + switch(worker->arch) + { + case STARPU_CPU_WORKER: + driver_type = starpu_prof_tool_driver_cpu; + break; + case STARPU_CUDA_WORKER: + case STARPU_HIP_WORKER: + case STARPU_OPENCL_WORKER: + driver_type = starpu_prof_tool_driver_gpu; + break; + default: + goto out; + } + +// pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_transfer, workerid, workerid, starpu_prof_tool_driver_cpu, memnode, NULL); + pi = _starpu_prof_tool_get_info_d(starpu_prof_tool_event_end_transfer, worker->workerid, worker->workerid, driver_type, memnode, worker->nb_buffers_totransfer, worker->nb_buffers_transferred); + + /* pi.model_name = _starpu_job_get_model_name(j); + pi.task_name = _starpu_job_get_task_name(j); */ + starpu_prof_tool_callbacks.starpu_prof_tool_event_end_transfer(&pi, NULL, NULL); + out: + ; + } +#endif + +#ifdef STARPU_PROF_TASKSTUBS +// TASKTIMER_DATA_TRANSFER_STOP(100); /* TODO */ +#endif + return 0; +} + +/** + * A worker thread initialization has been started. + * \p archtype is the architecture type. + * \p sync is passed through to the FxT event.
+ */ +int _starpu_trace_worker_init_start(struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED, + enum starpu_worker_archtype archtype STARPU_ATTRIBUTE_UNUSED, + unsigned sync STARPU_ATTRIBUTE_UNUSED) +{ + unsigned devid = worker->devid; + unsigned memnode = worker->memory_node; + (void) devid; + (void) memnode; + +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(archtype), worker->workerid, devid, memnode, worker->bindid, sync, _starpu_gettid()); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start) + { + enum starpu_prof_tool_driver_type drivertype; + switch(archtype) + { + case STARPU_CPU_WORKER: drivertype = starpu_prof_tool_driver_cpu; break; + case STARPU_CUDA_WORKER: drivertype = starpu_prof_tool_driver_gpu; break; + case STARPU_OPENCL_WORKER: drivertype = starpu_prof_tool_driver_ocl; break; + case STARPU_HIP_WORKER: drivertype = starpu_prof_tool_driver_hip; break; + default: drivertype = starpu_prof_tool_driver_cpu; break; + } + + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init, devid, worker->workerid, drivertype, -1, NULL); + if (starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init) { starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init(&pi, NULL, NULL); } /* only the *_driver_init_start callback was tested above */ + + pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_driver_init_start, devid, worker->workerid, drivertype, -1, NULL); + starpu_prof_tool_callbacks.starpu_prof_tool_event_driver_init_start(&pi, NULL, NULL); + } +#endif + + return 0; +} + +/** + * A worker thread initialization has been completed. + * \p worker describes that worker; \p archtype is its architecture type. + */ +int _starpu_trace_worker_init_end(struct _starpu_worker *worker STARPU_ATTRIBUTE_UNUSED, + enum starpu_worker_archtype archtype STARPU_ATTRIBUTE_UNUSED) +{ + /* todo: replace starpu_prof_tool_driver_type with enum starpu_worker_archtype to make the API consistent ?
*/ +#ifdef STARPU_USE_FXT + if (_starpu_fxt_started) + FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_INIT_END, _starpu_gettid(), worker->workerid); +#endif + +#ifdef STARPU_PROF_TOOL + if(starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end) + { + enum starpu_prof_tool_driver_type driver_type; + switch(archtype) + { + case STARPU_CPU_WORKER: driver_type = starpu_prof_tool_driver_cpu; break; + case STARPU_CUDA_WORKER: driver_type = starpu_prof_tool_driver_gpu; break; + case STARPU_OPENCL_WORKER: driver_type = starpu_prof_tool_driver_ocl; break; + case STARPU_HIP_WORKER: driver_type = starpu_prof_tool_driver_hip; break; + default: driver_type = starpu_prof_tool_driver_cpu; break; + } + + struct starpu_prof_tool_info pi = _starpu_prof_tool_get_info_init(starpu_prof_tool_event_init_end, 0, driver_type, &(_starpu_config.conf)); + pi.conf = &_starpu_config.conf; + starpu_prof_tool_callbacks.starpu_prof_tool_event_init_end(&pi, NULL, NULL); + } +#endif + + return 0; +} diff --git a/src/profiling/starpu_tracing.h b/src/profiling/starpu_tracing.h new file mode 100644 index 0000000000..c5a2e79bc8 --- /dev/null +++ b/src/profiling/starpu_tracing.h @@ -0,0 +1,341 @@ +/* StarPU --- Runtime system for heterogeneous multicore architectures. + * + * Copyright (C) 2022-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria, Télécom SudParis + * Copyright (C) 2023-2025 École de Technologie Supérieure (ETS, Montréal) + * + * StarPU is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at + * your option) any later version. + * + * StarPU is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * See the GNU Lesser General Public License in COPYING.LGPL for more details. 
+ */ + +#ifndef STARPU_TRACE_H +#define STARPU_TRACE_H + +#ifdef STARPU_PROF_TOOL +#include "callbacks/callbacks.h" +#endif + +#ifdef STARPU_PROF_TASKSTUBS + +#include +#include + +uint64_t new_guid(); +#endif + +#define STARPU_TRACE_API_VERSION 1 + +struct _starpu_tag; +struct _starpu_data_request; +struct _starpu_worker; +struct _starpu_job; +struct starpu_sched_component; + +/* Initialize any existing tracing/profiling tool */ +int _starpu_trace_initialize(); +/* Finalize any existing tracing/profiling tool */ +int _starpu_trace_finalize(); + +/* Set profiling status */ +int _starpu_trace_set_profiling(int status); +/* Stop PAPI counters */ +int _starpu_trace_papi_task_event(int event_id, struct starpu_task* task, long long int value); +/* Called with "start_profiling" at the beginning of FXT profiling, with "stop_profiling" at the end of FXT profiling, and with an arbitrary string by FXT (FXT only) */ +int _starpu_trace_meta(const char* S); +/* FXT user event (FXT only) */ +int _starpu_trace_user_event(unsigned long code); +/* Unused */ +int _starpu_trace_event_always(const char* S); +/* FXT event (FXT only) */ +int _starpu_trace_event(const char* S); +/* Unused */ +int _starpu_trace_event_verbose(const char* S); +/* Unused */ +int _starpu_trace_thread_event(const char* S); + +/* Register a new memory node */ +int _starpu_trace_new_mem_node(int nodeid); +/* Beginning of the initialization of the driver for a worker */ +int _starpu_trace_worker_init_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync); +/* End of the initialization of the driver for a worker */ +int _starpu_trace_worker_init_end(struct _starpu_worker *worker, enum starpu_worker_archtype archtype); +/* When a new worker starts, register it */ +int _starpu_trace_register_thread(int bindid); +/* Initialize a worker at the beginning of the execution of the application */ +int _starpu_trace_worker_initialize(); +/* Finalize a worker at the end of the execution of 
the application */ +int _starpu_trace_worker_finalize(); +/* Beginning of the finalization of a worker */ +int _starpu_trace_worker_deinit_start(); +/* End of the finalization of a worker */ +int _starpu_trace_worker_deinit_end(unsigned workerid, enum starpu_worker_archtype workerkind); + +/* Start the execution of a codelet */ +int _starpu_trace_start_codelet_body(struct _starpu_job *job, int nimpl, struct starpu_perfmodel_arch* perf_arch, int workerid, int rank); +/* End the execution of a codelet */ +int _starpu_trace_end_codelet_body(struct _starpu_job *job, unsigned nimpl, struct starpu_perfmodel_arch* perf_arch, int workerid, int rank); +/* Start the execution of a codelet on the worker it was assigned to */ +int _starpu_trace_start_executing(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *cpu_args, void* func); +/* End the execution of the codelet on the worker */ +int _starpu_trace_end_executing(struct _starpu_job *job, struct _starpu_worker *worker); +/* Before a call to the epilogue callback */ +int _starpu_trace_start_callback(struct _starpu_job *job); +/* After a call to the epilogue callback */ +int _starpu_trace_end_callback(struct _starpu_job *job); +/* Push a task on a specific worker, called once per worker */ +int _starpu_trace_job_push(struct starpu_task* task, int prio); +/* Pop a task from the scheduler, either to resubmit it or at the end of its execution */ +int _starpu_trace_job_pop(struct starpu_task* task, int prio); +/* Set a task counter */ +int _starpu_trace_update_task_cnt(int counter); + +/* Begin fetching a task's data input */ +int _starpu_trace_start_fetch_input(struct _starpu_job *job); +/* End fetching a task's data input */ +int _starpu_trace_end_fetch_input(struct _starpu_job *job); +/* Begin pushing a task output */ +int _starpu_trace_start_push_output(struct _starpu_job *job); +/* End of a data fetch operation for a given task */ +int _starpu_trace_worker_end_fetch_input(struct _starpu_job *job, int id);
+/* Beginning of a data fetch operation for a given task */ +int _starpu_trace_worker_start_fetch_input(struct _starpu_job *job, int id); +/* End pushing a task output (unused) */ +int _starpu_trace_end_push_output(struct _starpu_job *job); + +/* Declare a tag */ +int _starpu_trace_tag(starpu_tag_t* tag, struct _starpu_job *job); +/* Declare a tag's dependencies (called once per dependency) */ +int _starpu_trace_tag_deps(starpu_tag_t* tag_child, starpu_tag_t* tag_parent); +/* Declare a task's dependencies (called once per dependency) */ +int _starpu_trace_task_deps(struct _starpu_job *job_prev, struct _starpu_job *job_succ); +/* Release task dependency and terminate the job */ +int _starpu_trace_task_end_dep(struct _starpu_job *job_prev, struct _starpu_job *job_succ); +/* Add a ghost dependency */ +int _starpu_trace_ghost_task_deps(unsigned ghost_prev_id, struct _starpu_job *job_succ); +/* Unused */ +int _starpu_trace_bubble_task_deps(unsigned long prev_id, struct _starpu_job *job_succ); +/* Push a non-root recursive task */ +int _starpu_trace_recursive_task_deps(unsigned long prev_id, struct _starpu_job *job_succ); + +/* Execute a recursive task */ +int _starpu_trace_recursive_task(struct _starpu_job *job); +/* Exclude a task from the DAG */ +int _starpu_trace_task_exclude_from_dag(struct _starpu_job *job); +/* Unused */ +int _starpu_trace_bubble(struct _starpu_job *job); +/* Set the task's line number as set by the programmer to be used by an external profiling system. */ +int _starpu_trace_task_line(struct _starpu_job *job); +/* Set the task's name as set by the programmer to be used by an external profiling system. */ +int _starpu_trace_task_name(struct _starpu_job *job); +/* Set the task's color as set by the programmer to be used by an external profiling system. */ +int _starpu_trace_task_color(struct _starpu_job *job); +/* Set the task's name, line number, and color as set by the programmer to be used by an external profiling system.
*/ +int _starpu_trace_task_name_line_color(struct _starpu_job *job); +/* The task execution is finished, it is going to be destroyed */ +int _starpu_trace_task_done(struct _starpu_job *job); +/* Notify that a tag is done */ +int _starpu_trace_tag_done(struct _starpu_tag* tag); + +/* Set the data's name */ +int _starpu_trace_data_name(starpu_data_handle_t *handle, const char* name); +/* Set the data's coordinates array */ +int _starpu_trace_data_coordinates(starpu_data_handle_t *handle, unsigned dim, int v[]); +/* Copy data */ +int _starpu_trace_data_copy(unsigned src_node, unsigned dst_node, size_t size); +/* Set all my children's handles as not being used in the future */ +int _starpu_trace_data_wont_use(starpu_data_handle_t *handle); +/* Set all my memory chunks as not being used in the future */ +int _starpu_trace_data_doing_wont_use(starpu_data_handle_t *handle); +/* Start data copy request */ +int _starpu_trace_start_driver_copy(unsigned src_node, unsigned dst_node, size_t size, unsigned long com_id, enum starpu_is_prefetch prefetch, starpu_data_handle_t *handle); +/* Data copy request completed */ +int _starpu_trace_end_driver_copy(unsigned src_node, unsigned dst_node, size_t size, unsigned long com_id,enum starpu_is_prefetch prefetch); +/* Start asynchronous data request */ +int _starpu_trace_start_driver_copy_async(unsigned src_node, unsigned dst_node); +/* Asynchronous data request completed */ +int _starpu_trace_end_driver_copy_async(unsigned src_node, unsigned dst_node); + +/* Register a data handle */ +int _starpu_trace_handle_data_register(starpu_data_handle_t *handle); +/* Unregister a data handle */ +int _starpu_trace_handle_data_unregister(starpu_data_handle_t *handle); +/* Set the data's state as invalid */ +int _starpu_trace_data_state_invalid(starpu_data_handle_t *handle, unsigned node); +/* Set the data's owner */ +int _starpu_trace_data_state_owner(starpu_data_handle_t *handle, unsigned node); +/* Set the data's state as shared */ +int 
_starpu_trace_data_state_shared(starpu_data_handle_t *handle, unsigned node); +/* Create a data request */ +int _starpu_trace_data_request_created(starpu_data_handle_t *handle, int orig, int dest, int prio, enum starpu_is_prefetch is_prefetch, struct _starpu_data_request *req); + +/* Start unapplying a filter */ +int _starpu_trace_start_unpartition(starpu_data_handle_t *handle, unsigned memnode); +/* Finished unapplying a filter */ +int _starpu_trace_end_unpartition(starpu_data_handle_t *handle, unsigned memnode); + +/* Schedule a task (work stealing scheduling policy) */ +int _starpu_trace_work_stealing(unsigned empty_q, unsigned victim_q); +/* Scheduling start, so set the status as "scheduling" */ +int _starpu_trace_worker_scheduling_start(); +/* Scheduling done, so set the status as "scheduling done" */ +int _starpu_trace_worker_scheduling_end(); +/* Enqueue a task into the list of tasks explicitly attached to a worker */ +int _starpu_trace_worker_scheduling_push(); +/* After the scheduler has pushed a task to a queue but just before releasing mutexes */ +int _starpu_trace_worker_scheduling_pop(); +/* Set status as "sleeping" */ +int _starpu_trace_worker_sleep_start(); +/* Wake up, so clear the status */ +int _starpu_trace_worker_sleep_end(); +/* Submit a task */ +int _starpu_trace_task_submit(struct _starpu_job *job, long iter, long subiter); +/* Before a task is submitted to the scheduler */ +int _starpu_trace_task_submit_start(); +/* After a task is submitted to the scheduler */ +int _starpu_trace_task_submit_end(); +/* Throttle a task to wait until the number of submitted tasks gets below a certain limit */ +int _starpu_trace_task_throttle_start(); +/* Un-throttle a task */ +int _starpu_trace_task_throttle_end(); +/* Before the creation of the data structure that holds a task */ +int _starpu_trace_task_build_start(); +/* After the creation of the data structure that holds a task */ +int _starpu_trace_task_build_end(); +/* Wait until a task is started */ +int 
_starpu_trace_task_wait_start(struct _starpu_job *job); +/* After a task is started */ +int _starpu_trace_task_wait_end(); +/* Before waiting for all the tasks of the scheduling context */ +int _starpu_trace_task_wait_for_all_start(); +/* After waiting for all the tasks of the scheduling context */ +int _starpu_trace_task_wait_for_all_end(); +/* Unused, see _starpu_trace_task_wait_for_all_start() and _starpu_trace_task_wait_for_all_end() */ +int _starpu_trace_task_wait_for_all(); + +/* Push a task (prio scheduler) */ +int _starpu_trace_sched_component_push_prio(struct starpu_sched_component * component, unsigned ntasks, double exp_len); +/* Pop a task (prio scheduler) */ +int _starpu_trace_sched_component_pop_prio(struct starpu_sched_component * component, unsigned ntasks, double exp_len); +/* Create a new scheduling component */ +int _starpu_trace_sched_component_new(struct starpu_sched_component* component); +/* Attach a component to its parent (scheduling) */ +int _starpu_trace_sched_component_connect(struct starpu_sched_component* parent, struct starpu_sched_component* child); +/* Push a task to a component (scheduling) */ +int _starpu_trace_sched_component_push(struct starpu_sched_component* from, struct starpu_sched_component* to, struct starpu_task* task, int prio); +/* Pull a task from a component (scheduling) */ +int _starpu_trace_sched_component_pull(struct starpu_sched_component* from, struct starpu_sched_component* to, struct starpu_task* task); + +/* Before sending a notification to the scheduling context (only if STARPU_USE_SC_HYPERVISOR is enabled ) */ +int _starpu_trace_hypervisor_begin(); +/* After sending a notification to the scheduling context (only if STARPU_USE_SC_HYPERVISOR is enabled ) */ +int _starpu_trace_hypervisor_end(); + +/* Beginning of a memory allocation */ +int _starpu_trace_start_alloc(unsigned memnode, size_t size, starpu_data_handle_t *handle, enum starpu_is_prefetch is_prefetch); +/* End of a memory allocation */ +int 
_starpu_trace_end_alloc(unsigned memnode, starpu_data_handle_t *handle, starpu_ssize_t r); +/* Beginning of a memory allocation using allocation cache */ +int _starpu_trace_start_alloc_reuse(unsigned memnode, size_t size, starpu_data_handle_t *handle, enum starpu_is_prefetch is_prefetch); +/* End of a memory allocation using allocation cache */ +int _starpu_trace_end_alloc_reuse(unsigned memnode, starpu_data_handle_t *handle, starpu_ssize_t r); +/* Before memory is freed */ +int _starpu_trace_start_free(unsigned memnode, size_t size, starpu_data_handle_t *handle); +/* After memory is freed */ +int _starpu_trace_end_free(unsigned memnode, starpu_data_handle_t *handle); +/* Before a subtree is transferred to a node */ +int _starpu_trace_start_writeback(unsigned memnode, starpu_data_handle_t *handle); +/* After a subtree is transferred to a node */ +int _starpu_trace_end_writeback(unsigned memnode, starpu_data_handle_t *handle); +/* Allocate memory */ +int _starpu_trace_used_mem(unsigned memnode, size_t used); +/* Before trying to free the buffers currently in use on the memory node */ +int _starpu_trace_start_memreclaim(unsigned memnode,enum starpu_is_prefetch is_prefetch); +/* After trying to free the buffers currently in use on the memory node */ +int _starpu_trace_end_memreclaim(unsigned memnode, enum starpu_is_prefetch is_prefetch); +/* Periodic tidy of available memory: start cleaning the memory */ +int _starpu_trace_start_writeback_async(unsigned memnode); +/* Periodic tidy of available memory: finished cleaning the memory */ +int _starpu_trace_end_writeback_async(unsigned memnode); +/* Memory allocation failed */ +int _starpu_trace_memory_full(size_t size); + +/* Start a data transfer */ +int _starpu_trace_start_transfer(unsigned memnode, struct _starpu_worker *worker); +/* End a data transfer */ +int _starpu_trace_end_transfer(unsigned memnode, struct _starpu_worker *worker); +/* Start a progress operation on a data transfer */ +int
_starpu_trace_start_progress(unsigned memnode, struct _starpu_worker *worker); +/* End a progress operation on a data transfer */ +int _starpu_trace_end_progress(unsigned memnode, struct _starpu_worker *worker); + +/* Beginning of the function that finds out whether we are to execute the data because we own the data to be written to (MPI mode). */ +int _starpu_trace_task_mpi_decode_start(); +/* End of the function that finds out whether we are to execute the data because we own the data to be written to (MPI mode). */ +int _starpu_trace_task_mpi_decode_end(); +/* Start building the necessary data to execute a task, involving a communication to send and receive the necessary data (MPI mode) */ +int _starpu_trace_task_mpi_pre_start(); +/* Finish building the necessary data to execute a task, involving a communication to send and receive the necessary data (MPI mode) */ +int _starpu_trace_task_mpi_pre_end(); +/* Start exchanging and clearing data after the execution of a task (MPI mode) */ +int _starpu_trace_task_mpi_post_start(); +/* Finish exchanging and clearing data after the execution of a task (MPI mode) */ +int _starpu_trace_task_mpi_post_end(); + +/* Start locking a pthread mutex */ +int _starpu_trace_locking_mutex(); +/* A pthread mutex has been locked */ +int _starpu_trace_mutex_locked(); +/* Start unlocking a pthread mutex */ +int _starpu_trace_unlocking_mutex(); +/* A pthread mutex has been unlocked */ +int _starpu_trace_mutex_unlocked(); +/* Start trying to lock a pthread mutex */ +int _starpu_trace_trylock_mutex(); +/* Before locking the rw lock */ +int _starpu_trace_rdlocking_rwlock(); +/* After rw lock has been successfully locked by trylock */ +int _starpu_trace_rwlock_rdlocked(); +/* Before locking the rw lock */ +int _starpu_trace_wrlocking_rwlock(); +/* After rw lock has been locked */ +int _starpu_trace_rwlock_wrlocked(); +/* Before unlocking the rw lock */ +int _starpu_trace_unlocking_rwlock(); +/* After unlocking the rw lock */ +int 
_starpu_trace_rwlock_unlocked(); +/* Unused */ +int _starpu_trace_spinlock_conditition(); +/* After a lock is taken and the function that called it last is set */ +int _starpu_trace_spinlock_locked(const char* file, int line); +/* Before a lock is taken and the function that called it last is set */ +int _starpu_trace_locking_spinlock(const char* file, int line); +/* Before a lock is released and the function that called it last is set */ +int _starpu_trace_unlocking_spinlock(const char* file, int line); +/* After a lock is released and the function that called it last is set */ +int _starpu_trace_spinlock_unlocked(const char* file, int line); +/* Before we try to take a lock with trylock and, if the lock is granted, the function that called it last is set */ +int _starpu_trace_trylock_spinlock(const char* file, int line); +/* Before a condition variable is initialized and waited on */ +int _starpu_trace_cond_wait_begin(); +/* After the wait on the condition wait has returned */ +int _starpu_trace_cond_wait_end(); +/* Before a barrier */ +int _starpu_trace_barrier_wait_begin(); +/* After a barrier */ +int _starpu_trace_barrier_wait_end(); +/* Finished filling the codelet's interfaces */ +int _starpu_trace_data_load(int workerid, size_t size); +/* Before a barrier synchronizing the threads */ +int _starpu_trace_start_parallel_sync(struct _starpu_job *job); +/* After a barrier synchronizing the threads */ +int _starpu_trace_end_parallel_sync(struct _starpu_job *job); + + +#endif /* #ifndef STARPU_TRACE_H */ diff --git a/src/sched_policies/component_prio.c b/src/sched_policies/component_prio.c index 4ad4dc800e..19b5fdd64e 100644 --- a/src/sched_policies/component_prio.c +++ b/src/sched_policies/component_prio.c @@ -17,33 +17,9 @@ #include #include #include -#include #include #include -#ifdef STARPU_USE_FXT -#define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { \ - if (fut_active) { \ - int workerid = STARPU_NMAXWORKERS + 1; \ - 
if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \ - workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \ - _STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(workerid, ntasks, exp_len); \ - } \ -} while (0) - -#define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do { \ - if (fut_active) { \ - int workerid = STARPU_NMAXWORKERS + 1; \ - if((component->nchildren == 1) && starpu_sched_component_is_worker(component->children[0])) \ - workerid = starpu_sched_component_worker_get_workerid(component->children[0]); \ - _STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(workerid, ntasks, exp_len); \ - } \ -} while (0) -#else -#define STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component,ntasks,exp_len) do { } while (0) -#define STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component,ntasks,exp_len) do { } while (0) -#endif - struct _starpu_prio_data { struct starpu_st_prio_deque prio; @@ -175,7 +151,7 @@ static int prio_push_local_task(struct starpu_sched_component * component, struc { ret = starpu_st_prio_deque_push_back_task(queue,task); starpu_sched_component_prefetch_on_node(component, task); - STARPU_TRACE_SCHED_COMPONENT_PUSH_PRIO(component, queue->ntasks, exp_len); + _starpu_trace_sched_component_push_prio(component, queue->ntasks, exp_len); } STARPU_COMPONENT_MUTEX_UNLOCK(mutex); if(!is_pushback) @@ -248,7 +224,7 @@ static struct starpu_task * prio_pull_task(struct starpu_sched_component * compo queue->exp_len = 0.0; } if(task) - STARPU_TRACE_SCHED_COMPONENT_POP_PRIO(component, queue->ntasks, queue->exp_len); + _starpu_trace_sched_component_pop_prio(component, queue->ntasks, queue->exp_len); STARPU_ASSERT(!isnan(queue->exp_end)); STARPU_ASSERT(!isnan(queue->exp_len)); STARPU_ASSERT(!isnan(queue->exp_start)); diff --git a/src/sched_policies/component_sched.c b/src/sched_policies/component_sched.c index c16b3770ae..2e61234cb2 100644 --- a/src/sched_policies/component_sched.c +++ 
b/src/sched_policies/component_sched.c @@ -361,7 +361,7 @@ void starpu_sched_component_connect(struct starpu_sched_component *parent, struc { parent->add_child(parent, child); child->add_parent(child, parent); - _STARPU_TRACE_SCHED_COMPONENT_CONNECT(parent,child); + _starpu_trace_sched_component_connect(parent,child); } int starpu_sched_tree_push_task(struct starpu_task * task) @@ -384,7 +384,7 @@ int starpu_sched_component_push_task(struct starpu_sched_component *from STARPU_ int priority = task->priority; pushback = to->push_task(to, task); if (!pushback) - _STARPU_TRACE_SCHED_COMPONENT_PUSH(from, to, task, priority); + _starpu_trace_sched_component_push(from, to, task, priority); return pushback; } @@ -403,7 +403,7 @@ struct starpu_task * starpu_sched_component_pull_task(struct starpu_sched_compon { struct starpu_task *task = from->pull_task(from, to); if (task) - _STARPU_TRACE_SCHED_COMPONENT_PULL(from, to, task); + _starpu_trace_sched_component_pull(from, to, task); return task; } @@ -783,6 +783,6 @@ struct starpu_sched_component * starpu_sched_component_create(struct starpu_sche component->deinit_data = take_component_and_does_nothing; component->notify_change_workers = take_component_and_does_nothing; component->name = strdup(name); - _STARPU_TRACE_SCHED_COMPONENT_NEW(component); + _starpu_trace_sched_component_new(component); return component; } diff --git a/src/sched_policies/component_worker.c b/src/sched_policies/component_worker.c index eff1bd4455..680b5dc43e 100644 --- a/src/sched_policies/component_worker.c +++ b/src/sched_policies/component_worker.c @@ -631,7 +631,7 @@ static int combined_worker_push_task(struct starpu_sched_component * component, task_alias[0]->task->destroy = 1; task_alias[0]->left = NULL; task_alias[0]->ntasks = data->parallel_worker.worker_size; - _STARPU_TRACE_JOB_PUSH(task_alias[0]->task, task_alias[0]->task->priority > 0); + _starpu_trace_job_push(task_alias[0]->task, task_alias[0]->task->priority > 0); unsigned i; for(i = 1; 
i < data->parallel_worker.worker_size; i++) { @@ -642,7 +642,7 @@ static int combined_worker_push_task(struct starpu_sched_component * component, task_alias[i]->left = task_alias[i-1]; task_alias[i - 1]->right = task_alias[i]; task_alias[i]->pntasks = &(task_alias[0]->ntasks); - _STARPU_TRACE_JOB_PUSH(task_alias[i]->task, task_alias[i]->task->priority > 0); + _starpu_trace_job_push(task_alias[i]->task, task_alias[i]->task->priority > 0); } starpu_pthread_mutex_t * mutex_to_unlock = NULL; diff --git a/src/sched_policies/deque_modeling_policy_data_aware.c b/src/sched_policies/deque_modeling_policy_data_aware.c index 16a41d24f7..94ea23de4e 100644 --- a/src/sched_policies/deque_modeling_policy_data_aware.c +++ b/src/sched_policies/deque_modeling_policy_data_aware.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include diff --git a/src/sched_policies/eager_central_priority_policy.c b/src/sched_policies/eager_central_priority_policy.c index cc52be1ba0..a133b51988 100644 --- a/src/sched_policies/eager_central_priority_policy.c +++ b/src/sched_policies/eager_central_priority_policy.c @@ -29,7 +29,6 @@ #include #include -#include #include #include diff --git a/src/sched_policies/fifo_queues.c b/src/sched_policies/fifo_queues.c index 94cfc6edf6..8b7f517ec6 100644 --- a/src/sched_policies/fifo_queues.c +++ b/src/sched_policies/fifo_queues.c @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/src/sched_policies/heteroprio.c b/src/sched_policies/heteroprio.c index fb29791547..385ca8ebce 100644 --- a/src/sched_policies/heteroprio.c +++ b/src/sched_policies/heteroprio.c @@ -26,7 +26,6 @@ #include #include "heteroprio.h" -#include #include #include #include diff --git a/src/sched_policies/parallel_eager.c b/src/sched_policies/parallel_eager.c index ba890e6b39..db84cb75b1 100644 --- a/src/sched_policies/parallel_eager.c +++ b/src/sched_policies/parallel_eager.c @@ -326,7 +326,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned 
sched_ctx_id) struct starpu_task *alias = starpu_task_dup(task); int local_worker = combined_workerid[i]; alias->destroy = 1; - _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); + _starpu_trace_job_push(alias, alias->priority > 0); starpu_st_fifo_taskq_push_task(&data->local_fifo[local_worker], alias); } @@ -337,7 +337,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id) STARPU_PTHREAD_MUTEX_UNLOCK(&data->policy_mutex); - _STARPU_TRACE_JOB_PUSH(master_alias, master_alias->priority > 0); + _starpu_trace_job_push(master_alias, master_alias->priority > 0); for (i = 1; i < worker_size; i++) { diff --git a/src/sched_policies/parallel_heft.c b/src/sched_policies/parallel_heft.c index bd21af35e9..a5b62e7f4c 100644 --- a/src/sched_policies/parallel_heft.c +++ b/src/sched_policies/parallel_heft.c @@ -183,7 +183,7 @@ static int push_task_on_best_worker(struct starpu_task *task, int best_workerid, ntasks[local_combined_workerid]++; starpu_worker_unlock(local_combined_workerid); - _STARPU_TRACE_JOB_PUSH(alias, alias->priority > 0); + _starpu_trace_job_push(alias, alias->priority > 0); ret |= starpu_push_local_task(local_combined_workerid, alias, prio); } diff --git a/src/sched_policies/work_stealing_policy.c b/src/sched_policies/work_stealing_policy.c index adfe881234..cc02a51832 100644 --- a/src/sched_policies/work_stealing_policy.c +++ b/src/sched_policies/work_stealing_policy.c @@ -623,7 +623,7 @@ static struct starpu_task *ws_pop_task(unsigned sched_ctx_id) if (task) { - _STARPU_TRACE_WORK_STEALING(workerid, victim); + _starpu_trace_work_stealing(workerid, victim); starpu_sched_task_break(task); starpu_sched_ctx_list_task_counters_decrement(sched_ctx_id, victim); record_data_locality(task, workerid); diff --git a/src/util/execute_on_all.c b/src/util/execute_on_all.c index b889f2a53d..2dcbbe30fa 100644 --- a/src/util/execute_on_all.c +++ b/src/util/execute_on_all.c @@ -19,7 +19,6 @@ #include #include #include -#include struct wrapper_func_args { 
@@ -30,22 +29,8 @@ struct wrapper_func_args static void wrapper_func(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *_args) { struct wrapper_func_args *args = (struct wrapper_func_args *) _args; -#ifdef STARPU_PROF_TOOL - struct starpu_prof_tool_info pi; -#endif - -#ifdef STARPU_PROF_TOOL - int worker = starpu_worker_get_id(); - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_start_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); - starpu_prof_tool_callbacks.starpu_prof_tool_event_start_gpu_exec(&pi, NULL, NULL); -#endif args->func(args->arg); - -#ifdef STARPU_PROF_TOOL - pi = _starpu_prof_tool_get_info(starpu_prof_tool_event_end_gpu_exec, worker, worker, starpu_prof_tool_driver_gpu, -1, (void*)args->func); - starpu_prof_tool_callbacks.starpu_prof_tool_event_end_gpu_exec(&pi, NULL, NULL); -#endif } /** diff --git a/src/util/starpu_task_insert_utils.c b/src/util/starpu_task_insert_utils.c index 1c277d9ada..6320d9c542 100644 --- a/src/util/starpu_task_insert_utils.c +++ b/src/util/starpu_task_insert_utils.c @@ -266,7 +266,7 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta struct starpu_task **task_deps_array = NULL; struct starpu_task **task_end_deps_array = NULL; - _STARPU_TRACE_TASK_BUILD_START(); + _starpu_trace_task_build_start(); task->cl = cl; current_buffer = 0; @@ -655,7 +655,7 @@ int _starpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *ta starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); } - _STARPU_TRACE_TASK_BUILD_END(); + _starpu_trace_task_build_end(); return 0; } @@ -669,7 +669,7 @@ int _fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t struct starpu_task **task_deps_array = NULL; struct starpu_task **task_end_deps_array = NULL; - _STARPU_TRACE_TASK_BUILD_START(); + _starpu_trace_task_build_start(); struct starpu_codelet_pack_arg_data state; starpu_codelet_pack_arg_init(&state); @@ -1109,7 +1109,7 @@ int 
_fstarpu_task_insert_create(struct starpu_codelet *cl, struct starpu_task *t starpu_task_declare_end_deps_array(task, nend_deps, task_end_deps_array); } - _STARPU_TRACE_TASK_BUILD_END(); + _starpu_trace_task_build_end(); return 0; } diff --git a/tools/starpu_fxt_stats.c b/tools/starpu_fxt_stats.c index 0008f94660..781a508c9e 100644 --- a/tools/starpu_fxt_stats.c +++ b/tools/starpu_fxt_stats.c @@ -14,8 +14,6 @@ * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ -//#include "fxt_tool.h" - #include #include @@ -25,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/tools/starpu_fxt_tool.c b/tools/starpu_fxt_tool.c index 114c4138dd..c6bcede3c8 100644 --- a/tools/starpu_fxt_tool.c +++ b/tools/starpu_fxt_tool.c @@ -21,7 +21,8 @@ #include #include -#include +//#include +#include #define PROGNAME "starpu_fxt_tool" diff --git a/tools/starpu_perfmodel_plot.c b/tools/starpu_perfmodel_plot.c index 7df0d04986..bd01b39b62 100644 --- a/tools/starpu_perfmodel_plot.c +++ b/tools/starpu_perfmodel_plot.c @@ -23,7 +23,7 @@ #include #include #ifdef STARPU_USE_FXT -#include +#include #endif #include