Trying to improve the benchmark by providing a new method to measure

thread context switches. The new method relies on sched_yield().
jhnphm · Mar 1, 2012 · ede72f7 · ede72f7
1 parent 42610b7
commit ede72f7
Show file tree

Hide file tree

Showing 5 changed files with 72 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@ timectxsw
 timectxswws
 timesyscall
 timetctxsw
+timetctxsw2
diff --git a/Makefile b/Makefile
@@ -1,11 +1,18 @@
 CC = gcc
-CFLAGS = -mtune=native -O3 -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -std=c99 \
+CFLAGS = -march=native -O3 -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -std=c99 \
          -W -Wall -Werror
-LDFLAGS = -lrt
+# Libraries live in LDLIBS so that the implicit link rule puts them after the
+# source file; linkers that default to --as-needed drop them otherwise.
+LDLIBS = -lrt -lpthread
+
+TARGETS = timectxsw timectxswws timesyscall timetctxsw timetctxsw2
 
 all: bench
 
-bench: timectxsw timectxswws timesyscall timetctxsw
+bench: $(TARGETS)
 	./cpubench.sh
 
+clean:
+	rm -f $(TARGETS)
+
-.PHONY: all bench
+.PHONY: all bench clean
diff --git a/README b/README
@@ -6,3 +6,14 @@ timectxsw:   Benchmarks the overhead of context switching between 2 processes.
 timetctxsw:  Benchmarks the overhead of context switching between 2 threads.
 timectxswws: Benchmarks the overhead of context switching between 2 processes
              using a working set of the size specified in argument.
+timetctxsw2: Benchmarks the overhead of context switching between 2 threads,
+             using sched_yield().
+             If you run it under taskset -a 1, all threads should be scheduled
+             on the same processor, so you really measure thread switches.
+             To check that this is what actually happens, run:
+               strace -ff -tt -v taskset -a 1 ./timetctxsw2
+             Why is sched_yield() enough for this test? Because it places
+             the current thread at the end of the ready queue, so the next
+             ready thread gets scheduled.
+             I also added sched_setscheduler(SCHED_FIFO) to get the best
+             performance.
diff --git a/cpubench.sh b/cpubench.sh
@@ -28,10 +28,14 @@ runbench() {
   $* ./timesyscall
   $* ./timectxsw
   $* ./timetctxsw
+  $* ./timetctxsw2
 }
 
 echo '-- No CPU affinity --'
 runbench
 
 echo '-- With CPU affinity --'
 runbench taskset -c $((total - 1))
+
+echo '-- With CPU affinity to CPU 0 --'
+runbench taskset -a 1  # affinity mask 1 = CPU 0 only, as the banner above says
diff --git a/timetctxsw2.c b/timetctxsw2.c
@@ -0,0 +1,48 @@
+#include <sched.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Reads the monotonic clock into *ts and returns the same instant expressed
+ * as a number of nanoseconds.
+ *
+ * CLOCK_MONOTONIC is used instead of CLOCK_REALTIME because callers only ever
+ * subtract two readings: the wall clock may be stepped (NTP, settimeofday)
+ * between them and corrupt the measured interval.
+ *
+ * ts: caller-provided storage, overwritten with the raw reading.
+ * Returns tv_sec * 1e9 + tv_nsec.  If the clock cannot be read, the reason is
+ * printed on stderr and the process exits with status 1.
+ */
+static inline long long unsigned time_ns(struct timespec* const ts) {
+  if (clock_gettime(CLOCK_MONOTONIC, ts) != 0) {
+    fprintf(stderr, "clock_gettime(): %s\n", strerror(errno));
+    exit(1);
+  }
+  return ((long long unsigned) ts->tv_sec) * 1000000000LLU
+    + (long long unsigned) ts->tv_nsec;
+}
+
+static const int iterations = 500000;  /* sched_yield() calls per loop */
+
+/*
+ * Worker thread entry point: calls sched_yield() `iterations` times and then
+ * terminates.  The argument is ignored and the result is always NULL.
+ */
+static void* thread(void*ctx) {
+  int remaining = iterations;
+
+  (void)ctx;
+  while (remaining-- > 0) {
+    sched_yield();
+  }
+  return NULL;
+}
+
+/*
+ * Times sched_yield() ping-pong between the main thread and one worker.
+ *
+ * The process first requests SCHED_FIFO at priority 1; if that call fails a
+ * warning is printed and the run continues under the current policy.  A
+ * worker running thread() is then started, the main thread times
+ * `iterations` calls to sched_yield(), and the total plus the average cost
+ * per context switch are printed on stdout.
+ *
+ * Returns 0 on success, 1 if the worker cannot be created or joined.
+ */
+int main(void) {
+  struct sched_param param;
+  param.sched_priority = 1;
+  if (sched_setscheduler(getpid(), SCHED_FIFO, &param)) {
+    /* Best effort only: the benchmark still runs without SCHED_FIFO. */
+    fprintf(stderr, "sched_setscheduler(): %s\n", strerror(errno));
+  }
+
+  pthread_t thd;
+  /* pthread functions return the error number rather than setting errno. */
+  int err = pthread_create(&thd, NULL, thread, NULL);
+  if (err) {
+    fprintf(stderr, "pthread_create(): %s\n", strerror(err));
+    return 1;
+  }
+
+  struct timespec ts;
+  const long long unsigned start_ns = time_ns(&ts);
+  for (int i = 0; i < iterations; i++) {
+    sched_yield();
+  }
+  const long long unsigned delta = time_ns(&ts) - start_ns;
+
+  /* Reap the worker outside the timed section so it is not left running
+   * when the process exits. */
+  err = pthread_join(thd, NULL);
+  if (err) {
+    fprintf(stderr, "pthread_join(): %s\n", strerror(err));
+    return 1;
+  }
+
+  /* One timed iteration is one round trip: main yields to the worker (first
+   * switch) and the worker yields back (second switch).  That is two
+   * switches per iteration, not four. */
+  const int nswitches = iterations * 2;
+  printf("%i  thread context switches in %lluns (%.1fns/ctxsw)\n",
+         nswitches, delta, (double) delta / nswitches);
+  return 0;
+}