Skip to content

Commit

Permalink
Trying to improve the benchmark by providing a new method to measure
Browse files Browse the repository at this point in the history
thread context switches

The new method relies on sched_yield().
  • Loading branch information
abique committed Mar 1, 2012
1 parent 42610b7 commit ede72f7
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ timectxsw
timectxswws
timesyscall
timetctxsw
timetctxsw2
11 changes: 8 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
CC = gcc
CFLAGS = -mtune=native -O3 -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -std=c99 \
CFLAGS = -march=native -O3 -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -std=c99 \
-W -Wall -Werror
LDFLAGS = -lrt
LDFLAGS = -lrt -lpthread

TARGETS = timectxsw timectxswws timesyscall timetctxsw timetctxsw2

all: bench

bench: timectxsw timectxswws timesyscall timetctxsw
bench: $(TARGETS)
./cpubench.sh

clean:
rm -f $(TARGETS)

# Targets that are commands rather than files; `clean` must be listed so a
# stray file named "clean" cannot make the target appear up to date.
.PHONY: all bench clean
11 changes: 11 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,14 @@ timectxsw: Benchmarks the overhead of context switching between 2 processes.
timetctxsw: Benchmarks the overhead of context switching between 2 threads.
timectxswws: Benchmarks the overhead of context switching between 2 processes
using a working set of the size specified in argument.
timetctxsw2: Benchmarks the overhead of context switching between 2 threads,
by using a sched_yield() method.
If you run it under taskset -a 1, all threads should be scheduled on the
same processor, so you are really measuring thread context switches.
To check that this is really what happens, just do:
strace -ff -tt -v taskset -a 1 ./timetctxsw2
Now why is sched_yield() enough for testing? Because it places
the current thread at the end of the ready queue, so the next
ready thread will be scheduled.
I also added sched_setscheduler(SCHED_FIFO) to get the best
performance.
4 changes: 4 additions & 0 deletions cpubench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ runbench() {
$* ./timesyscall
$* ./timectxsw
$* ./timetctxsw
$* ./timetctxsw2
}

echo '-- No CPU affinity --'
runbench

echo '-- With CPU affinity --'
runbench taskset -c $((total - 1))

echo '-- With CPU affinity to CPU 0 --'
runbench taskset -a 1
48 changes: 48 additions & 0 deletions timetctxsw2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <sched.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <errno.h>

/*
 * Reads the monotonic clock into *ts and returns that instant as a single
 * nanosecond count.
 *
 * CLOCK_MONOTONIC is used instead of CLOCK_REALTIME because the result is
 * only used to compute an elapsed interval: the wall clock can be stepped
 * (NTP, settimeofday) in the middle of a run, which would corrupt the
 * measured delta. The returned value is therefore only meaningful when
 * subtracted from another time_ns() result.
 *
 * ts: caller-provided storage that receives the raw clock reading.
 * Returns tv_sec * 1e9 + tv_nsec. Prints a diagnostic and exits with
 * status 1 if the clock cannot be read.
 */
static inline long long unsigned time_ns(struct timespec* const ts) {
  if (clock_gettime(CLOCK_MONOTONIC, ts)) {
    fprintf(stderr, "clock_gettime(): %s\n", strerror(errno));
    exit(1);
  }
  return ((long long unsigned) ts->tv_sec) * 1000000000LLU
    + (long long unsigned) ts->tv_nsec;
}

/* Number of sched_yield() calls issued by each of the two threads. */
static const int iterations = 500000;

/*
 * Entry point of the helper thread: yields the processor `iterations`
 * times and then terminates.
 *
 * ctx: unused start-routine argument.
 * Always returns NULL.
 */
static void* thread(void*ctx) {
  int remaining = iterations;

  (void) ctx;
  while (remaining > 0) {
    sched_yield();
    --remaining;
  }
  return NULL;
}

/*
 * Times `iterations` sched_yield() calls in the main thread while a helper
 * thread runs the same yield loop, then prints the total elapsed time and
 * the average cost per context switch.
 *
 * Returns 0 on success, 1 if the helper thread cannot be created or joined.
 */
int main(void) {
  /* Best effort: switching to SCHED_FIFO can be refused (it normally needs
   * elevated privileges), so a failure is reported and the benchmark still
   * runs under the current scheduling policy. */
  struct sched_param param;
  param.sched_priority = 1;
  if (sched_setscheduler(getpid(), SCHED_FIFO, &param)) {
    fprintf(stderr, "sched_setscheduler(): %s\n", strerror(errno));
  }

  struct timespec ts;
  pthread_t thd;
  /* pthread functions return the error number instead of setting errno. */
  const int create_err = pthread_create(&thd, NULL, thread, NULL);
  if (create_err) {
    fprintf(stderr, "pthread_create(): %s\n", strerror(create_err));
    return 1;
  }

  long long unsigned start_ns = time_ns(&ts);
  for (int i = 0; i < iterations; i++) {
    sched_yield();
  }
  long long unsigned delta = time_ns(&ts) - start_ns;

  /* Reap the helper outside the timed section so it does not affect the
   * measurement and is not still running when the process exits. */
  const int join_err = pthread_join(thd, NULL);
  if (join_err) {
    fprintf(stderr, "pthread_join(): %s\n", strerror(join_err));
    return 1;
  }

  /* Each timed yield hands the CPU to the helper, whose own yield hands it
   * back: two switches per iteration (assuming both threads share a CPU). */
  const int nswitches = iterations * 2;
  /* double, not float: a delta in the 1e8..1e9 ns range exceeds the 24-bit
   * float mantissa. */
  printf("%i thread context switches in %lluns (%.1fns/ctxsw)\n",
         nswitches, delta, (delta / (double) nswitches));
  return 0;
}

0 comments on commit ede72f7

Please sign in to comment.