Skip to content

Commit

Permalink
Merge pull request #21 from robertdfrench/select-candidates-from-dylibs
Browse files Browse the repository at this point in the history
Measure invocation cost for dynamic symbols. Fixes #20
  • Loading branch information
robertdfrench authored Sep 4, 2024
2 parents bb669df + f21895d commit 7a49338
Show file tree
Hide file tree
Showing 20 changed files with 233 additions and 128 deletions.
43 changes: 29 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,21 @@ rigorous_speed_demo: clean $(RIGOROUS_SPEED_STATS) #: Really, how slow is it?
@printf "ifunc\t"; cat speed_demo_ifunc.stats.txt
@echo ""

RIDICULOUS_STATS=speed_demo_fixed.stats.txt \
speed_demo_ifunc.stats.txt \
speed_demo_pointer.stats.txt \
speed_demo_struct.stats.txt \
speed_demo_always.stats.txt \
speed_demo_upfront.stats.txt
RIDICULOUS_STATS=fixed.stats.txt \
ifunc.stats.txt \
pointer.stats.txt \
struct.stats.txt \
always.stats.txt \
upfront.stats.txt
ridiculous_speed_demo: clean $(RIDICULOUS_STATS) #: Compare other techniques
$(call banner, Final Results)
@echo "TEST LOW HIGH AVG"
@printf "fixed\t"; cat speed_demo_fixed.stats.txt
@printf "pointer\t"; cat speed_demo_pointer.stats.txt
@printf "struct\t"; cat speed_demo_struct.stats.txt
@printf "ifunc\t"; cat speed_demo_ifunc.stats.txt
@printf "upfront\t"; cat speed_demo_upfront.stats.txt
@printf "always\t"; cat speed_demo_always.stats.txt
@printf "fixed\t"; cat fixed.stats.txt
@printf "pointer\t"; cat pointer.stats.txt
@printf "struct\t"; cat struct.stats.txt
@printf "ifunc\t"; cat ifunc.stats.txt
@printf "upfront\t"; cat upfront.stats.txt
@printf "always\t"; cat always.stats.txt
@echo ""

%.stats.txt: %.low.txt %.high.txt %.avg.txt
Expand All @@ -74,7 +74,7 @@ ridiculous_speed_demo: clean $(RIDICULOUS_STATS) #: Compare other techniques
%.avg.txt: %.timings.txt
awk '{ sum += $$1 }; END { print sum/NR }' $< > $@

%.timings.txt: %.exe
%.timings.txt: code/speed_demo/%/main.exe
$(call banner, Sampling performance of $<)
(for i in `seq 1 10`; do \
time -f "%U" ./$<; \
Expand All @@ -95,9 +95,18 @@ vector_add.exe: code/vector_add.c
plt_example.exe: code/plt_example.c
gcc -fPIC -no-pie -o $@ $<

%.exe: code/%.c
cpu_demo.exe: code/cpu_demo.c
gcc -o $@ $<

tty_demo.exe: code/tty_demo.c
gcc -o $@ $<

speed_demo_%.exe: code/speed_demo/%/main.exe
cp $< $@

code/speed_demo/%/main.exe:
make -C code/speed_demo/$* main.exe

%.plt: %.exe
objdump -d -r $< \
| awk '/section/ { plt=0 }; /section .plt/ { plt=1 }; { if (plt) { print } }'
Expand All @@ -112,4 +121,10 @@ enter_container: docker_build.txt #: Run an interactive shell in the container
docker run -it -v $(PWD):/workspace ifuncd-up:latest

clean: #: Remove any build detritus
make -C code/speed_demo/always clean
make -C code/speed_demo/fixed clean
make -C code/speed_demo/ifunc clean
make -C code/speed_demo/pointer clean
make -C code/speed_demo/struct clean
make -C code/speed_demo/upfront clean
rm -f *.txt *.exe
14 changes: 14 additions & 0 deletions code/speed_demo/always/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# First target in the file, so a bare `make` builds main.exe and runs it under
# `time -p`.
# NOTE(review): neither `test` nor `clean` is declared .PHONY; a file with
# either name in this directory would stop the rule from running.
test: main.exe
time -p ./main.exe

# Link the driver against libincrement.so from this directory. $< is only the
# first prerequisite (main.o); the library comes in through -L. -lincrement.
# -Wl,-rpath,$(CURDIR) hands the linker an rpath entry for this directory.
main.exe: main.o libincrement.so
gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement

# Build the shared library from its single object file.
libincrement.so: libincrement.o
gcc -shared -o $@ $<

# Compile each .c file to a position-independent object with -Wall.
%.o: %.c
gcc -fPIC -Wall -c $< -o $@

# Delete every generated executable, object file, and shared library here.
clean:
rm -f *.exe *.o *.so
10 changes: 10 additions & 0 deletions code/speed_demo/always/libincrement.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Incrementer that callers pick when the CPU reports AVX2 support.
// Returns the successor of x; nothing in here actually uses AVX2.
int fancy_incrementer(int x) {
    return 1 + x;
}

// Fallback incrementer for CPUs without AVX2. It does the same work as the
// AVX2 variant on purpose: only the selection mechanism is under test.
int normal_incrementer(int x) {
    return 1 + x;
}
27 changes: 27 additions & 0 deletions code/speed_demo/always/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// This program is part of an experiment to compare the performance of GNU IFUNC
// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full
// comparison of speeds.
//
// This particular program re-checks the CPU features on every loop iteration
// and calls the matching incrementer directly, with no IFUNC and no cached
// function pointer. The choice of incrementer is irrelevant, since they are the
// same; our concern is the cost of invoking the chosen incrementer based on
// what strategy we use to select it.
#include <limits.h>
#include <stddef.h>

int fancy_incrementer(int);
int normal_incrementer(int);

// Benchmark driver: counts from 0 up to INT_MAX, testing for AVX2 on every
// iteration and calling the matching incrementer directly. Always returns 0.
int main() {
// Set up the CPU-feature data before the first __builtin_cpu_supports call.
__builtin_cpu_init ();
// Count to ~ 2 Billion by calling a dynamically-resolved incrementer
int counter = 0;
// The loop ends as soon as counter is no longer below INT_MAX.
while (counter < INT_MAX) {
// The feature test runs on each pass rather than once up front.
if (__builtin_cpu_supports("avx2")) {
counter = fancy_incrementer(counter);
} else {
counter = normal_incrementer(counter);
}
}
return 0;
}
14 changes: 14 additions & 0 deletions code/speed_demo/fixed/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# First target in the file, so a bare `make` builds main.exe and runs it under
# `time -p`.
# NOTE(review): neither `test` nor `clean` is declared .PHONY; a file with
# either name in this directory would stop the rule from running.
test: main.exe
time -p ./main.exe

# Link the driver against libincrement.so from this directory. $< is only the
# first prerequisite (main.o); the library comes in through -L. -lincrement.
# -Wl,-rpath,$(CURDIR) hands the linker an rpath entry for this directory.
main.exe: main.o libincrement.so
gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement

# Build the shared library from its single object file.
libincrement.so: libincrement.o
gcc -shared -o $@ $<

# Compile each .c file to a position-independent object with -Wall.
%.o: %.c
gcc -fPIC -Wall -c $< -o $@

# Delete every generated executable, object file, and shared library here.
clean:
rm -f *.exe *.o *.so
3 changes: 3 additions & 0 deletions code/speed_demo/fixed/libincrement.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Returns the successor of x. This is the statically chosen baseline
// incrementer, compiled into libincrement.so by this directory's Makefile.
int increment(int x) {
    return 1 + x;
}
4 changes: 3 additions & 1 deletion code/speed_demo_fixed.c → code/speed_demo/fixed/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
// should be compared to the performance of this one.
#include <limits.h>

int increment(int);

int main() {
int counter = 0;
// Count to ~ 2 Billion in a tight loop.
while (counter < INT_MAX) {
counter += 1;
counter = increment(counter);
}
return 0;
}
14 changes: 14 additions & 0 deletions code/speed_demo/ifunc/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# First target in the file, so a bare `make` builds main.exe and runs it under
# `time -p`.
# NOTE(review): neither `test` nor `clean` is declared .PHONY; a file with
# either name in this directory would stop the rule from running.
test: main.exe
time -p ./main.exe

# Link the driver against libincrement.so from this directory. $< is only the
# first prerequisite (main.o); the library comes in through -L. -lincrement.
# -Wl,-rpath,$(CURDIR) hands the linker an rpath entry for this directory.
main.exe: main.o libincrement.so
gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement

# Build the shared library from its single object file.
libincrement.so: libincrement.o
gcc -shared -o $@ $<

# Compile each .c file to a position-independent object with -Wall.
%.o: %.c
gcc -fPIC -Wall -c $< -o $@

# Delete every generated executable, object file, and shared library here.
clean:
rm -f *.exe *.o *.so
29 changes: 6 additions & 23 deletions code/speed_demo_ifunc.c → code/speed_demo/ifunc/libincrement.c
Original file line number Diff line number Diff line change
@@ -1,26 +1,17 @@
// This program is part of an experiment to compare the performance of GNU IFUNC
// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full
// comparison of speeds.
//
// This particular program selects an "appropriate" incrementer function lazily,
// via the GNU IFUNC facility. The choice of incrementer is irrelevant, since
// they are the same; our concern is the cost of invoking the chosen incrementer
// based on what strategy we use to select it.
#include <limits.h>
#include <stddef.h>
static int counter = 0;

// Use this incrementer algorithm if AVX2 is available.
void fancy_incrementer() {
counter += 1;
int fancy_incrementer(int x) {
return x + 1;
}

// Use this if AVX2 is not available. It's the same as above, because we don't
// actually rely on AVX2.
void normal_incrementer() {
counter += 1;
int normal_incrementer(int x) {
return x + 1;
}


// Select an "appropriate" incrementer based on CPU features. The actual choice
// doesn't matter in this case, we just need something for the resolver to do.
static void* resolver(void) {
Expand All @@ -37,12 +28,4 @@ static void* resolver(void) {
// Once the "real" function is selected, its address will be stored in the
// Global Offset Table. When this stub is invoked in the future, the PLT will
// cause the program to jump directly to the selected function.
void increment_counter() __attribute__((ifunc ("resolver")));

int main() {
// Count to ~ 2 Billion by calling a dynamically-resolved incrementer
while (counter < INT_MAX) {
increment_counter();
}
return 0;
}
int increment(int) __attribute__((ifunc ("resolver")));
20 changes: 20 additions & 0 deletions code/speed_demo/ifunc/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// This program is part of an experiment to compare the performance of GNU IFUNC
// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full
// comparison of speeds.
//
// This particular program selects an "appropriate" incrementer function lazily,
// via the GNU IFUNC facility. The choice of incrementer is irrelevant, since
// they are the same; our concern is the cost of invoking the chosen incrementer
// based on what strategy we use to select it.
#include <limits.h>

int increment(int);

// Benchmark driver: counts from 0 up to INT_MAX with one increment() call per
// step. increment() is only declared in this file, so its definition has to be
// supplied at link time. Always returns 0.
int main() {
// Count to ~ 2 Billion by calling a dynamically-resolved incrementer
int counter = 0;
// The loop ends as soon as counter is no longer below INT_MAX.
while (counter < INT_MAX) {
counter = increment(counter);
}
return 0;
}
14 changes: 14 additions & 0 deletions code/speed_demo/pointer/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# First target in the file, so a bare `make` builds main.exe and runs it under
# `time -p`.
# NOTE(review): neither `test` nor `clean` is declared .PHONY; a file with
# either name in this directory would stop the rule from running.
test: main.exe
time -p ./main.exe

# Link the driver against libincrement.so from this directory. $< is only the
# first prerequisite (main.o); the library comes in through -L. -lincrement.
# -Wl,-rpath,$(CURDIR) hands the linker an rpath entry for this directory.
main.exe: main.o libincrement.so
gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement

# Build the shared library from its single object file.
libincrement.so: libincrement.o
gcc -shared -o $@ $<

# Compile each .c file to a position-independent object with -Wall.
%.o: %.c
gcc -fPIC -Wall -c $< -o $@

# Delete every generated executable, object file, and shared library here.
clean:
rm -f *.exe *.o *.so
10 changes: 10 additions & 0 deletions code/speed_demo/pointer/libincrement.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Incrementer meant for CPUs that report AVX2 support.
// Returns the successor of x; nothing in here actually uses AVX2.
int fancy_incrementer(int x) {
    return 1 + x;
}

// Fallback incrementer for CPUs without AVX2. It does the same work as the
// AVX2 variant on purpose: only the selection mechanism is under test.
int normal_incrementer(int x) {
    return 1 + x;
}
22 changes: 7 additions & 15 deletions code/speed_demo_pointer.c → code/speed_demo/pointer/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,19 @@
// the chosen incrementer based on what strategy we use to select it.
#include <limits.h>
#include <stddef.h>
static int counter = 0;

// This is a function pointer, which will eventually point to one of the two
// incrementer functions defined below. The choice of incrementer is determined
// by the resolver function. Once the "real" function is selected, its address
// is stored here.
static void (*increment_counter)(void) = 0;
static int (*increment)(int) = 0;

// Use this incrementer algorithm if AVX2 is available.
void fancy_incrementer() {
counter += 1;
}

// Use this if AVX2 is not available. It's the same as above, because we don't
// actually rely on AVX2.
void normal_incrementer() {
counter += 1;
}
int fancy_incrementer(int);
int normal_incrementer(int);

// Select an "appropriate" incrementer based on CPU features. The actual choice
// doesn't matter in this case, we just need something for the resolver to do.
void (*resolver(void))(void) {
int (*resolver(void))(int) {
__builtin_cpu_init();
if (__builtin_cpu_supports("avx2")) {
return fancy_incrementer;
Expand All @@ -42,11 +33,12 @@ void (*resolver(void))(void) {
int main() {
// Select the appropriate incrementer before doing any other work. This
// kind of symbol resolution is "proactive" rather than "lazy".
increment_counter = resolver();
increment = resolver();

// Count to ~ 2 Billion by calling a dynamically-resolved incrementer
int counter = 0;
while (counter < INT_MAX) {
increment_counter();
counter = increment(counter);
}
return 0;
}
14 changes: 14 additions & 0 deletions code/speed_demo/struct/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# First target in the file, so a bare `make` builds main.exe and runs it under
# `time -p`.
# NOTE(review): neither `test` nor `clean` is declared .PHONY; a file with
# either name in this directory would stop the rule from running.
test: main.exe
time -p ./main.exe

# Link the driver against libincrement.so from this directory. $< is only the
# first prerequisite (main.o); the library comes in through -L. -lincrement.
# -Wl,-rpath,$(CURDIR) hands the linker an rpath entry for this directory.
main.exe: main.o libincrement.so
gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement

# Build the shared library from its single object file.
libincrement.so: libincrement.o
gcc -shared -o $@ $<

# Compile each .c file to a position-independent object with -Wall.
%.o: %.c
gcc -fPIC -Wall -c $< -o $@

# Delete every generated executable, object file, and shared library here.
clean:
rm -f *.exe *.o *.so
10 changes: 10 additions & 0 deletions code/speed_demo/struct/libincrement.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Incrementer meant for CPUs that report AVX2 support.
// Returns the successor of x; nothing in here actually uses AVX2.
int fancy_incrementer(int x) {
    return 1 + x;
}

// Fallback incrementer for CPUs without AVX2. It does the same work as the
// AVX2 variant on purpose: only the selection mechanism is under test.
int normal_incrementer(int x) {
    return 1 + x;
}
23 changes: 7 additions & 16 deletions code/speed_demo_struct.c → code/speed_demo/struct/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,15 @@
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
static int counter = 0;

struct myfuncs {
void (*increment_counter)();
int (*increment)(int);
};

static struct myfuncs *page;


// Use this incrementer algorithm if AVX2 is available.
void fancy_incrementer() {
counter += 1;
}

// Use this if AVX2 is not available. It's the same as above, because we don't
// actually rely on AVX2.
void normal_incrementer() {
counter += 1;
}
int fancy_incrementer(int);
int normal_incrementer(int);

void initfuncs(void) {
int pagesize = getpagesize();
Expand All @@ -34,9 +24,9 @@ void initfuncs(void) {

__builtin_cpu_init();
if (__builtin_cpu_supports("avx2")) {
page->increment_counter = fancy_incrementer;
page->increment = fancy_incrementer;
} else {
page->increment_counter = normal_incrementer;
page->increment = normal_incrementer;
}

if(mprotect(page, pagesize, PROT_READ))
Expand All @@ -48,8 +38,9 @@ void initfuncs(void) {
int main(void) {
initfuncs();

int counter = 0;
while (counter < INT_MAX) {
page->increment_counter();
counter = page->increment(counter);
}

return 0;
Expand Down
Loading

0 comments on commit 7a49338

Please sign in to comment.