diff --git a/Makefile b/Makefile index 1ed8a04..48a9d1f 100644 --- a/Makefile +++ b/Makefile @@ -45,21 +45,21 @@ rigorous_speed_demo: clean $(RIGOROUS_SPEED_STATS) #: Really, how slow is it? @printf "ifunc\t"; cat speed_demo_ifunc.stats.txt @echo "" -RIDICULOUS_STATS=speed_demo_fixed.stats.txt \ - speed_demo_ifunc.stats.txt \ - speed_demo_pointer.stats.txt \ - speed_demo_struct.stats.txt \ - speed_demo_always.stats.txt \ - speed_demo_upfront.stats.txt +RIDICULOUS_STATS=fixed.stats.txt \ + ifunc.stats.txt \ + pointer.stats.txt \ + struct.stats.txt \ + always.stats.txt \ + upfront.stats.txt ridiculous_speed_demo: clean $(RIDICULOUS_STATS) #: Compare other techniques $(call banner, Final Results) @echo "TEST LOW HIGH AVG" - @printf "fixed\t"; cat speed_demo_fixed.stats.txt - @printf "pointer\t"; cat speed_demo_pointer.stats.txt - @printf "struct\t"; cat speed_demo_struct.stats.txt - @printf "ifunc\t"; cat speed_demo_ifunc.stats.txt - @printf "upfront\t"; cat speed_demo_upfront.stats.txt - @printf "always\t"; cat speed_demo_always.stats.txt + @printf "fixed\t"; cat fixed.stats.txt + @printf "pointer\t"; cat pointer.stats.txt + @printf "struct\t"; cat struct.stats.txt + @printf "ifunc\t"; cat ifunc.stats.txt + @printf "upfront\t"; cat upfront.stats.txt + @printf "always\t"; cat always.stats.txt @echo "" %.stats.txt: %.low.txt %.high.txt %.avg.txt @@ -74,7 +74,7 @@ ridiculous_speed_demo: clean $(RIDICULOUS_STATS) #: Compare other techniques %.avg.txt: %.timings.txt awk '{ sum += $$1 }; END { print sum/NR }' $< > $@ -%.timings.txt: %.exe +%.timings.txt: code/speed_demo/%/main.exe $(call banner, Sampling performance of $<) (for i in `seq 1 10`; do \ time -f "%U" ./$<; \ @@ -95,9 +95,18 @@ vector_add.exe: code/vector_add.c plt_example.exe: code/plt_example.c gcc -fPIC -no-pie -o $@ $< -%.exe: code/%.c +cpu_demo.exe: code/cpu_demo.c gcc -o $@ $< +tty_demo.exe: code/tty_demo.c + gcc -o $@ $< + +speed_demo_%.exe: code/speed_demo/%/main.exe + cp $< $@ + 
+code/speed_demo/%/main.exe: + $(MAKE) -C code/speed_demo/$* main.exe + %.plt: %.exe objdump -d -r $< \ | awk '/section/ { plt=0 }; /section .plt/ { plt=1 }; { if (plt) { print } }' @@ -112,4 +121,10 @@ enter_container: docker_build.txt #: Run an interactive shell in the container docker run -it -v $(PWD):/workspace ifuncd-up:latest clean: #: Remove any build detritus + $(MAKE) -C code/speed_demo/always clean + $(MAKE) -C code/speed_demo/fixed clean + $(MAKE) -C code/speed_demo/ifunc clean + $(MAKE) -C code/speed_demo/pointer clean + $(MAKE) -C code/speed_demo/struct clean + $(MAKE) -C code/speed_demo/upfront clean rm -f *.txt *.exe diff --git a/code/speed_demo/always/Makefile b/code/speed_demo/always/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/always/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo/always/libincrement.c b/code/speed_demo/always/libincrement.c new file mode 100644 index 0000000..b576649 --- /dev/null +++ b/code/speed_demo/always/libincrement.c @@ -0,0 +1,10 @@ +// Use this incrementer algorithm if AVX2 is available. +int fancy_incrementer(int x) { + return x + 1; +} + +// Use this if AVX2 is not available. It's the same as above, because we don't +// actually rely on AVX2. +int normal_incrementer(int x) { + return x + 1; +} diff --git a/code/speed_demo/always/main.c b/code/speed_demo/always/main.c new file mode 100644 index 0000000..1fc3922 --- /dev/null +++ b/code/speed_demo/always/main.c @@ -0,0 +1,27 @@ +// This program is part of an experiment to compare the performance of GNU IFUNC +// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full +// comparison of speeds. 
+// +// This particular program re-checks the CPU features on every loop iteration +// and picks an incrementer each time, without caching the choice. The choice of +// incrementer is irrelevant, since both are the same; our concern is the cost of +// invoking the chosen incrementer based on what strategy we use to select it. +#include +#include + +int fancy_incrementer(int); +int normal_incrementer(int); + +int main() { + __builtin_cpu_init (); + // Count to ~ 2 Billion, re-selecting the incrementer on every iteration + int counter = 0; + while (counter < INT_MAX) { + if (__builtin_cpu_supports("avx2")) { + counter = fancy_incrementer(counter); + } else { + counter = normal_incrementer(counter); + } + } + return 0; +} diff --git a/code/speed_demo/fixed/Makefile b/code/speed_demo/fixed/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/fixed/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo/fixed/libincrement.c b/code/speed_demo/fixed/libincrement.c new file mode 100644 index 0000000..aec0c87 --- /dev/null +++ b/code/speed_demo/fixed/libincrement.c @@ -0,0 +1,3 @@ +int increment(int x) { + return x + 1; +} diff --git a/code/speed_demo_fixed.c b/code/speed_demo/fixed/main.c similarity index 91% rename from code/speed_demo_fixed.c rename to code/speed_demo/fixed/main.c index e5f612a..3e9aef0 100644 --- a/code/speed_demo_fixed.c +++ b/code/speed_demo/fixed/main.c @@ -7,11 +7,13 @@ // should be compared to the performance of this one. #include +int increment(int); + int main() { int counter = 0; // Count to ~ 2 Billion in a tight loop. 
while (counter < INT_MAX) { - counter += 1; + counter = increment(counter); } return 0; } diff --git a/code/speed_demo/ifunc/Makefile b/code/speed_demo/ifunc/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/ifunc/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo_ifunc.c b/code/speed_demo/ifunc/libincrement.c similarity index 51% rename from code/speed_demo_ifunc.c rename to code/speed_demo/ifunc/libincrement.c index 720e1d4..d5bdd73 100644 --- a/code/speed_demo_ifunc.c +++ b/code/speed_demo/ifunc/libincrement.c @@ -1,26 +1,17 @@ -// This program is part of an experiment to compare the performance of GNU IFUNC -// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full -// comparison of speeds. -// -// This particular program selects an "appropriate" incrementer function lazily, -// via the GNU IFUNC facility. The choice of incrementer is irrelevant, since -// they are the same; our concern is the cost of invoking the chosen incrementer -// based on what strategy we use to select it. -#include #include -static int counter = 0; // Use this incrementer algorithm if AVX2 is available. -void fancy_incrementer() { - counter += 1; +int fancy_incrementer(int x) { + return x + 1; } // Use this if AVX2 is not available. It's the same as above, because we don't // actually rely on AVX2. -void normal_incrementer() { - counter += 1; +int normal_incrementer(int x) { + return x + 1; } + // Select an "appropriate" incrementer based on CPU features. The actual choice // doesn't matter in this case, we just need something for the resolver to do. 
static void* resolver(void) { @@ -37,12 +28,4 @@ static void* resolver(void) { // Once the "real" function is selected, its address will be stored in the // Global Offset Table. When this stub is invoked in the future, the PLT will // cause the program to jump directly to the selected function. -void increment_counter() __attribute__((ifunc ("resolver"))); - -int main() { - // Count to ~ 2 Billion by calling a dynamically-resolved incrementer - while (counter < INT_MAX) { - increment_counter(); - } - return 0; -} +int increment(int) __attribute__((ifunc ("resolver"))); diff --git a/code/speed_demo/ifunc/main.c b/code/speed_demo/ifunc/main.c new file mode 100644 index 0000000..32f0a85 --- /dev/null +++ b/code/speed_demo/ifunc/main.c @@ -0,0 +1,20 @@ +// This program is part of an experiment to compare the performance of GNU IFUNC +// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full +// comparison of speeds. +// +// This particular program selects an "appropriate" incrementer function lazily, +// via the GNU IFUNC facility. The choice of incrementer is irrelevant, since +// they are the same; our concern is the cost of invoking the chosen incrementer +// based on what strategy we use to select it. +#include + +int increment(int); + +int main() { + // Count to ~ 2 Billion by calling a dynamically-resolved incrementer + int counter = 0; + while (counter < INT_MAX) { + counter = increment(counter); + } + return 0; +} diff --git a/code/speed_demo/pointer/Makefile b/code/speed_demo/pointer/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/pointer/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. 
-lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo/pointer/libincrement.c b/code/speed_demo/pointer/libincrement.c new file mode 100644 index 0000000..b576649 --- /dev/null +++ b/code/speed_demo/pointer/libincrement.c @@ -0,0 +1,10 @@ +// Use this incrementer algorithm if AVX2 is available. +int fancy_incrementer(int x) { + return x + 1; +} + +// Use this if AVX2 is not available. It's the same as above, because we don't +// actually rely on AVX2. +int normal_incrementer(int x) { + return x + 1; +} diff --git a/code/speed_demo_pointer.c b/code/speed_demo/pointer/main.c similarity index 77% rename from code/speed_demo_pointer.c rename to code/speed_demo/pointer/main.c index 5f37d60..686d347 100644 --- a/code/speed_demo_pointer.c +++ b/code/speed_demo/pointer/main.c @@ -9,28 +9,19 @@ // the chosen incrementer based on what strategy we use to select it. #include #include -static int counter = 0; // This is a function pointer, which will eventually point to one of the two // incrementer functions defined below. The choice of incrementer is determined // by the resolver function. Once the "real" function is selected, its address // is stored here. -static void (*increment_counter)(void) = 0; +static int (*increment)(int) = 0; -// Use this incrementer algorithm if AVX2 is available. -void fancy_incrementer() { - counter += 1; -} - -// Use this if AVX2 is not available. It's the same as above, because we don't -// actually rely on AVX2. -void normal_incrementer() { - counter += 1; -} +int fancy_incrementer(int); +int normal_incrementer(int); // Select an "appropriate" incrementer based on CPU features. The actual choice // doesn't matter in this case, we just need something for the resolver to do. 
-void (*resolver(void))(void) { +int (*resolver(void))(int) { __builtin_cpu_init(); if (__builtin_cpu_supports("avx2")) { return fancy_incrementer; @@ -42,11 +33,12 @@ void (*resolver(void))(void) { int main() { // Select the appropriate incrementer before doing any other work. This // kind of symbol resolution is "proactive" rather than "lazy". - increment_counter = resolver(); + increment = resolver(); // Count to ~ 2 Billion by calling a dynamically-resolved incrementer + int counter = 0; while (counter < INT_MAX) { - increment_counter(); + counter = increment(counter); } return 0; } diff --git a/code/speed_demo/struct/Makefile b/code/speed_demo/struct/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/struct/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. -lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo/struct/libincrement.c b/code/speed_demo/struct/libincrement.c new file mode 100644 index 0000000..b576649 --- /dev/null +++ b/code/speed_demo/struct/libincrement.c @@ -0,0 +1,10 @@ +// Use this incrementer algorithm if AVX2 is available. +int fancy_incrementer(int x) { + return x + 1; +} + +// Use this if AVX2 is not available. It's the same as above, because we don't +// actually rely on AVX2. 
+int normal_incrementer(int x) { + return x + 1; +} diff --git a/code/speed_demo_struct.c b/code/speed_demo/struct/main.c similarity index 57% rename from code/speed_demo_struct.c rename to code/speed_demo/struct/main.c index e4c504b..f818078 100644 --- a/code/speed_demo_struct.c +++ b/code/speed_demo/struct/main.c @@ -4,25 +4,15 @@ #include #include #include -static int counter = 0; - struct myfuncs { - void (*increment_counter)(); + int (*increment)(int); }; static struct myfuncs *page; -// Use this incrementer algorithm if AVX2 is available. -void fancy_incrementer() { - counter += 1; -} - -// Use this if AVX2 is not available. It's the same as above, because we don't -// actually rely on AVX2. -void normal_incrementer() { - counter += 1; -} +int fancy_incrementer(int); +int normal_incrementer(int); void initfuncs(void) { int pagesize = getpagesize(); @@ -34,9 +24,9 @@ void initfuncs(void) { __builtin_cpu_init(); if (__builtin_cpu_supports("avx2")) { - page->increment_counter = fancy_incrementer; + page->increment = fancy_incrementer; } else { - page->increment_counter = normal_incrementer; + page->increment = normal_incrementer; } if(mprotect(page, pagesize, PROT_READ)) @@ -48,8 +38,9 @@ void initfuncs(void) { int main(void) { initfuncs(); + int counter = 0; while (counter < INT_MAX) { - page->increment_counter(); + counter = page->increment(counter); } return 0; diff --git a/code/speed_demo/upfront/Makefile b/code/speed_demo/upfront/Makefile new file mode 100644 index 0000000..9b87782 --- /dev/null +++ b/code/speed_demo/upfront/Makefile @@ -0,0 +1,14 @@ +test: main.exe + time -p ./main.exe + +main.exe: main.o libincrement.so + gcc -Wl,-rpath,$(CURDIR) -o $@ $< -L. 
-lincrement + +libincrement.so: libincrement.o + gcc -shared -o $@ $< + +%.o: %.c + gcc -fPIC -Wall -c $< -o $@ + +clean: + rm -f *.exe *.o *.so diff --git a/code/speed_demo/upfront/libincrement.c b/code/speed_demo/upfront/libincrement.c new file mode 100644 index 0000000..b576649 --- /dev/null +++ b/code/speed_demo/upfront/libincrement.c @@ -0,0 +1,10 @@ +// Use this incrementer algorithm if AVX2 is available. +int fancy_incrementer(int x) { + return x + 1; +} + +// Use this if AVX2 is not available. It's the same as above, because we don't +// actually rely on AVX2. +int normal_incrementer(int x) { + return x + 1; +} diff --git a/code/speed_demo_upfront.c b/code/speed_demo/upfront/main.c similarity index 75% rename from code/speed_demo_upfront.c rename to code/speed_demo/upfront/main.c index c177cac..40f4b6c 100644 --- a/code/speed_demo_upfront.c +++ b/code/speed_demo/upfront/main.c @@ -10,27 +10,18 @@ #include #include #include -static int counter = 0; static bool cpu_has_avx2 = false; -// Use this incrementer algorithm if AVX2 is available. -void avx2_incrementer() { - counter += 1; -} - -// Use this if AVX2 is not available. It's the same as above, because we don't -// actually rely on AVX2. -void normal_incrementer() { - counter += 1; -} +int fancy_incrementer(int); +int normal_incrementer(int); // Select an "appropriate" incrementer based on CPU features. The actual choice // doesn't matter in this case, we just need something for the resolver to do. 
-void increment_counter() { +int increment(int x) { if (cpu_has_avx2) { - avx2_incrementer(); + return fancy_incrementer(x); } else { - normal_incrementer(); + return normal_incrementer(x); } } @@ -43,8 +34,9 @@ int main() { detect_cpu_features(); // Count to ~ 2 Billion by calling a dynamically-resolved incrementer + int counter = 0; while (counter < INT_MAX) { - increment_counter(); + counter = increment(counter); } return 0; } diff --git a/code/speed_demo_always.c b/code/speed_demo_always.c deleted file mode 100644 index e572bd6..0000000 --- a/code/speed_demo_always.c +++ /dev/null @@ -1,44 +0,0 @@ -// This program is part of an experiment to compare the performance of GNU IFUNC -// vs plain-old function pointers. Run `make rigorous_speed_demo` to see a full -// comparison of speeds. -// -// This particular program selects an "appropriate" incrementer function lazily, -// via the GNU IFUNC facility. The choice of incrementer is irrelevant, since -// they are the same; our concern is the cost of invoking the chosen incrementer -// based on what strategy we use to select it. -#include -#include -static int counter = 0; - -// Use this incrementer algorithm if AVX2 is available. -void avx2_incrementer() { - counter += 1; -} - -// Use this if AVX2 is not available. It's the same as above, because we don't -// actually rely on AVX2. -void normal_incrementer() { - counter += 1; -} - -// This is the ifunc "stub" function. The first time it is called, the -// `resolver` will be invoked in order to select an appropriate "real" function. -// Once the "real" function is selected, its address will be stored in the -// Global Offset Table. When this stub is invoked in the future, the PLT will -// cause the program to jump directly to the selected function. 
-void increment_counter() { - __builtin_cpu_init (); - if (__builtin_cpu_supports("avx2")) { - avx2_incrementer(); - } else { - normal_incrementer(); - } -} - -int main() { - // Count to ~ 2 Billion by calling a dynamically-resolved incrementer - while (counter < INT_MAX) { - increment_counter(); - } - return 0; -}