From 795d28dd70236d72b3e1674e3cb8f34a53aafa07 Mon Sep 17 00:00:00 2001 From: Ronan Collobert Date: Tue, 10 Mar 2015 15:56:29 -0400 Subject: [PATCH] Squashed 'tommyds/' changes from 183b568..56fae10 56fae10 Some more comments about the hash used 92d1e36 Update comments about integer hash functions 2762316 Add a note about the hash function used the benchmark 085d105 Make more clear that hash function we use 1180371 Split the googlelibchash and concurrencykit graph c4c3660 Add a new list count operation with O(n) complexity b3af524 Changes to version 2.1. 0823405 Updates the HISTORY. 37e36bf Drops doxygen dependency for Travis CI. 828e796 Uses a more complete Travis CI script. 6af2f22 Fixes another typo in the legend. 3389d11 Uses explict var for objdump and drop -fverbose-asm. 4d318ab Removes index and remainder uses as old gcc raise a warning for them. 055884d Fixes a typo in the legend. a253eeb Removes compiler warnings due different type size. c203b5c Moves the addressing functions of hashlin to the header. 6bbcc56 Allows to redefines CC and CXX with environment variables. 5eac92a Uses new compiler options for generating the benchmark. 3af910f Uses fixed arch options to build the benchmark. 56c897e Doesn't build by default the bench prog. b60c7b6 Decreases the requirement of C++11 std. C++0x seems enough. a8d61cb Uses gcc options valid for any native machine. 852031d Adds support for Travis CI. ee7d007 Some documentation changes. 494a394 Default run for valgrind is the check program. 84fc45f Uses a simpler statement to compute the split bit in hashlin. e3da5a4 Reimplements the bucket addressing for hashling. 1f3b338 More documentation changes. 7266227 Removes an unnecessary integer division in hashdyn. 7e1dac6 Various documentation changes. 21140d2 Adjusts the HISTORY with latest changes. e10f5da Even more code coverage. Now at 100%. 9b37acc Removes some unnecessary parenthesis from the code. aec0d2f Updates the design comments. 108f669 Adds more test coverage. 
Now at 99%. 3f9ef98 Fixes tommy_trie_inplace when removing duplicate elements. b4719b6 Minor changes at the documentation. 6d1b159 Adds support for code coverage. 12143b9 Cleans up the benchmark source. git-subtree-dir: tommyds git-subtree-split: 56fae105c119defcedf219f9a4668342e84d370b --- .gitignore | 6 + .travis.yml | 10 + HISTORY | 8 +- INSTALL | 5 +- Makefile | 73 +- README | 12 +- benchmark.cc | 68 +- benchmark.vcxproj | 2 +- benchmark/gr_all.sh | 3 +- benchmark/gr_common.gnu | 2 +- benchmark/gr_def_random_change.gnu | 2 +- benchmark/gr_def_random_hit.gnu | 2 +- ...ow_problem.gnu => gr_other_ck_problem.gnu} | 4 +- benchmark/gr_other_googlelibchash_problem.gnu | 14 + check.c | 1020 ++++++++++++++--- tommyds/tommy.h | 162 +-- tommyds/tommyalloc.c | 3 +- tommyds/tommychain.h | 5 +- tommyds/tommyhash.h | 10 +- tommyds/tommyhashdyn.c | 16 +- tommyds/tommyhashdyn.h | 3 +- tommyds/tommyhashlin.c | 104 +- tommyds/tommyhashlin.h | 50 +- tommyds/tommyhashtbl.c | 5 +- tommyds/tommyhashtbl.h | 3 +- tommyds/tommylist.c | 3 +- tommyds/tommylist.h | 66 +- tommyds/tommytrie.c | 3 +- tommyds/tommytrie.h | 5 +- tommyds/tommytrieinp.c | 30 +- tommyds/tommytrieinp.h | 2 +- tommyds/tommytypes.h | 11 +- 32 files changed, 1219 insertions(+), 493 deletions(-) create mode 100644 .travis.yml rename benchmark/{gr_other_slow_problem.gnu => gr_other_ck_problem.gnu} (78%) create mode 100644 benchmark/gr_other_googlelibchash_problem.gnu diff --git a/.gitignore b/.gitignore index 3ed2381..b8781f5 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,12 @@ stamp-h1 *.s *.S +# coverage +*.gcda +*.gcno +*.info +cov/ + # project *.dst *.epr diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..32fa9b0 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +# Travis CI configuration file + +language: c + +script: make check + +compiler: + - clang + - gcc + diff --git a/HISTORY b/HISTORY index 2fe1344..d5e79c2 100644 --- a/HISTORY +++ b/HISTORY @@ -1,12 +1,18 @@ TommyDS HISTORY 
=============== -2.0 2014/10 +2.1 2014/12 BETA +================ + +2.0 2014/12 =========== + * Fixed a Segmentation Fault bug in the trie_inplace container when inserting + duplicate elements. * Faster array and hashlin implementation when accessing elements. * Added new hashtable functions to iterate over all the elements. * Added a new tommy_calloc() function used for allocating initialized memory. If you redefined tommy_malloc(), likely you have to redefine also tommy_calloc(). + * Reached 100% code coverage in the regression test. * Different source code organization. * Added benchmark comparison with Binary Search Tesseract by Gregorius van den Hoven. diff --git a/INSTALL b/INSTALL index f5a2c03..40e55ff 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,8 @@ TommyDS INSTALL =============== -TommyDS doesn't need any installation. You have only to import the required .c -and .h files into your program and start using them. +TommyDS doesn't need any installation. +You have only to import the required .c and .h files into your program +and use them. 
diff --git a/Makefile b/Makefile index 1ae5b9a..5336c6c 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,30 @@ ############################################################################# # Tommy Makefile -VERSION=2.0 -CFLAGS=-m32 -O3 -march=pentium4 -mtune=generic -Wall -Wextra -Wshadow -Wcast-qual -g -# -std=gnu++11 required by Google btree -CXXFLAGS=$(CFLAGS) -fpermissive -std=gnu++11 -CC=gcc -CXX=g++ -UNAME=$(shell uname) +# Version of TommyDS +VERSION = 2.1 + +# Build options for the check program +ifdef COVERAGE +CFLAGS = -O0 -g -fprofile-arcs -ftest-coverage +else +CFLAGS = -O3 -march=native -Wall -Wextra -Wshadow -Wcast-qual -g +endif + +# Build options for the benchmark +# -std=gnu++0x required by Google btree +BENCHCXXFLAGS = -m32 -O3 -march=nehalem -fpermissive -std=gnu++0x -Wall -g + +# Programs +CC ?= gcc +CXX ?= g++ +OBJDUMP ?= objdump +UNAME = $(shell uname) # Linux ifeq ($(UNAME),Linux) -LIB=-lrt benchmark/lib/judy/libJudyL.a benchmark/lib/judy/libJudyMalloc.a +LIB=-lrt +BENCHLIB=benchmark/lib/judy/libJudyL.a benchmark/lib/judy/libJudyMalloc.a EXE= O=.o endif @@ -25,13 +38,13 @@ endif # Windows ifeq ($(UNAME),) -LIB=benchmark/lib/judy/src/judy.lib +BENCHLIB=benchmark/lib/judy/src/judy.lib EXE=.exe O=.obj endif -CHECK = ./tommybench -N 1000000 -d tommy-hashlin -#CHECK = ./tommycheck +#CHECK = ./tommybench -n 1000000 -d tommy-hashlin +CHECK = ./tommycheck DEP = \ tommyds/tommyalloc.c \ @@ -67,24 +80,43 @@ DEPTEST = \ check.c \ benchmark.cc -all: tommycheck$(EXE) tommybench$(EXE) +all: tommycheck$(EXE) + +bench: tommybench$(EXE) tommy$(O): $(DEP) $(CC) $(CFLAGS) -c tommyds/tommy.c -o tommy$(O) - $(CC) $(CFLAGS) -S -fverbose-asm tommyds/tommy.c -o tommy.s - objdump -S tommy$(O) > tommy.S + $(OBJDUMP) -S tommy$(O) > tommy.s tommycheck$(EXE): check.c tommy$(O) $(CC) $(CFLAGS) check.c tommy$(O) -o tommycheck$(EXE) $(LIB) tommybench$(EXE): benchmark.cc $(DEP) - $(CXX) $(CXXFLAGS) benchmark.cc -o tommybench$(EXE) $(LIB) + $(CXX) $(BENCHCXXFLAGS) 
benchmark.cc -o tommybench$(EXE) $(LIB) $(BENCHLIB) -check: tommycheck$(EXE) tommybench$(EXE) +check: tommycheck$(EXE) ./tommycheck$(EXE) - ./tommybench$(EXE) -N 100000 echo Check completed with success! +lcov_reset: + lcov -d . -z + rm -f ./lcov.info + +lcov_capture: + lcov -d . --capture -o lcov.info + +lcov_html: + rm -rf ./cov + mkdir cov + genhtml -o ./cov lcov.info + +coverage: + $(MAKE) COVERAGE=1 tommycheck$(EXE) + $(MAKE) lcov_reset + ./tommycheck$(EXE) + $(MAKE) lcov_capture + $(MAKE) lcov_html + valgrind: valgrind \ --tool=memcheck \ @@ -136,8 +168,9 @@ web: phony tommyweb.doxygen tommy.css $(DEP) rm -f web/tab_*.png clean: - rm -f *.log *.s *.S *.lst *.o + rm -f *.log *.s *.lst *.o rm -f *.ncb *.suo *.obj + rm -f *.gcno *.gcda lcov.info rm -rf Debug Release x64 rm -f callgrind.out.* rm -f cachegrind.out.* @@ -176,3 +209,7 @@ dist: zip -r $(DIST).zip $(DIST) rm -r $(DIST) +distcheck: dist + tar zxvf $(DIST).tar.gz + cd $(DIST) && make check + rm -rf $(DIST) diff --git a/README b/README index a64e90c..cdef1a6 100644 --- a/README +++ b/README @@ -1,7 +1,8 @@ TommyDS ======= -TommyDS is a C library of hashtables and tries designed for high performance. +TommyDS is a C library of array, hashtables and tries data structures, +designed for high performance and providing an easy to use interface. It's faster than all the similar libraries like rbtree, judy, goodledensehash, khash, uthash, nedtries and others. @@ -9,12 +10,15 @@ khash, uthash, nedtries and others. The data structures provided are: tommy_list - A double linked list. - tommy_array - A linear array. It doesn't fragment the heap. + tommy_array - A linear array. It doesn't fragment + the heap. + tommy_arrayblk - A blocked linear array. It doesn't fragment + the heap and it minimizes the space occupation. tommy_hashtable - A fixed size chained hashtable. tommy_hashdyn - A dynamic chained hashtable. tommy_hashlin - A linear chained hashtable. 
It doesn't have the - problem of the delay when resizing and it doesn't fragment - the heap. + problem of the delay when resizing and it doesn't + fragment the heap. tommy_trie - A trie optimized for cache utilization. tommy_trie_inplace - A trie completely inplace. diff --git a/benchmark.cc b/benchmark.cc index 8e39c70..39bb996 100644 --- a/benchmark.cc +++ b/benchmark.cc @@ -186,7 +186,7 @@ typedef size_t ssize_t; /* Concurrency Kit Hash Set */ /* http://concurrencykit.org/ */ /* Note that it has a VERY BAD performance on the "Change" test, */ -/* so we disable it in the general graphs */ +/* so we disable it in the graphs until further investigation */ /* #define USE_CK */ #if defined(USE_CK) && defined(__linux) /* if you enable it, ensure to link also with the -lck option */ @@ -392,9 +392,9 @@ struct nedtrie_t nedtrie; khash_t(word)* khash; #ifdef __cplusplus /* use a specialized hash, otherwise the performance depends on the STL implementation used. */ -class cpp_tommy_inthash_u32 { +class cpp_hash { public: - unsigned operator()(unsigned key) const { return tommy_inthash_u32(key); } + unsigned operator()(unsigned key) const { return hash(key); } }; #endif #ifdef USE_CPPMAP @@ -402,14 +402,14 @@ typedef std::map cppmap_t; cppmap_t* cppmap; #endif #ifdef USE_CPPUNORDEREDMAP -typedef std::unordered_map cppunorderedmap_t; +typedef std::unordered_map cppunorderedmap_t; cppunorderedmap_t* cppunorderedmap; #endif #ifdef USE_GOOGLELIBCHASH struct HashTable* googlelibhash; #endif #ifdef USE_GOOGLEDENSEHASH -typedef google::dense_hash_map googledensehash_t; +typedef google::dense_hash_map googledensehash_t; googledensehash_t* googledensehash; #endif #ifdef USE_GOOGLEBTREE @@ -2525,7 +2525,7 @@ void test(unsigned size, unsigned data, int log, int sparse) printf("%12u", the_max); - printf(" %16s %16s", DATA_NAME[the_data], ORDER_NAME[the_order]); + printf(" %18s %10s", DATA_NAME[the_data], ORDER_NAME[the_order]); /* skip degenerated cases */ if 
(LAST[the_data][the_order] > TIME_MAX_NS) { @@ -2546,7 +2546,7 @@ void test(unsigned size, unsigned data, int log, int sparse) if (the_log) { for(i=0;i stop - start) - result = stop - start; - } - - free(DATA); - - miss_time = result * delta / size; - - printf("Cache miss %d [ns]\n", (unsigned)miss_time); + printf("Options\n"); + printf("-n NUMBER Run the test for the specified number of objects.\n"); + printf("-m Run the test for the maximum number of objects.\n"); + printf("-d DATA Run the test for the specified data structure.\n"); + printf("-s Use a sparse dataset intead of a compact one.\n"); + printf("-l Logs results into file for graphs creation.\n"); } int main(int argc, char * argv[]) @@ -2636,7 +2616,6 @@ int main(int argc, char * argv[]) int flag_data = DATA_MAX; int flag_size = 0; int flag_log = 0; - int flag_miss = 0; int flag_sparse = 0; nano_init(); @@ -2649,12 +2628,10 @@ int main(int argc, char * argv[]) } else if (strcmp(argv[i], "-s") == 0) { flag_sparse = 1; } else if (strcmp(argv[i], "-m") == 0) { - flag_miss = 1; - } else if (strcmp(argv[i], "-n") == 0) { flag_size = MAX; - } else if (strcmp(argv[i], "-N") == 0) { + } else if (strcmp(argv[i], "-n") == 0) { if (i+1 >= argc) { - printf("Missing data in %s\n", argv[i]); + printf("Missing number of objects in %s\n", argv[i]); exit(EXIT_FAILURE); } flag_size = atoi(argv[i+1]); @@ -2672,21 +2649,22 @@ int main(int argc, char * argv[]) } } if (flag_data == DATA_MAX) { - printf("Unknown data %s\n", argv[i+1]); + printf("Unknown data name '%s'\n", argv[i+1]); + printf("Possible values are:\n"); + for(j=0;jApplication MultiByte true - v100 + v110 Application diff --git a/benchmark/gr_all.sh b/benchmark/gr_all.sh index d58f55b..eb62729 100644 --- a/benchmark/gr_all.sh +++ b/benchmark/gr_all.sh @@ -6,7 +6,8 @@ export GNUPLOT_DEFAULT_GDFONT=arial gnuplot gr_def_random_hit.gnu gnuplot gr_def_random_change.gnu gnuplot gr_other_judy_problem.gnu -gnuplot gr_other_slow_problem.gnu +gnuplot 
gr_other_googlelibchash_problem.gnu +gnuplot gr_other_ck_problem.gnu DIR=data/core_i5_650_3G2_linux gnuplot $DIR/gr_def.gnu gr_forward_insert.gnu diff --git a/benchmark/gr_common.gnu b/benchmark/gr_common.gnu index e6c4759..9a46d83 100644 --- a/benchmark/gr_common.gnu +++ b/benchmark/gr_common.gnu @@ -8,7 +8,7 @@ set style data linespoints set datafile missing "0" set xlabel "Number of elements in logarithmic scale" set ylabel "Time for element in nanosecond in logarithmic scale\nLower is better" -set xrange [1000:100000000] +set xrange [1000:10000000] set yrange [6:1000] set logscale y set logscale x diff --git a/benchmark/gr_def_random_change.gnu b/benchmark/gr_def_random_change.gnu index 0827cdc..e204c26 100644 --- a/benchmark/gr_def_random_change.gnu +++ b/benchmark/gr_def_random_change.gnu @@ -1,7 +1,7 @@ load "gr_common.gnu" tdir = "def/" -tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.7.1, 32 bit" +tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.9.2, 32 bit" set output bdir.tdir."img_random_change".bext set title "Random Change (Remove + Insert)".tsub diff --git a/benchmark/gr_def_random_hit.gnu b/benchmark/gr_def_random_hit.gnu index 236b341..f6ea0f0 100644 --- a/benchmark/gr_def_random_hit.gnu +++ b/benchmark/gr_def_random_hit.gnu @@ -1,7 +1,7 @@ load "gr_common.gnu" tdir = "def/" -tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.7.1, 32 bit" +tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.9.2, 32 bit" set output bdir.tdir."img_random_hit".bext set title "Random Hit".tsub diff --git a/benchmark/gr_other_slow_problem.gnu b/benchmark/gr_other_ck_problem.gnu similarity index 78% rename from benchmark/gr_other_slow_problem.gnu rename to benchmark/gr_other_ck_problem.gnu index d6a7226..4612a3e 100644 --- a/benchmark/gr_other_slow_problem.gnu +++ b/benchmark/gr_other_ck_problem.gnu @@ -5,9 +5,9 @@ set yrange [10:10000] tdir = "other/" tsub 
= "\nCore i5 650 3.20 GHz, 4 MB L3 cache\nLinux, gcc 4.7.1, 32 bit" -set output bdir.tdir."slow_problem".bext +set output bdir.tdir."ck_problem".bext set title "Random Change (Remove + Insert)".tsub -data = bdir.tdir.'slow_problem.lst' +data = bdir.tdir.'ck_problem.lst' plot data using 1:2 title columnheader(2), \ for [i=3:20] '' using 1:i title columnheader(i) ls i-1 diff --git a/benchmark/gr_other_googlelibchash_problem.gnu b/benchmark/gr_other_googlelibchash_problem.gnu new file mode 100644 index 0000000..11616c7 --- /dev/null +++ b/benchmark/gr_other_googlelibchash_problem.gnu @@ -0,0 +1,14 @@ +load "gr_common.gnu" + +set yrange [10:10000] + +tdir = "other/" +tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache\nLinux, gcc 4.7.1, 32 bit" + +set output bdir.tdir."googlelibchash_problem".bext +set title "Random Change (Remove + Insert)".tsub +data = bdir.tdir.'googlelibchash_problem.lst' + +plot data using 1:2 title columnheader(2), \ + for [i=3:20] '' using 1:i title columnheader(i) ls i-1 + diff --git a/check.c b/check.c index 1892676..0e0f7c7 100644 --- a/check.c +++ b/check.c @@ -53,13 +53,13 @@ #include "tommyds/tommy.h" -#define MAX 1000000 +#define TOMMY_SIZE 1000000 #define PAYLOAD 16 /**< Size of payload data for objects */ struct object { - tommy_node node; int value; + tommy_node node; char payload[PAYLOAD]; }; @@ -101,7 +101,19 @@ int compare_vector(const void* void_a, const void* void_b) struct object_hash { int value; tommy_node node; - tommy_node hashnode; + char payload[PAYLOAD]; +}; + +struct object_trie { + int value; + tommy_trie_node node; + char payload[PAYLOAD]; +}; + +struct object_trie_inplace { + int value; + tommy_trie_inplace_node node; + char payload[PAYLOAD]; }; /******************************************************************************/ @@ -207,23 +219,48 @@ unsigned rnd(unsigned max) } /******************************************************************************/ -/* test */ +/* helper */ + +unsigned isqrt(unsigned n) +{ + unsigned 
root, remain, place; + + root = 0; + + remain = n; -const char* the_str; -tommy_uint64_t the_start; + place = 0x40000000; + + while (place > remain) + place /= 4; + + while (place) { + if (remain >= root + place) { + remain -= root + place; + root += 2 * place; + } + + root /= 2; + place /= 4; + } + + return root; +} /** * Cache clearing buffer. */ -unsigned char CACHE[8*1024*1024]; +static unsigned char the_cache[16*1024*1024]; +static const char* the_str; +static tommy_uint64_t the_start; void cache_clear(void) { unsigned i; /* read & write */ - for(i=0;idata; - p = p->next; - tommy_hashtable_remove_existing(&hashtable, &obj->hashnode); - } + if (tommy_hashtable_count(&hashtable) != n) + abort(); + + the_count = 0; + tommy_hashtable_foreach(&hashtable, count_callback); + if (the_count != n) + abort(); + + /* remove in backward order */ + for(i=0;idata; - tommy_list_remove_existing(&list, p); - tommy_hashtable_remove_existing(&hashtable, &obj->hashnode); + tommy_hashtable_remove_existing(&hashtable, &HASH[j].node); } - /* remove remaining */ - p = tommy_list_head(&list); - while (p) { - struct object_hash* obj = p->data; - p = p->next; - tommy_hashtable_remove_existing(&hashtable, &obj->hashnode); - } + for(;jdata; - p = p->next; - tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode); - } + if (tommy_hashdyn_count(&hashdyn) != n) + abort(); + + the_count = 0; + tommy_hashdyn_foreach(&hashdyn, count_callback); + if (the_count != n) + abort(); + + /* remove in backward order */ + for(i=0;idata; - tommy_list_remove_existing(&list, p); - tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode); + tommy_hashdyn_remove_existing(&hashdyn, &HASH[j].node); } - /* remove remaining */ - p = tommy_list_head(&list); - while (p) { - struct object_hash* obj = p->data; - p = p->next; - tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode); - } + for(;jdata; + if (obj->value == module - 1) + break; + bucket = bucket->next; } + if (bucket == 0) + abort(); + + /* deinitialize 
without removing elements */ + tommy_hashlin_done(&hashlin); START("hashlin stack"); - limit = 10 * sqrt(MAX); - for(n=0;ndata; - p = p->next; - tommy_hashlin_remove_existing(&hashlin, &obj->hashnode); - } + if (tommy_hashlin_count(&hashlin) != n) + abort(); + + the_count = 0; + tommy_hashlin_foreach(&hashlin, count_callback); + if (the_count != n) + abort(); + + /* remove in backward order */ + for(i=0;idata; - tommy_list_remove_existing(&list, p); - tommy_hashlin_remove_existing(&hashlin, &obj->hashnode); + tommy_hashlin_remove_existing(&hashlin, &HASH[j].node); } - /* remove remaining */ - p = tommy_list_head(&list); - while (p) { - struct object_hash* obj = p->data; - p = p->next; - tommy_hashlin_remove_existing(&hashlin, &obj->hashnode); - } + for(;jfaster than all the similar libraries like * rbtree, * judy, - * googlebtree + * googlebtree, * stxbtree, * khash, * uthash, @@ -59,8 +59,6 @@ * * The most interesting are ::tommy_array, ::tommy_hashdyn, ::tommy_hashlin, ::tommy_trie and ::tommy_trie_inplace. * - * Tommy is released with a \ref license "2-clause BSD license". - * * The official site of TommyDS is http://tommyds.sourceforge.net/, * * \section Use @@ -132,46 +130,35 @@ * To compute the hash value, you can use the generic tommy_hash_u32() function, or the * specialized integer hash function tommy_inthash_u32(). * - * \section Performance - * Here you can see some timings comparing with other natable implementations in the Hit - * and Change graphs. Hit means searching an object with a key with success, - * and Change means searching, removing and reinsert it with a different key value. - * - * Times are expressed in nanoseconds for element, and lower is better. - * - * To have some reference numbers, you can check Latency numbers every programmer should know. - * - * A complete analysis is available in the \ref benchmark page. - * - * - * - * - * * \section Features * * Tommy is fast and easy to use. 
* - * Tommy is 100% portable in all the platforms and operating systems. + * Tommy is portable to all platforms and operating systems. * * Tommy containers support multiple elements with the same key. * - * See the \ref design page for more details. + * Tommy containers keep the original insertion order of elements with equal keys. * - * \section Limitations + * Tommy is released with the \ref license "2-clause BSD license". * - * Tommy is not thread safe. You have always to provide thread safety using - * locks before calling any Tommy functions. + * See the \ref design page for more details and limitations. * - * Tommy doesn't provide iterators over the implicit order defined by the data - * structures. To iterate on elements you must insert them also into a ::tommy_list, - * and use the list as iterator. See the \ref multiindex example for more details. - * Note that this is a real limitation only for ::tommy_trie, as it's the only - * data structure defining an useable order. + * \section Performance + * Here you can see some timings comparing with other notable implementations. + * The Hit graph shows the time required for searching random objects with a key. + * The Change graph shows the time required for searching, removing and reinsert random objects + * with a different key value. * - * Tommy doesn't provide an error reporting mechanism for a malloc() failure. - * You have to provide it redefining malloc() if you expect it to fail. + * Times are expressed in nanoseconds for each element, and lower is better. * - * Tommy assumes to never have more than 2^32-1 elements in a container. + * To have some reference numbers, you can check Latency numbers every programmer should know. + * + * A complete analysis is available in the \ref benchmark page. + * + * + * + * * * \page benchmark Tommy Benchmarks * @@ -250,6 +237,11 @@ * starting at 0. Using a 0 base would have given an unfair advantage to some * implementation handling it as a special case. 
* + * For all the hashtables the keys are hashed using the tommy_inthash_u32() function + * that ensures an uniform distribution. This hash function is also reversible, + * meaning that no collision is going to be caused by hashing the keys. + * For tries and trees the keys are not hashed, and used directly. + * * The tests are repeated using keys in Random mode and in Forward mode. * In the forward mode the key values are used in order from the lowest to the highest. * In the random mode the key values are used in a completely random order. @@ -336,7 +328,7 @@ * This is possible if you know in advance the distribution of keys. * For example, in the benchmark you could use something like: * \code - * #define hash(v) tommy_inthash32(v & ~0xF) + (v & 0xF) + * #define hash(v) tommy_inthash_u32(v & ~0xF) + (v & 0xF) * \endcode * and make keys that differ only by the lowest bits * to have hashes with the same property, resulting in @@ -379,8 +371,8 @@ * \section code Code * * The compilers used in the benchmark are: - * - gcc 4.7.1 in Linux with options: -O3 -march=pentium4 -mtune=generic - * - Visual C 2012 in Windows with options: /Ox /GL /GS- + * - gcc 4.9.2 in Linux with options: -O3 -march=nehalem + * - Visual C 2012 in Windows with options: /Ox /Oy- /GL /GS- /arch:SSE2 * * The following is pseudo code of the benchmark used. In this case it's written for the C++ unordered_map. * @@ -525,7 +517,7 @@ * \subsection googlelibchash Google C libchash * It's the C implementation located in the experimental/ directory of the googlesparsehash archive. * It has very bad performances in the Change test for some N values. - * See this graph with a lot of spikes. + * See this graph with a lot of spikes. * The C++ version doesn't suffer of this problem. * * \subsection googledensehash Google C++ densehash @@ -551,7 +543,7 @@ * * \subsection ck Concurrency Kit * It has very bad performances in the Change test for some N values. - * See this graph with a lot of spikes. 
+ * See this graph with a lot of spikes. * * \page multiindex Tommy Multi Indexing * @@ -560,7 +552,7 @@ * and use the list as iterator. * * This technique allows to keep track of the insertion order with the list, - * and provide more search possibilities using different data structures for + * and provides more search possibilities using different data structures for * different search keys. * * See the next example, for a objects inserted in a ::tommy_list, and in @@ -646,50 +638,78 @@ * * \page design Tommy Design * - * Tommy is mainly designed to provide high performance, but much care was - * also given in the definition of an useable API. In case, even making some - * compromise with efficency. + * Tommy is designed to fulfill the need of generic data structures for the + * C language, providing at the same time high performance and a clean + * and easy to use interface. * - * \section multi Multi key - * All the Tommy containers support the insertion of multiple elements with - * the same key. + * \section testing Testing + * + * Extensive and automated tests with the runtime checker valgrind + * and the static analyzer clang + * are done to ensure the correctness of the library. + * + * The test has a code coverage of 100%, + * measured with lcov. + * + * \section Limitations + * + * Tommy is not thread safe. You have always to provide thread safety using + * locks before calling any Tommy functions. * - * This allow the maximum flexibility, but in some cases it requires some - * more space to keep a list of equal elements. + * Tommy doesn't provide iterators over the implicit order defined by the data + * structures. To iterate on elements you must insert them also into a ::tommy_list, + * and use the list as iterator. See the \ref multiindex example for more details. + * Note that this is a real limitation only for ::tommy_trie, as it's the only + * data structure defining an useable order. 
* - * \section datapointer Data pointer - * The tommy_node::data field is present to provide a simpler API. + * Tommy doesn't provide an error reporting mechanism for a malloc() failure. + * You have to provide it redefining malloc() if you expect it to fail. * - * A more memory conservative approach is to do not store this pointer, and - * computing it from the embedded node pointer every time. + * Tommy assumes to never have more than 2^32-1 elements in a container. + * + * \section compromise Compromises + * + * Finding the right balance between efficiency and ease of use required some + * compromises, mostly on memory efficiency, to avoid crippling the interface. + * The following is a list of such decisions. * - * See for example the Linux Kernel declaration of container_of() at - * http://lxr.free-electrons.com/ident?i=container_of + * \subsection multi_key Multi key + * All the Tommy containers support the insertion of multiple elements with + * the same key, adding in each node a list of equal elements. * - * Although, it would have required more complexity for the user to require - * a manual conversion from a node to the object containing the node. + * They are the equivalent of the C++ associative containers multimap\ + * and unordered_multimap\ + * that allow duplicates of the same key. * - * \section zero_list Zero terminated next list - * The half 0 terminated format of tommy_node::next is present to provide - * a forward iterator terminating in 0. + * A more memory conservative approach is to not allow duplicated elements, + * removing the need of this list. * - * A more efficient approach is to use a double circular list, as operating on - * nodes in a circular list doesn't requires to manage the special terminating - * case. + * \subsection data_pointer Data pointer + * The tommy_node::data field is present to allow search and remove functions to return + * directly a pointer to the element stored in the container. 
* - * Although, it would have required more complexity at the user for a simple - * iteration. + * A more memory conservative approach is to require the user to compute + * the element pointer from the embedded node with a fixed displacement. + * For an example, see the Linux Kernel declaration of + * container_of(). * - * \section double_linked Double linked list for collisions - * The linked list used for collision is a double linked list to allow - * insertion of elements at the end of the list to keep the insertion order - * of equal elements. + * \subsection insertion_order Insertion order + * The list used for collisions is double linked to allow + * insertion of elements at the end of the list to keep the + * insertion order of equal elements. * * A more memory conservative approach is to use a single linked list, - * inserting elements only at the start of the list. - * On the other hand, with with a double linked list we can concatenate - * two lists in constant time, as using the previous circular element we - * can get a tail pointer. + * inserting elements only at the start of the list, losing the + * original insertion order. + * + * \subsection zero_list Zero terminated list + * The 0 terminated format of tommy_node::next is present to provide + * a forward iterator terminating in 0. This allows the user to write a simple + * iteration loop over the list of elements in the same bucket. + * + * A more efficient approach is to use a circular list, as operating on + * nodes in a circular list doesn't require managing the special + * terminating case when adding or removing elements. * * \page license Tommy License * Tommy is released with a 2-clause BSD license. 
diff --git a/tommyds/tommyalloc.c b/tommyds/tommyalloc.c index 8a0fa6b..2c51365 100644 --- a/tommyds/tommyalloc.c +++ b/tommyds/tommyalloc.c @@ -44,9 +44,8 @@ void tommy_allocator_init(tommy_allocator* alloc, tommy_size_t block_size, tommy align_size = sizeof(void*); /* ensure that the block_size keeps the alignment */ - if (block_size % align_size != 0) { + if (block_size % align_size != 0) block_size += align_size - block_size % align_size; - } alloc->block_size = block_size; alloc->align_size = align_size; diff --git a/tommyds/tommychain.h b/tommyds/tommychain.h index a9c05f8..71d3ccc 100644 --- a/tommyds/tommychain.h +++ b/tommyds/tommychain.h @@ -210,11 +210,10 @@ tommy_inline void tommy_chain_mergesort(tommy_chain* chain, tommy_compare_func* mask = counter >> i; while (mask != 1) { mask >>= 1; - if (mask & 1) { + if (mask & 1) tommy_chain_merge_degenerated(&bit[i + 1], &bit[i], cmp); - } else { + else bit[i + 1] = bit[i]; - } ++i; } diff --git a/tommyds/tommyhash.h b/tommyds/tommyhash.h index 329ef9d..33317b4 100644 --- a/tommyds/tommyhash.h +++ b/tommyds/tommyhash.h @@ -48,7 +48,7 @@ typedef tommy_key_t tommy_hash_t; * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle(). * \param init_val Initialization value. * Using a different initialization value, you can generate a completely different set of hash values. - * Use 0 if not relevalt. + * Use 0 if not relevant. * \param void_key Pointer at the data to hash. * \param key_len Size of the data to hash. * \note @@ -63,7 +63,7 @@ tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tom * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle2(). * \param init_val Initialization value. * Using a different initialization value, you can generate a completely different set of hash values. - * Use 0 if not relevalt. + * Use 0 if not relevant. * \param void_key Pointer at the data to hash. * \param key_len Size of the data to hash. 
* \note @@ -73,7 +73,7 @@ tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tom tommy_uint64_t tommy_hash_u64(tommy_uint64_t init_val, const void* void_key, tommy_size_t key_len); /** - * Integer hash of 32 bits. + * Integer reversible hash function for 32 bits. * Implementation of the Robert Jenkins "4-byte Integer Hashing", * from http://burtleburtle.net/bob/hash/integer.html */ @@ -91,9 +91,9 @@ tommy_inline tommy_uint32_t tommy_inthash_u32(tommy_uint32_t key) } /** - * Integer hash of 64 bits. + * Integer reversible hash function for 64 bits. * Implementation of the Thomas Wang "Integer Hash Function", - * from http://www.cris.com/~Ttwang/tech/inthash.htm + * from http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm */ tommy_inline tommy_uint64_t tommy_inthash_u64(tommy_uint64_t key) { diff --git a/tommyds/tommyhashdyn.c b/tommyds/tommyhashdyn.c index 4da3fe6..02ef404 100644 --- a/tommyds/tommyhashdyn.c +++ b/tommyds/tommyhashdyn.c @@ -84,11 +84,11 @@ static void tommy_hashdyn_resize(tommy_hashdyn* hashdyn, tommy_count_t new_bucke j = hashdyn->bucket[i]; while (j) { tommy_hashdyn_node* j_next = j->next; - tommy_count_t index = j->key & new_bucket_mask; - if (new_bucket[index]) - tommy_list_insert_tail_not_empty(new_bucket[index], j); + tommy_count_t pos = j->key & new_bucket_mask; + if (new_bucket[pos]) + tommy_list_insert_tail_not_empty(new_bucket[pos], j); else - tommy_list_insert_first(&new_bucket[index], j); + tommy_list_insert_first(&new_bucket[pos], j); j = j_next; } } @@ -120,9 +120,8 @@ static void tommy_hashdyn_resize(tommy_hashdyn* hashdyn, tommy_count_t new_bucke tommy_inline void hashdyn_grow_step(tommy_hashdyn* hashdyn) { /* grow if more than 50% full */ - if (hashdyn->count >= hashdyn->bucket_max / 2) { + if (hashdyn->count >= hashdyn->bucket_max / 2) tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit + 1); - } } /** @@ -131,9 +130,8 @@ tommy_inline void 
hashdyn_grow_step(tommy_hashdyn* hashdyn) tommy_inline void hashdyn_shrink_step(tommy_hashdyn* hashdyn) { /* shrink if less than 12.5% full */ - if (hashdyn->count <= hashdyn->bucket_max / 8 && hashdyn->bucket_bit > TOMMY_HASHDYN_BIT) { + if (hashdyn->count <= hashdyn->bucket_max / 8 && hashdyn->bucket_bit > TOMMY_HASHDYN_BIT) tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit - 1); - } } void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void* data, tommy_hash_t hash) @@ -164,7 +162,7 @@ void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node* void* tommy_hashdyn_remove(tommy_hashdyn* hashdyn, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) { - tommy_count_t pos = hash % hashdyn->bucket_max; + tommy_count_t pos = hash & hashdyn->bucket_mask; tommy_hashdyn_node* node = hashdyn->bucket[pos]; while (node) { diff --git a/tommyds/tommyhashdyn.h b/tommyds/tommyhashdyn.h index 11f2447..afbdfab 100644 --- a/tommyds/tommyhashdyn.h +++ b/tommyds/tommyhashdyn.h @@ -189,7 +189,6 @@ void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void * You have to provide a compare function and the hash of the element you want to remove. * If the element is not found, 0 is returned. * If more equal elements are present, the first one is removed. - * This operation is faster than calling tommy_hashdyn_bucket() and tommy_hashdyn_remove_existing() separately. * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. * The function should return 0 for equal elements, anything other for different elements. * \param cmp_arg Compare argument passed as first argument of the compare function. @@ -274,7 +273,7 @@ void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node* void tommy_hashdyn_foreach(tommy_hashdyn* hashdyn, tommy_foreach_func* func); /** - * Calls the specified function with argument for each element in the hashtable. 
+ * Calls the specified function with an argument for each element in the hashtable. */ void tommy_hashdyn_foreach_arg(tommy_hashdyn* hashdyn, tommy_foreach_arg_func* func, void* arg); diff --git a/tommyds/tommyhashlin.c b/tommyds/tommyhashlin.c index bea9403..bcf7d98 100644 --- a/tommyds/tommyhashlin.c +++ b/tommyds/tommyhashlin.c @@ -40,6 +40,20 @@ #define TOMMY_HASHLIN_STATE_GROW 1 #define TOMMY_HASHLIN_STATE_SHRINK 2 +/** + * Set the hashtable in stable state. + */ +tommy_inline void tommy_hashlin_stable(tommy_hashlin* hashlin) +{ + hashlin->state = TOMMY_HASHLIN_STATE_STABLE; + + /* setup low_mask/max/split to allow tommy_hashlin_bucket_ref() */ + /* and tommy_hashlin_foreach() to work regardless we are in stable state */ + hashlin->low_max = hashlin->bucket_max; + hashlin->low_mask = hashlin->bucket_mask; + hashlin->split = 0; +} + void tommy_hashlin_init(tommy_hashlin* hashlin) { tommy_uint_t i; @@ -53,7 +67,7 @@ void tommy_hashlin_init(tommy_hashlin* hashlin) hashlin->bucket[i] = hashlin->bucket[0]; /* stable state */ - hashlin->state = TOMMY_HASHLIN_STATE_STABLE; + tommy_hashlin_stable(hashlin); hashlin->count = 0; } @@ -69,45 +83,6 @@ void tommy_hashlin_done(tommy_hashlin* hashlin) } } -/** - * Return the bucket at the specified pos. - */ -tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos) -{ - tommy_uint_t bsr; - - /* get the highest bit set, in case of all 0, return 0 */ - bsr = tommy_ilog2_u32(pos | 1); - - return &hashlin->bucket[bsr][pos]; -} - -/** - * Return the bucket to use. 
- */ -tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ptr(tommy_hashlin* hashlin, tommy_hash_t hash) -{ - tommy_count_t pos; - - /* if we are reallocating */ - if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) { - /* compute the old position */ - pos = hash & hashlin->low_mask; - - /* if we have not reallocated this position yet */ - if (pos >= hashlin->split) { - - /* use it as it was before */ - return tommy_hashlin_pos(hashlin, pos); - } - } - - /* otherwise operates normally */ - pos = hash & hashlin->bucket_mask; - - return tommy_hashlin_pos(hashlin, pos); -} - /** * Grow one step. */ @@ -135,7 +110,7 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin) /* cast to ptrdiff_t to ensure to get a negative value */ hashlin->bucket[hashlin->bucket_bit] = &segment[-(tommy_ptrdiff_t)hashlin->low_max]; - /* grow the hash size and allocate */ + /* grow the hash size */ ++hashlin->bucket_bit; hashlin->bucket_max = 1 << hashlin->bucket_bit; hashlin->bucket_mask = hashlin->bucket_max - 1; @@ -172,17 +147,17 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin) *split[0] = 0; *split[1] = 0; - /* compute the bit to identify the bucket */ - mask = hashlin->bucket_mask & ~hashlin->low_mask; + /* the bit used to identify the bucket */ + mask = hashlin->low_max; /* flush the bucket */ while (j) { tommy_hashlin_node* j_next = j->next; - tommy_count_t index = (j->key & mask) != 0; - if (*split[index]) - tommy_list_insert_tail_not_empty(*split[index], j); + tommy_count_t pos = (j->key & mask) != 0; + if (*split[pos]) + tommy_list_insert_tail_not_empty(*split[pos], j); else - tommy_list_insert_first(split[index], j); + tommy_list_insert_first(split[pos], j); j = j_next; } @@ -191,7 +166,8 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin) /* if we have finished, change the state */ if (hashlin->split == hashlin->low_max) { - hashlin->state = TOMMY_HASHLIN_STATE_STABLE; + /* go in stable mode */ + tommy_hashlin_stable(hashlin); break; } } @@ 
-251,8 +227,6 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin) if (hashlin->split == 0) { tommy_hashlin_node** segment; - hashlin->state = TOMMY_HASHLIN_STATE_STABLE; - /* shrink the hash size */ --hashlin->bucket_bit; hashlin->bucket_max = 1 << hashlin->bucket_bit; @@ -261,6 +235,9 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin) /* free the last segment */ segment = hashlin->bucket[hashlin->bucket_bit]; tommy_free(&segment[((tommy_ptrdiff_t)1) << hashlin->bucket_bit]); + + /* go in stable mode */ + tommy_hashlin_stable(hashlin); break; } } @@ -269,7 +246,7 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin) void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash) { - tommy_list_insert_tail(tommy_hashlin_bucket_ptr(hashlin, hash), node, data); + tommy_list_insert_tail(tommy_hashlin_bucket_ref(hashlin, hash), node, data); node->key = hash; @@ -280,7 +257,7 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node) { - tommy_list_remove_existing(tommy_hashlin_bucket_ptr(hashlin, node->key), node); + tommy_list_remove_existing(tommy_hashlin_bucket_ref(hashlin, node->key), node); --hashlin->count; @@ -289,14 +266,9 @@ void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* return node->data; } -tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash) -{ - return *tommy_hashlin_bucket_ptr(hashlin, hash); -} - void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash) { - tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ptr(hashlin, hash); + tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ref(hashlin, hash); tommy_hashlin_node* node = *let_ptr; while (node) { @@ -321,12 +293,8 @@ void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func) 
tommy_count_t bucket_max; tommy_count_t pos; - /* if we are reallocating */ - if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) { - bucket_max = hashlin->low_max + hashlin->split; - } else { - bucket_max = hashlin->bucket_max; - } + /* number of valid buckets */ + bucket_max = hashlin->low_max + hashlin->split; for (pos = 0; pos < bucket_max; ++pos) { tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos); @@ -344,12 +312,8 @@ void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* f tommy_count_t bucket_max; tommy_count_t pos; - /* if we are reallocating */ - if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) { - bucket_max = hashlin->low_max + hashlin->split; - } else { - bucket_max = hashlin->bucket_max; - } + /* number of valid buckets */ + bucket_max = hashlin->low_max + hashlin->split; for (pos = 0; pos < bucket_max; ++pos) { tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos); diff --git a/tommyds/tommyhashlin.h b/tommyds/tommyhashlin.h index 0ba3a21..477b1e7 100644 --- a/tommyds/tommyhashlin.h +++ b/tommyds/tommyhashlin.h @@ -201,7 +201,6 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void * You have to provide a compare function and the hash of the element you want to remove. * If the element is not found, 0 is returned. * If more equal elements are present, the first one is removed. - * This operation is faster than calling tommy_hashlin_bucket() and tommy_hashlin_remove_existing() separately. * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. * The function should return 0 for equal elements, anything other for different elements. * \param cmp_arg Compare argument passed as first argument of the compare function. 
@@ -210,6 +209,48 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void */ void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash); +/** \internal + * Returns the bucket at the specified position. + */ +tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos) +{ + tommy_uint_t bsr; + + /* get the highest bit set, in case of all 0, return 0 */ + bsr = tommy_ilog2_u32(pos | 1); + + return &hashlin->bucket[bsr][pos]; +} + +/** \internal + * Returns a pointer to the bucket of the specified hash. + */ +tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash) +{ + tommy_count_t pos; + tommy_count_t high_pos; + + pos = hash & hashlin->low_mask; + high_pos = hash & hashlin->bucket_mask; + + /* if this position is already allocated in the high half */ + if (pos < hashlin->split) { + /* The following assignment is expected to be implemented */ + /* with a conditional move instruction */ + /* that results in a little better and constant performance */ + /* regardless of the split position. */ + /* This affects mostly the worst case, when the split value */ + /* is near its half, resulting in a totally unpredictable */ + /* condition by the CPU. */ + /* In such case the use of the conditional move is generally faster. */ + + /* use also the high bit */ + pos = high_pos; + } + + return tommy_hashlin_pos(hashlin, pos); +} + /** * Gets the bucket of the specified hash. * The bucket is guaranteed to contain ALL the elements with the specified hash, @@ -218,7 +259,10 @@ void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const * \param hash Hash of the element to find. * \return The head of the bucket, or 0 if empty. 
*/ -tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash); +tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash) +{ + return *tommy_hashlin_bucket_ref(hashlin, hash); +} /** * Searches an element in the hashtable. @@ -283,7 +327,7 @@ void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func); /** - * Calls the specified function with argument for each element in the hashtable. + * Calls the specified function with an argument for each element in the hashtable. */ void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg); diff --git a/tommyds/tommyhashtbl.c b/tommyds/tommyhashtbl.c index f8b8fd5..007b8ec 100644 --- a/tommyds/tommyhashtbl.c +++ b/tommyds/tommyhashtbl.c @@ -35,11 +35,10 @@ void tommy_hashtable_init(tommy_hashtable* hashtable, tommy_count_t bucket_max) { - if (bucket_max < 16) { + if (bucket_max < 16) bucket_max = 16; - } else { + else bucket_max = tommy_roundup_pow2_u32(bucket_max); - } hashtable->bucket_max = bucket_max; hashtable->bucket_mask = hashtable->bucket_max - 1; diff --git a/tommyds/tommyhashtbl.h b/tommyds/tommyhashtbl.h index 75e6e4a..76e8062 100644 --- a/tommyds/tommyhashtbl.h +++ b/tommyds/tommyhashtbl.h @@ -173,7 +173,6 @@ void tommy_hashtable_insert(tommy_hashtable* hashtable, tommy_hashtable_node* no * You have to provide a compare function and the hash of the element you want to remove. * If the element is not found, 0 is returned. * If more equal elements are present, the first one is removed. - * This operation is faster than calling tommy_hashtable_bucket() and tommy_hashtable_remove_existing() separately. * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one. * The function should return 0 for equal elements, anything other for different elements. 
* \param cmp_arg Compare argument passed as first argument of the compare function. @@ -258,7 +257,7 @@ void* tommy_hashtable_remove_existing(tommy_hashtable* hashtable, tommy_hashtabl void tommy_hashtable_foreach(tommy_hashtable* hashtable, tommy_foreach_func* func); /** - * Calls the specified function with argument for each element in the hashtable. + * Calls the specified function with an argument for each element in the hashtable. */ void tommy_hashtable_foreach_arg(tommy_hashtable* hashtable, tommy_foreach_arg_func* func, void* arg); diff --git a/tommyds/tommylist.c b/tommyds/tommylist.c index 891d655..ea85b08 100644 --- a/tommyds/tommylist.c +++ b/tommyds/tommylist.c @@ -34,9 +34,8 @@ void tommy_list_concat(tommy_list* first, tommy_list* second) tommy_node* first_tail; tommy_node* second_head; - if (tommy_list_empty(second)) { + if (tommy_list_empty(second)) return; - } if (tommy_list_empty(first)) { *first = *second; diff --git a/tommyds/tommylist.h b/tommyds/tommylist.h index 98e4303..8004a44 100644 --- a/tommyds/tommylist.h +++ b/tommyds/tommylist.h @@ -28,18 +28,18 @@ /** \file * Double linked list for collisions into hashtables. * - * This list is a double linked list mainly targetted for collisions into an hashtables, - * but useable also as a generic list. + * This list is a double linked list mainly targeted for handling collisions + * into hashtables, but usable also as a generic list. * * The main feature of this list is to require only one pointer to represent the - * list, compared to a classic implementation requiring an head an a tail pointers. + * list, compared to a classic implementation requiring a head and a tail pointer. * This reduces the memory usage in hashtables. * * Another feature is to support the insertion at the end of the list. This allow to store * collisions in a stable order. Where for stable order we mean that equal elements keep * their insertion order. 
* - * To initialize the list, you have to call tommy_list_init(), or simply assign + * To initialize the list, you have to call tommy_list_init(), or to simply assign * to it NULL, as an empty list is represented by the NULL value. * * \code @@ -69,7 +69,7 @@ * tommy_list_insert_tail(&list, &obj->node, obj); // inserts the object * \endcode * - * To iterates over all the elements in the list you have to call + * To iterate over all the elements in the list you have to call * tommy_list_head() to get the head of the list and follow the * tommy_node::next pointer until NULL. * @@ -84,18 +84,13 @@ * } * \endcode * - * To destroy the list you have only to remove all the elements, as the list is - * completely inplace and it doesn't allocate memory. + * To destroy the list you have to remove all the elements, + * as the list is completely inplace and it doesn't allocate memory. + * This can be done with the tommy_list_foreach() function. * * \code - * tommy_node* i = tommy_list_head(&list); - * while (i) { - * tommy_node* i_next = i->next; // saves the next element before freeing - * - * free(i->data); // frees the object allocated memory - * - * i = i_next; // goes to the next element - * } + * // deallocates all the objects iterating the list + * tommy_list_foreach(&list, free); * \endcode */ @@ -206,11 +201,10 @@ tommy_inline void tommy_list_insert_head(tommy_list* list, tommy_node* node, voi { tommy_node* head = tommy_list_head(list); - if (head) { + if (head) tommy_list_insert_head_not_empty(list, node); - } else { + else tommy_list_insert_first(list, node); - } node->data = data; } @@ -224,11 +218,10 @@ tommy_inline void tommy_list_insert_tail(tommy_list* list, tommy_node* node, voi { tommy_node* head = tommy_list_head(list); - if (head) { + if (head) tommy_list_insert_tail_not_empty(head, node); - } else { + else tommy_list_insert_first(list, node); - } node->data = data; } @@ -264,18 +257,16 @@ tommy_inline void* tommy_list_remove_existing(tommy_list* list, 
tommy_node* node tommy_node* head = tommy_list_head(list); /* remove from the "circular" prev list */ - if (node->next) { + if (node->next) node->next->prev = node->prev; - } else { + else head->prev = node->prev; /* the last */ - } /* remove from the "0 terminated" next list */ - if (head == node) { + if (head == node) *list = node->next; /* the new head, in case 0 */ - } else { + else node->prev->next = node->next; - } return node->data; } @@ -294,7 +285,7 @@ void tommy_list_concat(tommy_list* first, tommy_list* second); * It's a stable merge sort with O(N*log(N)) worst complexity. * It's faster on degenerated cases like partially ordered lists. * \param cmp Compare function called with two elements. - * The function should return <0 if the first element is less than the second, == 0 if equal, and > 0 if greather. + * The function should return <0 if the first element is less than the second, ==0 if equal, and >0 if greater. */ void tommy_list_sort(tommy_list* list, tommy_compare_func* cmp); @@ -307,6 +298,23 @@ tommy_inline tommy_bool_t tommy_list_empty(tommy_list* list) return tommy_list_head(list) == 0; } +/** + * Gets the number of elements. + * \note This operation is O(n). + */ +tommy_inline tommy_count_t tommy_list_count(tommy_list* list) +{ + tommy_count_t count = 0; + tommy_node* i = tommy_list_head(list); + + while (i) { + ++count; + i = i->next; + } + + return count; +} + /** * Calls the specified function for each element in the list. * @@ -347,7 +355,7 @@ tommy_inline void tommy_list_foreach(tommy_list* list, tommy_foreach_func* func) } /** - * Calls the specified function with argument for each element in the list. + * Calls the specified function with an argument for each element in the list. 
*/ tommy_inline void tommy_list_foreach_arg(tommy_list* list, tommy_foreach_arg_func* func, void* arg) { diff --git a/tommyds/tommytrie.c b/tommyds/tommytrie.c index cd3ce42..35ac219 100644 --- a/tommyds/tommytrie.c +++ b/tommyds/tommytrie.c @@ -127,9 +127,8 @@ static void trie_bucket_insert(tommy_trie* trie, tommy_uint_t shift, tommy_trie_ *let_ptr = tommy_cast(tommy_trie_node*, trie_set_tree(tree)); /* initialize it */ - for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i) { + for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i) tree->map[i] = 0; - } /* get the position of the two elements */ i = (node->key >> shift) & TOMMY_TRIE_TREE_MASK; diff --git a/tommyds/tommytrie.h b/tommyds/tommytrie.h index 86a4f43..6bf42a0 100644 --- a/tommyds/tommytrie.h +++ b/tommyds/tommytrie.h @@ -111,10 +111,9 @@ * \endcode * * To destroy the trie you have to remove all the elements, and deinitialize - * the trie calling tommy_trie_done() and the allocator using tommy_allocator_done(). + * the allocator using tommy_allocator_done(). * * \code - * tommy_trie_done(&trie); * tommy_allocator_done(&alloc); * \endcode * @@ -244,7 +243,7 @@ tommy_inline void* tommy_trie_search(tommy_trie* trie, tommy_key_t key) void* tommy_trie_remove_existing(tommy_trie* trie, tommy_trie_node* node); /** - * Returns the number of elements. + * Gets the number of elements. 
*/ tommy_inline tommy_count_t tommy_trie_count(tommy_trie* trie) { diff --git a/tommyds/tommytrieinp.c b/tommyds/tommytrieinp.c index bfe6089..26ec2c3 100644 --- a/tommyds/tommytrieinp.c +++ b/tommyds/tommytrieinp.c @@ -81,18 +81,16 @@ tommy_inline void tommy_trie_inplace_list_remove(tommy_trie_inplace_node** let_p tommy_trie_inplace_node* head = *let_ptr; /* remove from the "circular" prev list */ - if (node->next) { + if (node->next) node->next->prev = node->prev; - } else { + else head->prev = node->prev; /* the last */ - } /* remove from the "0 terminated" next list */ - if (head == node) { + if (head == node) *let_ptr = node->next; /* the new first */ - } else { + else node->prev->next = node->next; - } } void tommy_trie_inplace_init(tommy_trie_inplace* trie_inplace) @@ -162,20 +160,22 @@ static tommy_trie_inplace_node* trie_inplace_bucket_remove(tommy_uint_t shift, t return 0; /* if the node to remove is not specified */ - if (!remove) { - /* remove the first */ - remove = node; - } + if (!remove) + remove = node; /* remove the first */ tommy_trie_inplace_list_remove(let_ptr, remove); + /* if not change in the node, nothing more to do */ + if (*let_ptr == node) + return remove; + /* if we have a substitute */ if (*let_ptr != 0) { /* copy the child pointers to the new one */ node = *let_ptr; - for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) { + for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) node->map[i] = remove->map[i]; - } + return remove; } @@ -196,17 +196,15 @@ static tommy_trie_inplace_node* trie_inplace_bucket_remove(tommy_uint_t shift, t } /* if it's itself a leaf */ - if (!leaf_let_ptr) { + if (!leaf_let_ptr) return remove; - } /* remove the leaf */ *leaf_let_ptr = 0; /* copy the child pointers */ - for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) { + for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) leaf->map[i] = remove->map[i]; - } /* put it in place */ *let_ptr = leaf; diff --git a/tommyds/tommytrieinp.h b/tommyds/tommytrieinp.h index 
317a80c..c30f676 100644 --- a/tommyds/tommytrieinp.h +++ b/tommyds/tommytrieinp.h @@ -222,7 +222,7 @@ tommy_inline void* tommy_trie_inplace_search(tommy_trie_inplace* trie_inplace, t void* tommy_trie_inplace_remove_existing(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node); /** - * Returns the number of elements. + * Gets the number of elements. */ tommy_inline tommy_count_t tommy_trie_inplace_count(tommy_trie_inplace* trie_inplace) { diff --git a/tommyds/tommytypes.h b/tommyds/tommytypes.h index 5c81f3c..45d3d1a 100644 --- a/tommyds/tommytypes.h +++ b/tommyds/tommytypes.h @@ -243,7 +243,7 @@ typedef int tommy_compare_func(const void* obj_a, const void* obj_b); * Search function for elements. * \param arg Pointer at the value to search. * \param obj Pointer at the object to compare to. - * \return ==0 if the value matches the element. != 0 if different. + * \return ==0 if the value matches the element. !=0 if different. * * Note that the first argument is a pointer to the value to search and * the second one is a pointer to the object to compare. @@ -310,9 +310,9 @@ typedef void tommy_foreach_arg_func(void* arg, void* obj); * Return the bit index of the most significant 1 bit. * * If no bit is set, the result is undefined. - * To force a return 0 in this case, you can use tommy_ilog2(value | 1). + * To force a return 0 in this case, you can use tommy_ilog2_u32(value | 1). * - * Other interesting ways for bitscan can be found at: + * Other interesting ways for bitscan are at: * * Bit Twiddling Hacks * http://graphics.stanford.edu/~seander/bithacks.html @@ -321,7 +321,7 @@ typedef void tommy_foreach_arg_func(void* arg, void* obj); * http://chessprogramming.wikispaces.com/BitScan * * \param value Value to scan. 0 is not allowed. - * \return The index of the most significan bit set. + * \return The index of the most significant bit set. 
*/ tommy_inline tommy_uint_t tommy_ilog2_u32(tommy_uint32_t value) { @@ -336,7 +336,8 @@ tommy_inline tommy_uint_t tommy_ilog2_u32(tommy_uint32_t value) * Where "x ^ 31 = 31 - x", but gcc does not optimize "31 - __builtin_clz(x)" to bsr(x), * but generates 31 - (bsr(x) xor 31). * - * So we write "__builtin_clz(x) ^ 31" instead of "31 - __builtin_clz(x)". + * So we write "__builtin_clz(x) ^ 31" instead of "31 - __builtin_clz(x)", + * to allow the double xor to be optimized out. */ return __builtin_clz(value) ^ 31; #else