diff --git a/.gitignore b/.gitignore
index 3ed2381..b8781f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,12 @@ stamp-h1
 *.s
 *.S
 
+# coverage
+*.gcda
+*.gcno
+*.info
+cov/
+
 # project
 *.dst
 *.epr
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..32fa9b0
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+# Travis CI configuration file
+
+language: c
+
+script: make check
+
+compiler:
+  - clang
+  - gcc
+
diff --git a/HISTORY b/HISTORY
index 2fe1344..d5e79c2 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,12 +1,18 @@
 TommyDS HISTORY
 ===============
 
-2.0 2014/10
+2.1 2014/12 BETA
+================
+
+2.0 2014/12
 ===========
+ * Fixed a Segmentation Fault bug in the trie_inplace container when inserting
+   duplicate elements.
  * Faster array and hashlin implementation when accessing elements.
  * Added new hashtable functions to iterate over all the elements.
  * Added a new tommy_calloc() function used for allocating initialized memory.
    If you redefined tommy_malloc(), likely you have to redefine also tommy_calloc().
+ * Reached 100% code coverage in the regression test.
  * Different source code organization.
  * Added benchmark comparison with Binary Search Tesseract by Gregorius van
    den Hoven.
diff --git a/INSTALL b/INSTALL
index f5a2c03..40e55ff 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,8 @@
 TommyDS INSTALL
 ===============
 
-TommyDS doesn't need any installation. You have only to import the required .c
-and .h files into your program and start using them.
+TommyDS doesn't need any installation.
 
+You have only to import the required .c and .h files into your program
+and use them.
 
diff --git a/Makefile b/Makefile
index 1ae5b9a..5336c6c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,30 @@
 #############################################################################
 # Tommy Makefile
 
-VERSION=2.0
-CFLAGS=-m32 -O3 -march=pentium4 -mtune=generic -Wall -Wextra -Wshadow -Wcast-qual -g
-# -std=gnu++11 required by Google btree
-CXXFLAGS=$(CFLAGS) -fpermissive -std=gnu++11
-CC=gcc
-CXX=g++
-UNAME=$(shell uname)
+# Version of TommyDS
+VERSION = 2.1
+
+# Build options for the check program
+ifdef COVERAGE
+CFLAGS = -O0 -g -fprofile-arcs -ftest-coverage
+else
+CFLAGS = -O3 -march=native -Wall -Wextra -Wshadow -Wcast-qual -g
+endif
+
+# Build options for the benchmark
+# -std=gnu++0x required by Google btree
+BENCHCXXFLAGS = -m32 -O3 -march=nehalem -fpermissive -std=gnu++0x -Wall -g
+
+# Programs
+CC ?= gcc
+CXX ?= g++
+OBJDUMP ?= objdump
+UNAME = $(shell uname)
 
 # Linux
 ifeq ($(UNAME),Linux)
-LIB=-lrt benchmark/lib/judy/libJudyL.a benchmark/lib/judy/libJudyMalloc.a
+LIB=-lrt
+BENCHLIB=benchmark/lib/judy/libJudyL.a benchmark/lib/judy/libJudyMalloc.a
 EXE=
 O=.o
 endif
@@ -25,13 +38,13 @@ endif
 
 # Windows
 ifeq ($(UNAME),)
-LIB=benchmark/lib/judy/src/judy.lib
+BENCHLIB=benchmark/lib/judy/src/judy.lib
 EXE=.exe
 O=.obj
 endif
 
-CHECK = ./tommybench -N 1000000 -d tommy-hashlin
-#CHECK = ./tommycheck
+#CHECK = ./tommybench -n 1000000 -d tommy-hashlin
+CHECK = ./tommycheck
 
 DEP = \
 	tommyds/tommyalloc.c \
@@ -67,24 +80,43 @@ DEPTEST = \
 	check.c \
 	benchmark.cc
 
-all: tommycheck$(EXE) tommybench$(EXE)
+all: tommycheck$(EXE)
+
+bench: tommybench$(EXE)
 
 tommy$(O): $(DEP)
 	$(CC) $(CFLAGS) -c tommyds/tommy.c -o tommy$(O)
-	$(CC) $(CFLAGS) -S -fverbose-asm tommyds/tommy.c -o tommy.s
-	objdump -S tommy$(O) > tommy.S
+	$(OBJDUMP) -S tommy$(O) > tommy.s
 
 tommycheck$(EXE): check.c tommy$(O)
 	$(CC) $(CFLAGS) check.c tommy$(O) -o tommycheck$(EXE) $(LIB)
 
 tommybench$(EXE): benchmark.cc $(DEP)
-	$(CXX) $(CXXFLAGS) benchmark.cc -o tommybench$(EXE) $(LIB)
+	$(CXX) $(BENCHCXXFLAGS) benchmark.cc -o tommybench$(EXE) $(LIB) $(BENCHLIB)
 
-check: tommycheck$(EXE) tommybench$(EXE)
+check: tommycheck$(EXE)
 	./tommycheck$(EXE)
-	./tommybench$(EXE) -N 100000
 	echo Check completed with success!
 
+lcov_reset:
+	lcov -d . -z
+	rm -f ./lcov.info
+
+lcov_capture:
+	lcov -d . --capture -o lcov.info
+
+lcov_html:
+	rm -rf ./cov
+	mkdir cov
+	genhtml -o ./cov lcov.info
+
+coverage:
+	$(MAKE) COVERAGE=1 tommycheck$(EXE)
+	$(MAKE) lcov_reset
+	./tommycheck$(EXE)
+	$(MAKE) lcov_capture
+	$(MAKE) lcov_html
+
 valgrind:
 	valgrind \
 		--tool=memcheck \
@@ -136,8 +168,9 @@ web: phony tommyweb.doxygen tommy.css $(DEP)
 	rm -f web/tab_*.png
 
 clean:
-	rm -f *.log *.s *.S *.lst *.o
+	rm -f *.log *.s *.lst *.o
 	rm -f *.ncb *.suo *.obj
+	rm -f *.gcno *.gcda lcov.info
 	rm -rf Debug Release x64
 	rm -f callgrind.out.*
 	rm -f cachegrind.out.*
@@ -176,3 +209,7 @@ dist:
 	zip -r $(DIST).zip $(DIST)
 	rm -r $(DIST)
 
+distcheck: dist # unpack the dist tarball, run the regression test inside it, then clean up
+	tar zxvf $(DIST).tar.gz
+	cd $(DIST) && $(MAKE) check
+	rm -rf $(DIST)
diff --git a/README b/README
index a64e90c..cdef1a6 100644
--- a/README
+++ b/README
@@ -1,7 +1,8 @@
 TommyDS
 =======
 
-TommyDS is a C library of hashtables and tries designed for high performance.
+TommyDS is a C library of array, hashtable and trie data structures,
+designed for high performance and providing an easy-to-use interface.
 
 It's faster than all the similar libraries like rbtree, judy, goodledensehash,
 khash, uthash, nedtries and others.
@@ -9,12 +10,15 @@ khash, uthash, nedtries and others.
 The data structures provided are:
 
 	tommy_list - A double linked list.
-	tommy_array - A linear array. It doesn't fragment the heap.
+	tommy_array - A linear array. It doesn't fragment
+		the heap.
+	tommy_arrayblk - A blocked linear array. It doesn't fragment
+		the heap and it minimizes the space occupation.
 	tommy_hashtable - A fixed size chained hashtable.
 	tommy_hashdyn - A dynamic chained hashtable.
 	tommy_hashlin - A linear chained hashtable. It doesn't have the
-		problem of the delay when resizing and it doesn't fragment
-		the heap.
+		problem of the delay when resizing and it doesn't
+		fragment the heap.
 	tommy_trie - A trie optimized for cache utilization.
 	tommy_trie_inplace - A trie completely inplace.
 
diff --git a/benchmark.cc b/benchmark.cc
index 8e39c70..39bb996 100644
--- a/benchmark.cc
+++ b/benchmark.cc
@@ -186,7 +186,7 @@ typedef size_t ssize_t;
 /* Concurrency Kit Hash Set */
 /* http://concurrencykit.org/ */
 /* Note that it has a VERY BAD performance on the "Change" test, */
-/* so we disable it in the general graphs */
+/* so we disable it in the graphs until further investigation */
 /* #define USE_CK */
 #if defined(USE_CK) && defined(__linux)
 /* if you enable it, ensure to link also with the -lck option */
@@ -392,9 +392,9 @@ struct nedtrie_t nedtrie;
 khash_t(word)* khash;
 #ifdef __cplusplus
 /* use a specialized hash, otherwise the performance depends on the STL implementation used. */
-class cpp_tommy_inthash_u32 {
+class cpp_hash {
 public:
-	unsigned operator()(unsigned key) const { return tommy_inthash_u32(key); }
+	unsigned operator()(unsigned key) const { return hash(key); }
 };
 #endif
 #ifdef USE_CPPMAP
@@ -402,14 +402,14 @@ typedef std::map<unsigned, struct cpp_object*> cppmap_t;
 cppmap_t* cppmap;
 #endif
 #ifdef USE_CPPUNORDEREDMAP
-typedef std::unordered_map<unsigned, struct cpp_object*, cpp_tommy_inthash_u32> cppunorderedmap_t;
+typedef std::unordered_map<unsigned, struct cpp_object*, cpp_hash> cppunorderedmap_t;
 cppunorderedmap_t* cppunorderedmap;
 #endif
 #ifdef USE_GOOGLELIBCHASH
 struct HashTable* googlelibhash;
 #endif
 #ifdef USE_GOOGLEDENSEHASH
-typedef google::dense_hash_map<unsigned, struct google_object*, cpp_tommy_inthash_u32> googledensehash_t;
+typedef google::dense_hash_map<unsigned, struct google_object*, cpp_hash> googledensehash_t;
 googledensehash_t* googledensehash;
 #endif
 #ifdef USE_GOOGLEBTREE
@@ -2525,7 +2525,7 @@ void test(unsigned size, unsigned data, int log, int sparse)
 
 					printf("%12u", the_max);
 
-					printf(" %16s %16s", DATA_NAME[the_data], ORDER_NAME[the_order]);
+					printf(" %18s %10s", DATA_NAME[the_data], ORDER_NAME[the_order]);
 
 					/* skip degenerated cases */
 					if (LAST[the_data][the_order] > TIME_MAX_NS) {
@@ -2546,7 +2546,7 @@ void test(unsigned size, unsigned data, int log, int sparse)
 					if (the_log) {
 						for(i=0;i<OPERATION_MAX;++i)
 							printf(" %4u", LOG[the_retry][the_data][the_order][i]);
-						printf(" [ms]\n");
+						printf(" [ns]\n");
 					}
 				}
 			}
@@ -2600,34 +2600,14 @@ void test(unsigned size, unsigned data, int log, int sparse)
 	}
 }
 
-void test_cache_miss(void)
+void help(void)
 {
-	unsigned size = 512*1024*1024;
-	unsigned char* DATA = (unsigned char*)malloc(size);
-	unsigned delta = 512;
-	tommy_uint64_t miss_time;
-	unsigned i, j;
-	tommy_uint64_t result = 0;
-
-	memset(DATA, 0, size);
-
-	for(j=0;j<8;++j) {
-		tommy_uint64_t start, stop;
-		start = nano();
-		for(i=0;i<size;i += delta) {
-			++DATA[i];
-		}
-		stop = nano();
-
-		if (!result || result > stop - start)
-			result = stop - start;
-	}
-
-	free(DATA);
-
-	miss_time = result * delta / size;
-
-	printf("Cache miss %d [ns]\n", (unsigned)miss_time);
+	printf("Options\n");
+	printf("-n NUMBER Run the test for the specified number of objects.\n");
+	printf("-m        Run the test for the maximum number of objects.\n");
+	printf("-d DATA   Run the test for the specified data structure.\n");
+	printf("-s        Use a sparse dataset instead of a compact one.\n");
+	printf("-l        Logs results into file for graphs creation.\n");
 }
 
 int main(int argc, char * argv[])
@@ -2636,7 +2616,6 @@ int main(int argc, char * argv[])
 	int flag_data = DATA_MAX;
 	int flag_size = 0;
 	int flag_log = 0;
-	int flag_miss = 0;
 	int flag_sparse = 0;
 
 	nano_init();
@@ -2649,12 +2628,10 @@ int main(int argc, char * argv[])
 		} else if (strcmp(argv[i], "-s") == 0) {
 			flag_sparse = 1;
 		} else if (strcmp(argv[i], "-m") == 0) {
-			flag_miss = 1;
-		} else if (strcmp(argv[i], "-n") == 0) {
 			flag_size = MAX;
-		} else if (strcmp(argv[i], "-N") == 0) {
+		} else if (strcmp(argv[i], "-n") == 0) {
 			if (i+1 >= argc) {
-				printf("Missing data in %s\n", argv[i]);
+				printf("Missing number of objects in %s\n", argv[i]);
 				exit(EXIT_FAILURE);
 			}
 			flag_size = atoi(argv[i+1]);
@@ -2672,21 +2649,22 @@ int main(int argc, char * argv[])
 				}
 			}
 			if (flag_data == DATA_MAX) {
-				printf("Unknown data %s\n", argv[i+1]);
+				printf("Unknown data name '%s'\n", argv[i+1]);
+				printf("Possible values are:\n");
+				for(j=0;j<DATA_MAX;++j) {
+					printf("\t%s\n", DATA_NAME[j]);
+				}
 				exit(EXIT_FAILURE);
 			}
+
 			++i;
 		} else {
 			printf("Unknown option %s\n", argv[i]);
+			help();
 			exit(EXIT_FAILURE);
 		} 
 	}
 
-	if (flag_miss) {
-		test_cache_miss();
-		return EXIT_SUCCESS;
-	}
-
 	test(flag_size, flag_data, flag_log, flag_sparse);
 
 	printf("OK\n");
diff --git a/benchmark.vcxproj b/benchmark.vcxproj
index 5fb256b..110ee36 100644
--- a/benchmark.vcxproj
+++ b/benchmark.vcxproj
@@ -28,7 +28,7 @@
     <ConfigurationType>Application</ConfigurationType>
     <CharacterSet>MultiByte</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <PlatformToolset>v100</PlatformToolset>
+    <PlatformToolset>v110</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
diff --git a/benchmark/gr_all.sh b/benchmark/gr_all.sh
index d58f55b..eb62729 100644
--- a/benchmark/gr_all.sh
+++ b/benchmark/gr_all.sh
@@ -6,7 +6,8 @@ export GNUPLOT_DEFAULT_GDFONT=arial
 gnuplot gr_def_random_hit.gnu
 gnuplot gr_def_random_change.gnu
 gnuplot gr_other_judy_problem.gnu
-gnuplot gr_other_slow_problem.gnu
+gnuplot gr_other_googlelibchash_problem.gnu
+gnuplot gr_other_ck_problem.gnu
 
 DIR=data/core_i5_650_3G2_linux
 gnuplot $DIR/gr_def.gnu gr_forward_insert.gnu
diff --git a/benchmark/gr_common.gnu b/benchmark/gr_common.gnu
index e6c4759..9a46d83 100644
--- a/benchmark/gr_common.gnu
+++ b/benchmark/gr_common.gnu
@@ -8,7 +8,7 @@ set style data linespoints
 set datafile missing "0"
 set xlabel "Number of elements in logarithmic scale"
 set ylabel "Time for element in nanosecond in logarithmic scale\nLower is better"
-set xrange [1000:100000000]
+set xrange [1000:10000000]
 set yrange [6:1000]
 set logscale y
 set logscale x
diff --git a/benchmark/gr_def_random_change.gnu b/benchmark/gr_def_random_change.gnu
index 0827cdc..e204c26 100644
--- a/benchmark/gr_def_random_change.gnu
+++ b/benchmark/gr_def_random_change.gnu
@@ -1,7 +1,7 @@
 load "gr_common.gnu"
 
 tdir = "def/"
-tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.7.1, 32 bit"
+tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.9.2, 32 bit"
 
 set output bdir.tdir."img_random_change".bext
 set title "Random Change (Remove + Insert)".tsub
diff --git a/benchmark/gr_def_random_hit.gnu b/benchmark/gr_def_random_hit.gnu
index 236b341..f6ea0f0 100644
--- a/benchmark/gr_def_random_hit.gnu
+++ b/benchmark/gr_def_random_hit.gnu
@@ -1,7 +1,7 @@
 load "gr_common.gnu"
 
 tdir = "def/"
-tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.7.1, 32 bit"
+tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache, 2400 Uncore Speed\nLinux, gcc 4.9.2, 32 bit"
 
 set output bdir.tdir."img_random_hit".bext
 set title "Random Hit".tsub
diff --git a/benchmark/gr_other_slow_problem.gnu b/benchmark/gr_other_ck_problem.gnu
similarity index 78%
rename from benchmark/gr_other_slow_problem.gnu
rename to benchmark/gr_other_ck_problem.gnu
index d6a7226..4612a3e 100644
--- a/benchmark/gr_other_slow_problem.gnu
+++ b/benchmark/gr_other_ck_problem.gnu
@@ -5,9 +5,9 @@ set yrange [10:10000]
 tdir = "other/"
 tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache\nLinux, gcc 4.7.1, 32 bit"
 
-set output bdir.tdir."slow_problem".bext
+set output bdir.tdir."ck_problem".bext
 set title "Random Change (Remove + Insert)".tsub
-data = bdir.tdir.'slow_problem.lst'
+data = bdir.tdir.'ck_problem.lst'
 
 plot data using 1:2 title columnheader(2), \
 	for [i=3:20] '' using 1:i title columnheader(i) ls i-1
diff --git a/benchmark/gr_other_googlelibchash_problem.gnu b/benchmark/gr_other_googlelibchash_problem.gnu
new file mode 100644
index 0000000..11616c7
--- /dev/null
+++ b/benchmark/gr_other_googlelibchash_problem.gnu
@@ -0,0 +1,14 @@
+load "gr_common.gnu"
+
+set yrange [10:10000]
+
+tdir = "other/"
+tsub = "\nCore i5 650 3.20 GHz, 4 MB L3 cache\nLinux, gcc 4.7.1, 32 bit"
+
+set output bdir.tdir."googlelibchash_problem".bext
+set title "Random Change (Remove + Insert)".tsub
+data = bdir.tdir.'googlelibchash_problem.lst'
+
+plot data using 1:2 title columnheader(2), \
+	for [i=3:20] '' using 1:i title columnheader(i) ls i-1
+
diff --git a/check.c b/check.c
index 1892676..0e0f7c7 100644
--- a/check.c
+++ b/check.c
@@ -53,13 +53,13 @@
 
 #include "tommyds/tommy.h"
 
-#define MAX 1000000
+#define TOMMY_SIZE 1000000
 
 #define PAYLOAD 16 /**< Size of payload data for objects */
 
 struct object {
-	tommy_node node;
 	int value;
+	tommy_node node;
 	char payload[PAYLOAD];
 };
 
@@ -101,7 +101,19 @@ int compare_vector(const void* void_a, const void* void_b)
 struct object_hash {
 	int value;
 	tommy_node node;
-	tommy_node hashnode;
+	char payload[PAYLOAD];
+};
+
+struct object_trie {
+	int value;
+	tommy_trie_node node;
+	char payload[PAYLOAD];
+};
+
+struct object_trie_inplace {
+	int value;
+	tommy_trie_inplace_node node;
+	char payload[PAYLOAD];
 };
 
 /******************************************************************************/
@@ -207,23 +219,48 @@ unsigned rnd(unsigned max)
 }
 
 /******************************************************************************/
-/* test */
+/* helper */
+
+unsigned isqrt(unsigned n)
+{
+	unsigned root, remain, place;
+
+	root = 0;
+
+	remain = n;
 
-const char* the_str;
-tommy_uint64_t the_start;
+	place = 0x40000000;
+
+	while (place > remain)
+		place /= 4;
+
+	while (place) {
+		if (remain >= root + place) {
+			remain -= root + place;
+			root += 2 * place;
+		}
+
+		root /= 2;
+		place /= 4;
+	}
+
+	return root;
+}
 
 /**
  * Cache clearing buffer.
  */
-unsigned char CACHE[8*1024*1024];
+static unsigned char the_cache[16*1024*1024];
+static const char* the_str;
+static tommy_uint64_t the_start;
 
 void cache_clear(void)
 {
 	unsigned i;
 
 	/* read & write */
-	for(i=0;i<sizeof(CACHE);i += 32)
-		CACHE[i] += 1;
+	for(i=0;i<sizeof(the_cache);i += 32)
+		the_cache[i] += 1;
 
 #ifdef WIN32
 	Sleep(0);
@@ -247,6 +284,316 @@ void stop()
 #define START(s) start(s)
 #define STOP() stop()
 
+/******************************************************************************/
+/* test */
+
+struct hash32_test {
+	char* data;
+	tommy_uint32_t len;
+	tommy_uint32_t hash;
+} HASH32[] = {
+	{ "", 0, 0x8614384c },
+	{ "a", 1, 0x12c16c36 },
+	{ "abc", 3, 0xc58e8af5 },
+	{ "message digest", 14, 0x006b32f1 },
+	{ "abcdefghijklmnopqrstuvwxyz", 26, 0x7e6fcfe0 },
+	{ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 62, 0x8604adf8 },
+	{ "The quick brown fox jumps over the lazy dog", 43, 0xdeba3d3a },
+	{ "\x00", 1, 0x4a7d1c33 },
+	{ "\x16\x27", 2, 0x8b50899b },
+	{ "\xe2\x56\xb4", 3, 0x60406493 },
+	{ "\xc9\x4d\x9c\xda", 4, 0xa049144a },
+	{ "\x79\xf1\x29\x69\x5d", 5, 0x4da2c2f1 },
+	{ "\x00\x7e\xdf\x1e\x31\x1c", 6, 0x59de30cf },
+	{ "\x2a\x4c\xe1\xff\x9e\x6f\x53", 7, 0x219e149c },
+	{ "\xba\x02\xab\x18\x30\xc5\x0e\x8a", 8, 0x25067520 },
+	{ "\xec\x4e\x7a\x72\x1e\x71\x2a\xc9\x33", 9, 0xa1f368d8 },
+	{ "\xfd\xe2\x9c\x0f\x72\xb7\x08\xea\xd0\x78", 10, 0x805fc63d },
+	{ "\x65\xc4\x8a\xb8\x80\x86\x9a\x79\x00\xb7\xae", 11, 0x7f75dd0f },
+	{ "\x77\xe9\xd7\x80\x0e\x3f\x5c\x43\xc8\xc2\x46\x39", 12, 0xb9154382 },
+	{ "\x87\xd8\x61\x61\x4c\x89\x17\x4e\xa1\xa4\xef\x13\xa9", 13, 0x2bdd05d7 },
+	{ "\xfe\xa6\x5b\xc2\xda\xe8\x95\xd4\x64\xab\x4c\x39\x58\x29", 14, 0xabffeb9f },
+	{ "\x94\x49\xc0\x78\xa0\x80\xda\xc7\x71\x4e\x17\x37\xa9\x7c\x40", 15, 0x886da0b4 },
+	{ "\x53\x7e\x36\xb4\x2e\xc9\xb9\xcc\x18\x3e\x9a\x5f\xfc\xb7\xb0\x61", 16, 0x34ed2af3 },
+	{ 0, 0, 0 }
+};
+
+struct hash64_test {
+	char* data;
+	tommy_uint32_t len;
+	tommy_uint64_t hash;
+} HASH64[] = {
+	{ "", 0, 0x8614384cb5165fbfULL },
+	{ "a", 1, 0x1a2e0298a8e94a3dULL },
+	{ "abc", 3, 0x7555796b7a7d21ebULL },
+	{ "message digest", 14, 0x9411a57d04b92fb4ULL },
+	{ "abcdefghijklmnopqrstuvwxyz", 26, 0x3ca3f8d2b4e69832ULL },
+	{ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 62, 0x6dae542ba0015a4dULL },
+	{ "The quick brown fox jumps over the lazy dog", 43, 0xe06d8cbb3d2ea1a6ULL },
+	{ "\x00", 1, 0x201e664fb5f2c021ULL },
+	{ "\x16\x27", 2, 0xef42fa8032c4b775ULL },
+	{ "\xe2\x56\xb4", 3, 0x6e6c498a6688466cULL },
+	{ "\xc9\x4d\x9c\xda", 4, 0x5195005419905423ULL },
+	{ "\x79\xf1\x29\x69\x5d", 5, 0x221235b48afee7c1ULL },
+	{ "\x00\x7e\xdf\x1e\x31\x1c", 6, 0x1b1f18b9266f095bULL },
+	{ "\x2a\x4c\xe1\xff\x9e\x6f\x53", 7, 0x2cbafa8e741d49caULL },
+	{ "\xba\x02\xab\x18\x30\xc5\x0e\x8a", 8, 0x4677f04c06e0758dULL },
+	{ "\xec\x4e\x7a\x72\x1e\x71\x2a\xc9\x33", 9, 0x5afe09e8214e2163ULL },
+	{ "\xfd\xe2\x9c\x0f\x72\xb7\x08\xea\xd0\x78", 10, 0x115b6276d209fab6ULL },
+	{ "\x65\xc4\x8a\xb8\x80\x86\x9a\x79\x00\xb7\xae", 11, 0xd0636d2f01cf3a3eULL },
+	{ "\x77\xe9\xd7\x80\x0e\x3f\x5c\x43\xc8\xc2\x46\x39", 12, 0x6d259f5fef74f93eULL },
+	{ "\x87\xd8\x61\x61\x4c\x89\x17\x4e\xa1\xa4\xef\x13\xa9", 13, 0x23449c3baf93ac39ULL },
+	{ "\xfe\xa6\x5b\xc2\xda\xe8\x95\xd4\x64\xab\x4c\x39\x58\x29", 14, 0x9b85ba28d7854d69ULL },
+	{ "\x94\x49\xc0\x78\xa0\x80\xda\xc7\x71\x4e\x17\x37\xa9\x7c\x40", 15, 0x3617c833193a359fULL },
+	{ "\x53\x7e\x36\xb4\x2e\xc9\xb9\xcc\x18\x3e\x9a\x5f\xfc\xb7\xb0\x61", 16, 0x5dbf9ff58e274dd9ULL },
+	{ 0, 0, 0 }
+};
+
+struct inthash32_test {
+	tommy_uint32_t value;
+	tommy_uint32_t hash;
+} INTHASH32[] = {
+	{ 0x00000000, 0x00000000 },
+	{ 0x00000001, 0xc2b73583 },
+	{ 0x00000002, 0xe90f1258 },
+	{ 0x00000004, 0x7a10c2d3 },
+	{ 0x00000008, 0x200c3457 },
+	{ 0x00000010, 0xeb97690a },
+	{ 0x00000020, 0x7fb291d3 },
+	{ 0x00000040, 0xf50601d8 },
+	{ 0x00000080, 0x727dbaed },
+	{ 0x00000100, 0x7ef5f77d },
+	{ 0x00000200, 0x91a480dc },
+	{ 0x00000400, 0x2bad9acc },
+	{ 0x00000800, 0xfe4d150e },
+	{ 0x00001000, 0xc3add476 },
+	{ 0x00002000, 0x23946174 },
+	{ 0x00004000, 0x987cfc43 },
+	{ 0x00008000, 0x630cdf68 },
+	{ 0x00010000, 0x0ac3a767 },
+	{ 0x00020000, 0xad086d5b },
+	{ 0x00040000, 0x1126ccdf },
+	{ 0x00080000, 0x4370dbc4 },
+	{ 0x00100000, 0xefd6e5e6 },
+	{ 0x00200000, 0x9a93c1b5 },
+	{ 0x00400000, 0x10114902 },
+	{ 0x00800000, 0x96117e60 },
+	{ 0x01000000, 0x5dec9f58 },
+	{ 0x02000000, 0xfee234c7 },
+	{ 0x04000000, 0x36137e26 },
+	{ 0x08000000, 0x6c26fc4c },
+	{ 0x10000000, 0xd84df898 },
+	{ 0x20000000, 0xb099f131 },
+	{ 0x40000000, 0x6131e262 },
+	{ 0x80000000, 0xc263c4c4 },
+	{ 0x00204a16, 0xd8b97461 },
+	{ 0x05542a27, 0x65d0057a },
+	{ 0x169c39e2, 0x7c2ff59a },
+	{ 0x2eab4956, 0xa8ba89bd },
+	{ 0x0bb0b8b4, 0xb790c8de },
+	{ 0x0bd068c9, 0x92d30546 },
+	{ 0x3e5d224d, 0xd610bf1d },
+	{ 0x436c8d9c, 0x27b09019 },
+	{ 0x3a2adfda, 0xdfde9385 },
+	{ 0x1dd8ca79, 0xc7c10d7b },
+	{ 0x6a67c4f1, 0xf92a788d },
+	{ 0x7742fa29, 0x892c3519 },
+	{ 0x48b62d69, 0x7f642f55 },
+	{ 0x472e195d, 0xea2c49e5 },
+	{ 0x0681a900, 0x4de5c929 },
+	{ 0x622ebb7e, 0x35a7e306 },
+	{ 0x026bccdf, 0xa7e4b630 },
+	{ 0x204d531e, 0x43ebd664 },
+	{ 0x262b5331, 0x7a9a161f },
+	{ 0x7020241c, 0xbaed3ef7 },
+	{ 0x440a0e2a, 0x0b8c5b29 },
+	{ 0x75cb1c4c, 0x19555414 },
+	{ 0x41f9a5e1, 0xc6acbc6b },
+	{ 0x67bc26ff, 0x54e4411f },
+	{ 0x181e279e, 0x979c834f },
+	{ 0x7172c06f, 0xe6a179ff },
+	{ 0x4909e153, 0x198e5e0f },
+	{ 0x09d3bfba, 0x109a6f17 },
+	{ 0x685ae502, 0xf9d57a4b },
+	{ 0x7e10e8ab, 0x09765bec },
+	{ 0x0f262618, 0xe16404cd },
+	{ 0x726b8230, 0x55e478b4 },
+	{ 0, 0 }
+};
+
+struct inthash64_test {
+	tommy_uint64_t value;
+	tommy_uint64_t hash;
+} INTHASH64[] = {
+	{ 0x0000000000000000ULL, 0x77cfa1eef01bca90ULL },
+	{ 0x0000000000000001ULL, 0x5bca7c69b794f8ceULL },
+	{ 0x0000000000000002ULL, 0xb795033f6f2a0674ULL },
+	{ 0x0000000000000004ULL, 0x6f2a25235e544a31ULL },
+	{ 0x0000000000000008ULL, 0xde543f7b3ca87ecbULL },
+	{ 0x0000000000000010ULL, 0xbca87eec7950fd82ULL },
+	{ 0x0000000000000020ULL, 0x7950fddef2a1fb10ULL },
+	{ 0x0000000000000040ULL, 0xf2a1fbbde543f620ULL },
+	{ 0x0000000000000080ULL, 0xe543f9324a87efadULL },
+	{ 0x0000000000000100ULL, 0xca87f25e950fdf4eULL },
+	{ 0x0000000000000200ULL, 0x950fe4bd2a1fbe9cULL },
+	{ 0x0000000000000400ULL, 0x2a1fc968543f7d14ULL },
+	{ 0x0000000000000800ULL, 0x543f92d0a87efa28ULL },
+	{ 0x0000000000001000ULL, 0xa87f259f50fdf44cULL },
+	{ 0x0000000000002000ULL, 0x50fe4b3ea1fbe898ULL },
+	{ 0x0000000000004000ULL, 0xa1fc967bc3f7d12dULL },
+	{ 0x0000000000008000ULL, 0x43f92da207efa3afULL },
+	{ 0x0000000000010000ULL, 0x87f25b4e8fdf4773ULL },
+	{ 0x0000000000020000ULL, 0x0fe4b6a71fbe8efaULL },
+	{ 0x0000000000040000ULL, 0x1fc96d4ebf7d1df5ULL },
+	{ 0x0000000000080000ULL, 0x3f92da9dfefa3bebULL },
+	{ 0x0000000000100000ULL, 0x7f25b53c7df477d7ULL },
+	{ 0x0000000000200000ULL, 0xfe4b6a797be8efafULL },
+	{ 0x0000000000400000ULL, 0xfc96d4f377d1df5fULL },
+	{ 0x0000000000800000ULL, 0xf92da9e76fa3bebfULL },
+	{ 0x0000000001000000ULL, 0xf25b39f0df4749c2ULL },
+	{ 0x0000000002000000ULL, 0xe4b673e1be8e9384ULL },
+	{ 0x0000000004000000ULL, 0xc96ce7c3fd1d2709ULL },
+	{ 0x0000000008000000ULL, 0x92d9cf87fa3a4e12ULL },
+	{ 0x0000000010000000ULL, 0x25b39f0ff4749c24ULL },
+	{ 0x0000000020000000ULL, 0x4b673e1fe8e93848ULL },
+	{ 0x0000000040000000ULL, 0x96ce7c3a51d27085ULL },
+	{ 0x0000000080000000ULL, 0x2d9cf88523a4e10bULL },
+	{ 0x0000000100000000ULL, 0x5b39f10ac749c217ULL },
+	{ 0x0000000200000000ULL, 0xb673e2060e93842fULL },
+	{ 0x0000000400000000ULL, 0x6ce7c40c9d27085fULL },
+	{ 0x0000000800000000ULL, 0xdc8387ff3f0e10aaULL },
+	{ 0x0000001000000000ULL, 0xb9070fee7e1c2154ULL },
+	{ 0x0000002000000000ULL, 0x720e1fdcfc3842a8ULL },
+	{ 0x0000004000000000ULL, 0xe41c3fa478708545ULL },
+	{ 0x0000008000000000ULL, 0xc8387f58f0e10a8aULL },
+	{ 0x0000010000000000ULL, 0x9364fec267021515ULL },
+	{ 0x0000020000000000ULL, 0x26c9fd954e042a2bULL },
+	{ 0x0000040000000000ULL, 0x4d93fb2a9c085456ULL },
+	{ 0x0000080000000000ULL, 0x9b27f6553810a8acULL },
+	{ 0x0000100000000000ULL, 0xae84159b600d1cc8ULL },
+	{ 0x0000200000000000ULL, 0x455932ec903fc254ULL },
+	{ 0x0000400000000000ULL, 0xa2d36aa0d044b36fULL },
+	{ 0x0000800000000000ULL, 0x7b8ef94d0d2d2844ULL },
+	{ 0x0001000000000000ULL, 0xd802248d5ba62df7ULL },
+	{ 0x0002000000000000ULL, 0x2c298f47af1d015eULL },
+	{ 0x0004000000000000ULL, 0x93a4e2a055abc2f5ULL },
+	{ 0x0008000000000000ULL, 0x6f9511373b7145b2ULL },
+	{ 0x0010000000000000ULL, 0x6305c0717e1d40d4ULL },
+	{ 0x0020000000000000ULL, 0x6237df27b416b76bULL },
+	{ 0x0040000000000000ULL, 0x042bc5ffe77f439eULL },
+	{ 0x0080000000000000ULL, 0x7241900621a8c72bULL },
+	{ 0x0100000000000000ULL, 0x6a298a4b4ecb2ec6ULL },
+	{ 0x0200000000000000ULL, 0x789742a5659f92fbULL },
+	{ 0x0400000000000000ULL, 0x794ee951db0365e6ULL },
+	{ 0x0800000000000000ULL, 0x745424e0b94aec3cULL },
+	{ 0x1000000000000000ULL, 0x726e27d005120de7ULL },
+	{ 0x2000000000000000ULL, 0x6898adb511c8513eULL },
+	{ 0x4000000000000000ULL, 0x59dbb96b3414d7ecULL },
+	{ 0x8000000000000000ULL, 0x3be7d0f7780de548ULL },
+	{ 0x0cead30e6469f5c5ULL, 0x0f3b67e6407d0ed9ULL },
+	{ 0x028a2bec206c7b8aULL, 0xa9be0155bf452972ULL },
+	{ 0x56e5747a306eac4eULL, 0x308451cac91706c3ULL },
+	{ 0x6058b11e57287c72ULL, 0x6f8b3e2b8bf1abc4ULL },
+	{ 0x4fec8f2a00cc2071ULL, 0x11f1e965f20a45a4ULL },
+	{ 0x4f0bdf33102febc9ULL, 0x6102a774aa4baacfULL },
+	{ 0x17e067e262adc6fdULL, 0x5878e9adb48c4d7cULL },
+	{ 0x41374f0f3f94939cULL, 0x55d43d2febf8b7dfULL },
+	{ 0x59d163b72870b072ULL, 0x98ee8d1508e8ff20ULL },
+	{ 0x702b00ea2f01f008ULL, 0x1abf9db8b3306341ULL },
+	{ 0x433b78782f7cc0d0ULL, 0xd33358a085d11f6aULL },
+	{ 0x45789bc436c34865ULL, 0xa573c24e116ccf0dULL },
+	{ 0x15e7f9b85580528aULL, 0x6d594ec1bb5651dfULL },
+	{ 0x6e52ea866a56f880ULL, 0x5dac61fbbe7d64ffULL },
+	{ 0x06baec796275d69aULL, 0x193424cca0b43145ULL },
+	{ 0x2ac9d0b77bbdcc00ULL, 0x44b2175a2c313151ULL },
+	{ 0x2299ab770a8223aeULL, 0x404514aa9d8e5c65ULL },
+	{ 0x568e90d709816de9ULL, 0x08907c8b20d9fb59ULL },
+	{ 0x3b6b460e67b34680ULL, 0xfc77f1ea859b024eULL },
+	{ 0x418fde5c48db0e3fULL, 0x2e6a2d8cbe2d4412ULL },
+	{ 0x5e8fa3c84b12c543ULL, 0xa9354017412aba12ULL },
+	{ 0x49fad5466f937fc2ULL, 0xe81aeb4429b6264aULL },
+	{ 0x2ab116877b7b6839ULL, 0xb189fec8b0994d6eULL },
+	{ 0x16a5916132482fd8ULL, 0xa7c48eb9f12b7d37ULL },
+	{ 0x6d24a54c0e51b961ULL, 0x701631d776a5b0e2ULL },
+	{ 0x7a39bf1767d47a89ULL, 0x6bd08da7d7095b7aULL },
+	{ 0x11c7f9a173bf8d4eULL, 0x451390b96dcad08dULL },
+	{ 0x10411fef57d4fca4ULL, 0x49bd093dc8d7e040ULL },
+	{ 0x001079a900860713ULL, 0x81ab2baf1bf59632ULL },
+	{ 0x4b1b9ca618b2a5feULL, 0x152cb5a46ef65f99ULL },
+	{ 0x036db8c22ffea95bULL, 0x08a263ec4de57fa9ULL },
+	{ 0x64861ee83b7d6ddaULL, 0xd344a694800cea8cULL },
+	{ 0, 0 }
+};
+
+void test_hash(void)
+{
+	unsigned i;
+
+	START("hash functions");
+
+	for(i=0;HASH32[i].data;++i) {
+		if (tommy_hash_u32(0xa766795d, HASH32[i].data, HASH32[i].len) != HASH32[i].hash)
+			abort();
+	}
+
+	for(i=0;HASH64[i].data;++i) {
+		if (tommy_hash_u64(0x2f022773a766795dULL, HASH64[i].data, HASH64[i].len) != HASH64[i].hash)
+			abort();
+	}
+
+	for(i=0;INTHASH32[i].value || !i;++i) {
+		if (tommy_inthash_u32(INTHASH32[i].value) != INTHASH32[i].hash)
+			abort();
+	}
+
+	for(i=0;INTHASH64[i].value || !i;++i) {
+		if (tommy_inthash_u64(INTHASH64[i].value) != INTHASH64[i].hash)
+			abort();
+	}
+
+	STOP();
+}
+
+void test_alloc(void)
+{
+	const unsigned size = 10 * TOMMY_SIZE;
+	unsigned i;
+	tommy_allocator alloc;
+	void** PTR;
+
+	PTR = malloc(size * sizeof(void*));
+
+	/* ensure at least pointer alignment */
+	tommy_allocator_init(&alloc, sizeof(void*), 1);
+	if (alloc.align_size < sizeof(void*))
+		abort();
+	tommy_allocator_done(&alloc);
+
+	/* ensure correct alignment */
+	tommy_allocator_init(&alloc, sizeof(void*) - 1, sizeof(void*));
+	if (alloc.block_size != sizeof(void*))
+		abort();
+	tommy_allocator_done(&alloc);
+
+	tommy_allocator_init(&alloc, 64, 64);
+
+	START("alloc");
+	for(i=0;i<size;++i) {
+		PTR[i] = tommy_allocator_alloc(&alloc);
+	}
+	STOP();
+
+	START("free");
+	for(i=0;i<size;++i) {
+		tommy_allocator_free(&alloc, PTR[i]);
+	}
+	STOP();
+
+	tommy_allocator_done(&alloc);
+
+	free(PTR);
+}
+
 void test_list_order(tommy_node* list)
 {
 	tommy_node* node;
@@ -273,17 +620,28 @@ void test_list(void)
 	struct object_vector* VECTOR;
 	tommy_node* list;
 	unsigned i;
+	const unsigned size = TOMMY_SIZE;
 
-	LIST = malloc(MAX * sizeof(struct object));
-	VECTOR = malloc(MAX * sizeof(struct object_vector));
+	LIST = malloc(size * sizeof(struct object));
+	VECTOR = malloc(size * sizeof(struct object_vector));
 
-	for(i=0;i<MAX;++i) {
+	for(i=0;i<size;++i) {
 		VECTOR[i].value = LIST[i].value = 0;
 	}
 
-	list = 0;
-	for(i=0;i<MAX;++i) {
-		VECTOR[i].value = LIST[i].value = rnd(MAX);
+	tommy_list_init(&list);
+
+	if (!tommy_list_empty(&list))
+		abort();
+
+	if (tommy_list_tail(&list) != 0)
+		abort();
+
+	if (tommy_list_head(&list) != 0)
+		abort();
+
+	for(i=0;i<size;++i) {
+		VECTOR[i].value = LIST[i].value = rnd(size);
 		tommy_list_insert_tail(&list, &LIST[i].node, &LIST[i]);
 	}
 
@@ -292,17 +650,17 @@ void test_list(void)
 	STOP();
 
 	START("C qsort random");
-	qsort(VECTOR, MAX, sizeof(VECTOR[0]), compare_vector);
+	qsort(VECTOR, size, sizeof(VECTOR[0]), compare_vector);
 	STOP();
 
 	test_list_order(list);
 
 	/* forward order with some (1%) random values */
 	list = 0;
-	for(i=0;i<MAX;++i) {
+	for(i=0;i<size;++i) {
 		VECTOR[i].value = LIST[i].value = i;
 		if (rnd(100) == 0)
-			VECTOR[i].value = LIST[i].value = rnd(MAX);
+			VECTOR[i].value = LIST[i].value = rnd(size);
 		tommy_list_insert_tail(&list, &LIST[i].node, &LIST[i]);
 	}
 
@@ -311,14 +669,14 @@ void test_list(void)
 	STOP();
 
 	START("C qsort partially ordered");
-	qsort(VECTOR, MAX, sizeof(VECTOR[0]), compare_vector);
+	qsort(VECTOR, size, sizeof(VECTOR[0]), compare_vector);
 	STOP();
 
 	test_list_order(list);
 
 	/* forward order */
 	list = 0;
-	for(i=0;i<MAX;++i) {
+	for(i=0;i<size;++i) {
 		VECTOR[i].value = LIST[i].value = i;
 		tommy_list_insert_tail(&list, &LIST[i].node, &LIST[i]);
 	}
@@ -328,15 +686,15 @@ void test_list(void)
 	STOP();
 
 	START("C qsort forward");
-	qsort(VECTOR, MAX, sizeof(VECTOR[0]), compare_vector);
+	qsort(VECTOR, size, sizeof(VECTOR[0]), compare_vector);
 	STOP();
 
 	test_list_order(list);
 
 	/* backward order */
 	list = 0;
-	for(i=0;i<MAX;++i) {
-		VECTOR[i].value = LIST[i].value = MAX - 1 - i;
+	for(i=0;i<size;++i) {
+		VECTOR[i].value = LIST[i].value = size - 1 - i;
 		tommy_list_insert_tail(&list, &LIST[i].node, &LIST[i]);
 	}
 
@@ -345,15 +703,15 @@ void test_list(void)
 	STOP();
 
 	START("C qsort backward");
-	qsort(VECTOR, MAX, sizeof(VECTOR[0]), compare_vector);
+	qsort(VECTOR, size, sizeof(VECTOR[0]), compare_vector);
 	STOP();
 
 	test_list_order(list);
 
 	/* use a small range of random value to insert a lot of duplicates */
 	list = 0;
-	for(i=0;i<MAX;++i) {
-		VECTOR[i].value = LIST[i].value = rnd(MAX / 1000 + 2);
+	for(i=0;i<size;++i) {
+		VECTOR[i].value = LIST[i].value = rnd(size / 1000 + 2);
 		tommy_list_insert_tail(&list, &LIST[i].node, &LIST[i]);
 	}
 
@@ -362,7 +720,7 @@ void test_list(void)
 	STOP();
 
 	START("C qsort random duplicate");
-	qsort(VECTOR, MAX, sizeof(VECTOR[0]), compare_vector);
+	qsort(VECTOR, size, sizeof(VECTOR[0]), compare_vector);
 	STOP();
 
 	test_list_order(list);
@@ -374,12 +732,13 @@ void test_list(void)
 void test_array(void)
 {
 	tommy_array array;
-	unsigned i;
+	tommy_uintptr_t i;
+	const unsigned size = 50 * TOMMY_SIZE;
 
 	tommy_array_init(&array);
 
 	START("array init");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		tommy_array_grow(&array, i + 1);
 		if (tommy_array_get(&array, i) != 0)
 			abort();
@@ -387,30 +746,72 @@ void test_array(void)
 	STOP();
 
 	START("array set");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		tommy_array_set(&array, i, (void*)i);
 	}
 	STOP();
 
 	START("array get");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		if (tommy_array_get(&array, i) != (void*)i)
 			abort();
 	}
 	STOP();
 
+	if (tommy_array_memory_usage(&array) < size * sizeof(void*))
+		abort();
+
 	tommy_array_done(&array);
 }
 
+void test_arrayof(void) /* exercises tommy_arrayof with unsigned elements: grow, write, read back, check memory usage */
+{
+	tommy_arrayof arrayof;
+	unsigned i;
+	const unsigned size = 50 * TOMMY_SIZE;
+
+	tommy_arrayof_init(&arrayof, sizeof(unsigned)); /* each element is one unsigned */
+
+	START("arrayof init");
+	for(i=0;i<size;++i) {
+		tommy_arrayof_grow(&arrayof, i + 1); /* request room for i + 1 elements */
+		unsigned* ref = tommy_arrayof_ref(&arrayof, i);
+		if (*ref != 0) /* the element just made available must read as zero */
+			abort();
+	}
+	STOP();
+
+	START("arrayof set");
+	for(i=0;i<size;++i) {
+		unsigned* ref = tommy_arrayof_ref(&arrayof, i);
+		*ref = i; /* store the index itself so the read-back pass can verify it */
+	}
+	STOP();
+
+	START("arrayof get");
+	for(i=0;i<size;++i) {
+		unsigned* ref = tommy_arrayof_ref(&arrayof, i);
+		if (*ref != i) /* every slot must still hold the value written above */
+			abort();
+	}
+	STOP();
+
+	if (tommy_arrayof_memory_usage(&arrayof) < size * sizeof(unsigned)) /* reported usage cannot be below the raw payload size */
+		abort();
+
+	tommy_arrayof_done(&arrayof);
+}
+
 void test_arrayblk(void)
 {
 	tommy_arrayblk arrayblk;
-	unsigned i;
+	tommy_uintptr_t i;
+	const unsigned size = 50 * TOMMY_SIZE;
 
 	tommy_arrayblk_init(&arrayblk);
 
 	START("arrayblk init");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		tommy_arrayblk_grow(&arrayblk, i + 1);
 		if (tommy_arrayblk_get(&arrayblk, i) != 0)
 			abort();
@@ -418,112 +819,178 @@ void test_arrayblk(void)
 	STOP();
 
 	START("arrayblk set");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		tommy_arrayblk_set(&arrayblk, i, (void*)i);
 	}
 	STOP();
 
 	START("arrayblk get");
-	for(i=0;i<MAX*100;++i) {
+	for(i=0;i<size;++i) {
 		if (tommy_arrayblk_get(&arrayblk, i) != (void*)i)
 			abort();
 	}
 	STOP();
 
+	if (tommy_arrayblk_memory_usage(&arrayblk) < size * sizeof(void*))
+		abort();
+
 	tommy_arrayblk_done(&arrayblk);
 }
 
-void count_arg(void* arg, void* data)
+void test_arrayblkof(void) /* exercises tommy_arrayblkof with unsigned elements: grow, write, read back, check memory usage */
+{
+	tommy_arrayblkof arrayblkof;
+	unsigned i;
+	const unsigned size = 50 * TOMMY_SIZE;
+
+	tommy_arrayblkof_init(&arrayblkof, sizeof(unsigned)); /* each element is one unsigned */
+
+	START("arrayblkof init");
+	for(i=0;i<size;++i) {
+		tommy_arrayblkof_grow(&arrayblkof, i + 1); /* request room for i + 1 elements */
+		unsigned* ref = tommy_arrayblkof_ref(&arrayblkof, i);
+		if (*ref != 0) /* the element just made available must read as zero */
+			abort();
+	}
+	STOP();
+
+	START("arrayblkof set");
+	for(i=0;i<size;++i) {
+		unsigned* ref = tommy_arrayblkof_ref(&arrayblkof, i);
+		*ref = i; /* store the index itself so the read-back pass can verify it */
+	}
+	STOP();
+
+	START("arrayblkof get");
+	for(i=0;i<size;++i) {
+		unsigned* ref = tommy_arrayblkof_ref(&arrayblkof, i);
+		if (*ref != i) /* every slot must still hold the value written above */
+			abort();
+	}
+	STOP();
+
+	if (tommy_arrayblkof_memory_usage(&arrayblkof) < size * sizeof(unsigned)) /* reported usage cannot be below the raw payload size */
+		abort();
+
+	tommy_arrayblkof_done(&arrayblkof);
+}
+
+static unsigned the_count; /* invocation counter; the tests reset it to 0 before each foreach call */
+
+static void count_callback(void* data) /* foreach callback: counts how many times it is invoked */
+{
+	(void)data; /* the element itself is not inspected */
+	++the_count;
+}
+
+static void count_arg_callback(void* arg, void* data)
 {
 	unsigned* count = arg;
 	(void)data;
 	++*count;
 }
 
+static int search_callback(const void* arg, const void* obj) /* pointer-identity comparison used by the remove-by-search tests */
+{
+	return arg != obj; /* 0 when both pointers refer to the same object, non-zero otherwise */
+}
+
 void test_hashtable(void)
 {
-	tommy_list list;
 	tommy_hashtable hashtable;
 	struct object_hash* HASH;
-	unsigned i, n;
-	tommy_node* p;
+	unsigned i, j, n;
 	unsigned limit;
-	unsigned count;
+	const unsigned size = TOMMY_SIZE;
+	const unsigned module = TOMMY_SIZE / 4;
 
-	HASH = malloc(MAX * sizeof(struct object_hash));
+	HASH = malloc(size * sizeof(struct object_hash));
 
-	for(i=0;i<MAX;++i) {
-		HASH[i].value = i;
-	}
+	for(i=0;i<size;++i)
+		HASH[i].value = i % module;
+
+	/* initialize a very small hashtable */
+	tommy_hashtable_init(&hashtable, 1);
+
+	/* check that we allocated space for more elements */
+	if (hashtable.bucket_max == 1)
+		abort();
+
+	/* destroy it as empty */
+	tommy_hashtable_done(&hashtable);
 
 	START("hashtable stack");
-	limit = 10 * sqrt(MAX);
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = 5 * isqrt(size);
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashtable_init(&hashtable, limit / 2);
 
 		/* insert */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashtable_insert(&hashtable, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(i=0;i<n;++i)
+			tommy_hashtable_insert(&hashtable, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashtable_foreach_arg(&hashtable, count_arg, &count);
-		if (count != n)
+		if (tommy_hashtable_memory_usage(&hashtable) < n * sizeof(void*))
 			abort();
 
-		/* remove */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashtable_remove_existing(&hashtable, &obj->hashnode);
-		}
+		if (tommy_hashtable_count(&hashtable) != n)
+			abort();
+
+		the_count = 0;
+		tommy_hashtable_foreach(&hashtable, count_callback);
+		if (the_count != n)
+			abort();
+
+		/* remove in backward order */
+		for(i=0;i<n/2;++i)
+			tommy_hashtable_remove_existing(&hashtable, &HASH[n-i-1].node);
+
+		/* remove missing */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashtable_remove(&hashtable, search_callback, &HASH[n-i-1], HASH[n-i-1].value) != 0)
+				abort();
+
+		/* remove search */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashtable_remove(&hashtable, search_callback, &HASH[n/2-i-1], HASH[n/2-i-1].value) == 0)
+				abort();
 
 		tommy_hashtable_done(&hashtable);
 	}
 	STOP();
 
 	START("hashtable queue");
-	limit = sqrt(MAX) / 8;
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = isqrt(size) / 16;
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashtable_init(&hashtable, limit / 2);
 
 		/* insert first run */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashtable_insert(&hashtable, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(j=0,i=0;i<n;++i)
+			tommy_hashtable_insert(&hashtable, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashtable_foreach_arg(&hashtable, count_arg, &count);
-		if (count != n)
+		the_count = 0;
+		tommy_hashtable_foreach_arg(&hashtable, count_arg_callback, &the_count);
+		if (the_count != n)
 			abort();
 
 		/* insert all the others */
-		for(;i<MAX;++i) {
-			struct object_hash* obj;
-
+		for(;i<size;++i,++j) {
 			/* insert one */
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashtable_insert(&hashtable, &HASH[i].hashnode, &HASH[i], HASH[i].value);
+			tommy_hashtable_insert(&hashtable, &HASH[i].node, &HASH[i], HASH[i].value);
 
 			/* remove one */
-			p = tommy_list_head(&list);
-			obj = p->data;
-			tommy_list_remove_existing(&list, p);
-			tommy_hashtable_remove_existing(&hashtable, &obj->hashnode);
+			tommy_hashtable_remove_existing(&hashtable, &HASH[j].node);
 		}
 
-		/* remove remaining */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashtable_remove_existing(&hashtable, &obj->hashnode);
-		}
+		for(;j<size;++j)
+			if (tommy_hashtable_remove(&hashtable, search_callback, &HASH[j], HASH[j].value) == 0)
+				abort();
 
 		tommy_hashtable_done(&hashtable);
 	}
@@ -532,88 +999,90 @@ void test_hashtable(void)
 
 void test_hashdyn(void)
 {
-	tommy_list list;
 	tommy_hashdyn hashdyn;
 	struct object_hash* HASH;
-	unsigned i, n;
-	tommy_node* p;
+	unsigned i, j, n;
 	unsigned limit;
-	unsigned count;
+	const unsigned size = TOMMY_SIZE;
+	const unsigned module = TOMMY_SIZE / 4;
 
-	HASH = malloc(MAX * sizeof(struct object_hash));
+	HASH = malloc(size * sizeof(struct object_hash));
 
-	for(i=0;i<MAX;++i) {
-		HASH[i].value = i;
-	}
+	for(i=0;i<size;++i)
+		HASH[i].value = i % module;
 
 	START("hashdyn stack");
-	limit = 10 * sqrt(MAX);
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = 5 * isqrt(size);
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashdyn_init(&hashdyn);
 
 		/* insert */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashdyn_insert(&hashdyn, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(i=0;i<n;++i)
+			tommy_hashdyn_insert(&hashdyn, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashdyn_foreach_arg(&hashdyn, count_arg, &count);
-		if (count != n)
+		if (tommy_hashdyn_memory_usage(&hashdyn) < n * sizeof(void*))
 			abort();
 
-		/* remove */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode);
-		}
+		if (tommy_hashdyn_count(&hashdyn) != n)
+			abort();
+
+		the_count = 0;
+		tommy_hashdyn_foreach(&hashdyn, count_callback);
+		if (the_count != n)
+			abort();
+
+		/* remove in backward order */
+		for(i=0;i<n/2;++i)
+			tommy_hashdyn_remove_existing(&hashdyn, &HASH[n-i-1].node);
+
+		/* remove missing */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashdyn_remove(&hashdyn, search_callback, &HASH[n-i-1], HASH[n-i-1].value) != 0)
+				abort();
+
+		/* remove search */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashdyn_remove(&hashdyn, search_callback, &HASH[n/2-i-1], HASH[n/2-i-1].value) == 0)
+				abort();
 
 		tommy_hashdyn_done(&hashdyn);
 	}
 	STOP();
 
 	START("hashdyn queue");
-	limit = sqrt(MAX) / 8;
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = isqrt(size) / 16;
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashdyn_init(&hashdyn);
 
 		/* insert first run */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashdyn_insert(&hashdyn, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(j=0,i=0;i<n;++i)
+			tommy_hashdyn_insert(&hashdyn, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashdyn_foreach_arg(&hashdyn, count_arg, &count);
-		if (count != n)
+		the_count = 0;
+		tommy_hashdyn_foreach_arg(&hashdyn, count_arg_callback, &the_count);
+		if (the_count != n)
 			abort();
 
 		/* insert all the others */
-		for(;i<MAX;++i) {
-			struct object_hash* obj;
-
+		for(;i<size;++i,++j) {
 			/* insert one */
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashdyn_insert(&hashdyn, &HASH[i].hashnode, &HASH[i], HASH[i].value);
+			tommy_hashdyn_insert(&hashdyn, &HASH[i].node, &HASH[i], HASH[i].value);
 
 			/* remove one */
-			p = tommy_list_head(&list);
-			obj = p->data;
-			tommy_list_remove_existing(&list, p);
-			tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode);
+			tommy_hashdyn_remove_existing(&hashdyn, &HASH[j].node);
 		}
 
-		/* remove remaining */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashdyn_remove_existing(&hashdyn, &obj->hashnode);
-		}
+		for(;j<size;++j)
+			if (tommy_hashdyn_remove(&hashdyn, search_callback, &HASH[j], HASH[j].value) == 0)
+				abort();
 
 		tommy_hashdyn_done(&hashdyn);
 	}
@@ -622,105 +1091,280 @@ void test_hashdyn(void)
 
 void test_hashlin(void)
 {
-	tommy_list list;
 	tommy_hashlin hashlin;
 	struct object_hash* HASH;
-	unsigned i, n;
-	tommy_node* p;
+	unsigned i, j, n;
 	unsigned limit;
-	unsigned count;
+	const unsigned size = TOMMY_SIZE;
+	const unsigned module = TOMMY_SIZE / 4;
+	tommy_hashlin_node* bucket;
+
+	HASH = malloc(size * sizeof(struct object_hash));
 
-	HASH = malloc(MAX * sizeof(struct object_hash));
+	for(i=0;i<size;++i)
+		HASH[i].value = i % module;
 
-	for(i=0;i<MAX;++i) {
-		HASH[i].value = i;
+	tommy_hashlin_init(&hashlin);
+
+	/* insert */
+	for(i=0;i<size;++i)
+		tommy_hashlin_insert(&hashlin, &HASH[i].node, &HASH[i], HASH[i].value);
+
+	/* get the bucket of the last element */
+	bucket = tommy_hashlin_bucket(&hashlin, module - 1);
+
+	/* search for element */
+	while (bucket) {
+		struct object_hash* obj = bucket->data;
+		if (obj->value == module - 1)
+			break;
+		bucket = bucket->next;
 	}
+	if (bucket == 0)
+		abort();
+
+	/* deinitialize without removing elements */
+	tommy_hashlin_done(&hashlin);
 
 	START("hashlin stack");
-	limit = 10 * sqrt(MAX);
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = 5 * isqrt(size);
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashlin_init(&hashlin);
 
 		/* insert */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashlin_insert(&hashlin, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(i=0;i<n;++i)
+			tommy_hashlin_insert(&hashlin, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashlin_foreach_arg(&hashlin, count_arg, &count);
-		if (count != n)
+		if (tommy_hashlin_memory_usage(&hashlin) < n * sizeof(void*))
 			abort();
 
-		/* remove */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashlin_remove_existing(&hashlin, &obj->hashnode);
-		}
+		if (tommy_hashlin_count(&hashlin) != n)
+			abort();
+
+		the_count = 0;
+		tommy_hashlin_foreach(&hashlin, count_callback);
+		if (the_count != n)
+			abort();
+
+		/* remove in backward order */
+		for(i=0;i<n/2;++i)
+			tommy_hashlin_remove_existing(&hashlin, &HASH[n-i-1].node);
+
+		/* remove missing */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashlin_remove(&hashlin, search_callback, &HASH[n-i-1], HASH[n-i-1].value) != 0)
+				abort();
+
+		/* remove search */
+		for(i=0;i<n/2;++i)
+			if (tommy_hashlin_remove(&hashlin, search_callback, &HASH[n/2-i-1], HASH[n/2-i-1].value) == 0)
+				abort();
 
 		tommy_hashlin_done(&hashlin);
 	}
 	STOP();
 
 	START("hashlin queue");
-	limit = sqrt(MAX) / 8;
-	for(n=0;n<limit;++n) {
-		tommy_list_init(&list);
+	limit = isqrt(size) / 16;
+	for(n=0;n<=limit;++n) {
+		/* last iteration is full size */
+		if (n == limit)
+			n = limit = size;
+
 		tommy_hashlin_init(&hashlin);
 
 		/* insert first run */
-		for(i=0;i<n;++i) {
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashlin_insert(&hashlin, &HASH[i].hashnode, &HASH[i], HASH[i].value);
-		}
+		for(j=0,i=0;i<n;++i)
+			tommy_hashlin_insert(&hashlin, &HASH[i].node, &HASH[i], HASH[i].value);
 
-		count = 0;
-		tommy_hashlin_foreach_arg(&hashlin, count_arg, &count);
-		if (count != n)
+		the_count = 0;
+		tommy_hashlin_foreach_arg(&hashlin, count_arg_callback, &the_count);
+		if (the_count != n)
 			abort();
 
 		/* insert all the others */
-		for(;i<MAX;++i) {
-			struct object_hash* obj;
-
+		for(;i<size;++i,++j) {
 			/* insert one */
-			tommy_list_insert_head(&list, &HASH[i].node, &HASH[i]);
-			tommy_hashlin_insert(&hashlin, &HASH[i].hashnode, &HASH[i], HASH[i].value);
+			tommy_hashlin_insert(&hashlin, &HASH[i].node, &HASH[i], HASH[i].value);
 
 			/* remove one */
-			p = tommy_list_head(&list);
-			obj = p->data;
-			tommy_list_remove_existing(&list, p);
-			tommy_hashlin_remove_existing(&hashlin, &obj->hashnode);
+			tommy_hashlin_remove_existing(&hashlin, &HASH[j].node);
 		}
 
-		/* remove remaining */
-		p = tommy_list_head(&list);
-		while (p) {
-			struct object_hash* obj = p->data;
-			p = p->next;
-			tommy_hashlin_remove_existing(&hashlin, &obj->hashnode);
-		}
+		for(;j<size;++j)
+			if (tommy_hashlin_remove(&hashlin, search_callback, &HASH[j], HASH[j].value) == 0)
+				abort();
 
 		tommy_hashlin_done(&hashlin);
 	}
 	STOP();
 }
 
+void test_trie(void) /* exercises tommy_trie: insert, duplicates of key 0, search, removal by node and by key */
+{
+	tommy_trie trie;
+	tommy_allocator alloc;
+	struct object_trie* OBJ;
+	struct object_trie DUP[2];
+	unsigned i;
+	const unsigned size = TOMMY_SIZE * 4;
+
+	OBJ = malloc(size * sizeof(struct object_trie)); /* NOTE(review): result is not checked and no free() is visible in this function */
+
+	for(i=0;i<size;++i)
+		OBJ[i].value = i; /* unique keys 0..size-1 */
+
+	START("trie");
+	tommy_allocator_init(&alloc, TOMMY_TRIE_BLOCK_SIZE, TOMMY_TRIE_BLOCK_SIZE);
+	tommy_trie_init(&trie, &alloc);
+
+	/* insert every object, keyed by its value */
+	for(i=0;i<size;++i)
+		tommy_trie_insert(&trie, &OBJ[i].node, &OBJ[i], OBJ[i].value);
+
+	if (tommy_trie_memory_usage(&trie) < size * sizeof(tommy_trie_node)) /* at least one node per inserted object */
+		abort();
+
+	if (tommy_allocator_memory_usage(&alloc) < trie.node_count * TOMMY_TRIE_BLOCK_SIZE) /* allocator must account for every trie node block */
+		abort();
+
+	if (tommy_trie_count(&trie) != size)
+		abort();
+
+	/* insert two duplicates of key 0, which OBJ[0] already uses */
+	for(i=0;i<2;++i) {
+		DUP[i].value = 0;
+		tommy_trie_insert(&trie, &DUP[i].node, &DUP[i], DUP[i].value);
+	}
+
+	/* search present: the first half of the keys must all be found */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_search(&trie, OBJ[i].value) == 0)
+			abort();
+
+	/* remove first duplicate */
+	tommy_trie_remove_existing(&trie, &DUP[0].node);
+
+	/* remove existing: drop the first half of the objects by node */
+	for(i=0;i<size/2;++i)
+		tommy_trie_remove_existing(&trie, &OBJ[i].node);
+
+	/* remove missing using the same bucket of the duplicate (OBJ[1] was removed by the loop above) */
+	if (tommy_trie_remove(&trie, 1) != 0)
+		abort();
+
+	/* search missing using the same bucket of the duplicate */
+	if (tommy_trie_search(&trie, 1) != 0)
+		abort();
+
+	/* remove second duplicate */
+	tommy_trie_remove_existing(&trie, &DUP[1].node);
+
+	/* remove missing: keys of the first half are gone, so remove must return 0 */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_remove(&trie, OBJ[i].value) != 0)
+			abort();
+
+	/* search missing: the same keys must not be found either */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_search(&trie, OBJ[i].value) != 0)
+			abort();
+
+	/* remove present: the second half is still stored and is removed by key */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_remove(&trie, OBJ[size/2+i].value) == 0)
+			abort();
+
+	tommy_allocator_done(&alloc);
+	STOP();
+}
+
+void test_trie_inplace(void) /* exercises tommy_trie_inplace: insert, duplicates of key 0, search, removal by node and by key */
+{
+	tommy_trie_inplace trie_inplace;
+	struct object_trie_inplace* OBJ;
+	struct object_trie_inplace DUP[2];
+	unsigned i;
+	const unsigned size = TOMMY_SIZE * 4;
+
+	OBJ = malloc(size * sizeof(struct object_trie_inplace)); /* NOTE(review): result is not checked and no free() is visible in this function */
+
+	for(i=0;i<size;++i)
+		OBJ[i].value = i; /* unique keys 0..size-1 */
+
+	START("trie_inplace");
+	tommy_trie_inplace_init(&trie_inplace);
+
+	/* insert every object, keyed by its value */
+	for(i=0;i<size;++i)
+		tommy_trie_inplace_insert(&trie_inplace, &OBJ[i].node, &OBJ[i], OBJ[i].value);
+
+	if (tommy_trie_inplace_memory_usage(&trie_inplace) < size * sizeof(tommy_trie_inplace_node)) /* at least one node per inserted object */
+		abort();
+
+	if (tommy_trie_inplace_count(&trie_inplace) != size)
+		abort();
+
+	/* insert two duplicates of key 0, which OBJ[0] already uses */
+	for(i=0;i<2;++i) {
+		DUP[i].value = 0;
+		tommy_trie_inplace_insert(&trie_inplace, &DUP[i].node, &DUP[i], DUP[i].value);
+	}
+
+	/* search present: the first half of the keys must all be found */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_inplace_search(&trie_inplace, OBJ[i].value) == 0)
+			abort();
+
+	/* remove first duplicate */
+	tommy_trie_inplace_remove_existing(&trie_inplace, &DUP[0].node);
+
+	/* remove existing: drop the first half of the objects by node */
+	for(i=0;i<size/2;++i)
+		tommy_trie_inplace_remove_existing(&trie_inplace, &OBJ[i].node);
+
+	/* remove second duplicate */
+	tommy_trie_inplace_remove_existing(&trie_inplace, &DUP[1].node);
+
+	/* remove missing: keys of the first half are gone, so remove must return 0 */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_inplace_remove(&trie_inplace, OBJ[i].value) != 0)
+			abort();
+
+	/* search missing: the same keys must not be found either */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_inplace_search(&trie_inplace, OBJ[i].value) != 0)
+			abort();
+
+	/* remove present: the second half is still stored and is removed by key */
+	for(i=0;i<size/2;++i)
+		if (tommy_trie_inplace_remove(&trie_inplace, OBJ[size/2+i].value) == 0)
+			abort();
+
+	STOP();
+}
+
 int main() {
 	nano_init();
 
 	printf("Tommy check program.\n");
 
+	test_hash();
+	test_alloc();
 	test_list();
 	test_array();
+	test_arrayof();
 	test_arrayblk();
+	test_arrayblkof();
 	test_hashtable();
 	test_hashdyn();
 	test_hashlin();
+	test_trie();
+	test_trie_inplace();
 
 	printf("OK\n");
 
diff --git a/tommyds/tommy.h b/tommyds/tommy.h
index 75d502e..b92863f 100644
--- a/tommyds/tommy.h
+++ b/tommyds/tommy.h
@@ -27,13 +27,13 @@
 
 /** \mainpage
  * \section Introduction
- * Tommy is a C library of hashtables and tries designed to store and find objects
- * with high performance.
+ * Tommy is a C library of arrays, hashtables and tries data structures,
+ * designed for high performance and providing an easy to use interface.
  *
  * It's <b>faster</b> than all the similar libraries like
  * <a href="http://www.canonware.com/rb/">rbtree</a>,
  * <a href="http://judy.sourceforge.net/">judy</a>,
- * <a href="http://code.google.com/p/cpp-btree/">googlebtree</a>
+ * <a href="http://code.google.com/p/cpp-btree/">googlebtree</a>,
  * <a href="http://panthema.net/2007/stx-btree/">stxbtree</a>,
  * <a href="http://attractivechaos.awardspace.com/">khash</a>,
  * <a href="http://uthash.sourceforge.net/">uthash</a>,
@@ -59,8 +59,6 @@
  *
  * The most interesting are ::tommy_array, ::tommy_hashdyn, ::tommy_hashlin, ::tommy_trie and ::tommy_trie_inplace.
  *
- * Tommy is released with a \ref license "2-clause BSD license".
- *
  * The official site of TommyDS is <a href="http://tommyds.sourceforge.net/">http://tommyds.sourceforge.net/</a>,
  *
  * \section Use
@@ -132,46 +130,35 @@
  * To compute the hash value, you can use the generic tommy_hash_u32() function, or the
  * specialized integer hash function tommy_inthash_u32().
  *
- * \section Performance
- * Here you can see some timings comparing with other natable implementations in the <i>Hit</i>
- * and <i>Change</i> graphs. Hit means searching an object with a key with success,
- * and Change means searching, removing and reinsert it with a different key value.
- *
- * Times are expressed in nanoseconds for element, and <b>lower is better</b>.
- *
- * To have some reference numbers, you can check <a href="https://gist.github.com/jboner/2841832">Latency numbers every programmer should know</a>.
- *
- * A complete analysis is available in the \ref benchmark page.
- *
- * <img src="def/img_random_hit.png"/>
- *
- * <img src="def/img_random_change.png"/>
- *
  * \section Features
  *
  * Tommy is fast and easy to use.
  *
- * Tommy is 100% portable in all the platforms and operating systems.
+ * Tommy is portable to all platforms and operating systems.
  *
  * Tommy containers support multiple elements with the same key.
  *
- * See the \ref design page for more details.
+ * Tommy containers keep the original insertion order of elements with equal keys.
  *
- * \section Limitations
+ * Tommy is released with the \ref license "2-clause BSD license".
  *
- * Tommy is not thread safe. You have always to provide thread safety using
- * locks before calling any Tommy functions.
+ * See the \ref design page for more details and limitations.
  *
- * Tommy doesn't provide iterators over the implicit order defined by the data
- * structures. To iterate on elements you must insert them also into a ::tommy_list,
- * and use the list as iterator. See the \ref multiindex example for more details.
- * Note that this is a real limitation only for ::tommy_trie, as it's the only
- * data structure defining an useable order.
+ * \section Performance
+ * Here you can see some timings comparing with other notable implementations.
+ * The <i>Hit</i> graph shows the time required for searching random objects with a key.
+ * The <i>Change</i> graph shows the time required for searching, removing and reinserting random objects
+ * with a different key value.
  *
- * Tommy doesn't provide an error reporting mechanism for a malloc() failure.
- * You have to provide it redefining malloc() if you expect it to fail.
+ * Times are expressed in nanoseconds for each element, and <b>lower is better</b>.
  *
- * Tommy assumes to never have more than 2^32-1 elements in a container.
+ * To have some reference numbers, you can check <a href="https://gist.github.com/jboner/2841832">Latency numbers every programmer should know</a>.
+ *
+ * A complete analysis is available in the \ref benchmark page.
+ *
+ * <img src="def/img_random_hit.png"/>
+ *
+ * <img src="def/img_random_change.png"/>
  *
  * \page benchmark Tommy Benchmarks
  *
@@ -250,6 +237,11 @@
  * starting at 0. Using a 0 base would have given an unfair advantage to some
  * implementation handling it as a special case.
  *
+ * For all the hashtables the keys are hashed using the tommy_inthash_u32() function
+ * that ensures a uniform distribution. This hash function is also reversible,
+ * meaning that no collision is going to be caused by hashing the keys.
+ * For tries and trees the keys are not hashed, and used directly.
+ *
  * The tests are repeated using keys in <i>Random</i> mode and in <i>Forward</i> mode.
  * In the forward mode the key values are used in order from the lowest to the highest.
  * In the random mode the key values are used in a completely random order.
@@ -336,7 +328,7 @@
  * This is possible if you know in advance the distribution of keys.
  * For example, in the benchmark you could use something like:
  * \code
- * #define hash(v) tommy_inthash32(v & ~0xF) + (v & 0xF)
+ * #define hash(v) tommy_inthash_u32(v & ~0xF) + (v & 0xF)
  * \endcode
  * and make keys that differ only by the lowest bits
  * to have hashes with the same property, resulting in
@@ -379,8 +371,8 @@
  * \section code Code
  *
  * The compilers used in the benchmark are:
- *  - <b>gcc 4.7.1</b> in Linux with options: -O3 -march=pentium4 -mtune=generic
- *  - <b>Visual C 2012</b> in Windows with options: /Ox /GL /GS-
+ *  - <b>gcc 4.9.2</b> in Linux with options: -O3 -march=nehalem
+ *  - <b>Visual C 2012</b> in Windows with options: /Ox /Oy- /GL /GS- /arch:SSE2
  *
  * The following is pseudo code of the benchmark used. In this case it's written for the C++ unordered_map.
  *
@@ -525,7 +517,7 @@
  * \subsection googlelibchash Google C libchash
  * It's the C implementation located in the <i>experimental/</i> directory of the googlesparsehash archive.
  * It has very bad performances in the <i>Change</i> test for some N values.
- * See this <a href="other/slow_problem.png">graph</a> with a lot of spikes.
+ * See this <a href="other/googlelibchash_problem.png">graph</a> with a lot of spikes.
  * The C++ version doesn't suffer of this problem.
  *
  * \subsection googledensehash Google C++ densehash
@@ -551,7 +543,7 @@
  *
  * \subsection ck Concurrency Kit
  * It has very bad performances in the <i>Change</i> test for some N values.
- * See this <a href="other/slow_problem.png">graph</a> with a lot of spikes.
+ * See this <a href="other/ck_problem.png">graph</a> with a lot of spikes.
  *
  * \page multiindex Tommy Multi Indexing
  *
@@ -560,7 +552,7 @@
  * and use the list as iterator.
  *
  * This technique allows to keep track of the insertion order with the list,
- * and provide more search possibilities using different data structures for
+ * and provides more search possibilities using different data structures for
  * different search keys.
  *
  * See the next example, for a objects inserted in a ::tommy_list, and in
@@ -646,50 +638,78 @@
  *
  * \page design Tommy Design
  *
- * Tommy is mainly designed to provide high performance, but much care was
- * also given in the definition of an useable API. In case, even making some
- * compromise with efficency.
+ * Tommy is designed to fulfill the need of generic data structures for the
+ * C language, providing at the same time high performance and a clean
+ * and easy to use interface.
  *
- * \section multi Multi key
- * All the Tommy containers support the insertion of multiple elements with
- * the same key.
+ * \section testing Testing
+ *
+ * Extensive and automated tests with the runtime checker <a href="http://valgrind.org/">valgrind</a>
+ * and the static analyzer <a href="http://clang-analyzer.llvm.org/">clang</a>
+ * are done to ensure the correctness of the library.
+ *
+ * The test has a <a href="http://tommyds.sourceforge.net/cov/tommyds/tommyds">code coverage of 100%</a>,
+ * measured with <a href="http://ltp.sourceforge.net/coverage/lcov.php">lcov</a>.
+ *
+ * \section Limitations
+ *
+ * Tommy is not thread safe. You have always to provide thread safety using
+ * locks before calling any Tommy functions.
  *
- * This allow the maximum flexibility, but in some cases it requires some
- * more space to keep a list of equal elements.
+ * Tommy doesn't provide iterators over the implicit order defined by the data
+ * structures. To iterate on elements you must insert them also into a ::tommy_list,
+ * and use the list as iterator. See the \ref multiindex example for more details.
+ * Note that this is a real limitation only for ::tommy_trie, as it's the only
+ * data structure defining a usable order.
  *
- * \section datapointer Data pointer
- * The tommy_node::data field is present to provide a simpler API.
+ * Tommy doesn't provide an error reporting mechanism for a malloc() failure.
+ * You have to provide it redefining malloc() if you expect it to fail.
  *
- * A more memory conservative approach is to do not store this pointer, and
- * computing it from the embedded node pointer every time.
+ * Tommy assumes to never have more than 2^32-1 elements in a container.
+ *
+ * \section compromise Compromises
+ *
+ * Finding the right balance between efficiency and ease of use required some
+ * compromises, mostly on memory efficiency, to avoid crippling the interface.
+ * The following is a list of such decisions.
  *
- * See for example the Linux Kernel declaration of container_of() at
- * http://lxr.free-electrons.com/ident?i=container_of
+ * \subsection multi_key Multi key
+ * All the Tommy containers support the insertion of multiple elements with
+ * the same key, adding in each node a list of equal elements.
  *
- * Although, it would have required more complexity for the user to require
- * a manual conversion from a node to the object containing the node.
+ * They are the equivalent of the C++ associative containers <a href="http://www.cplusplus.com/reference/map/multimap/">multimap\<unsigned,void*\></a>
+ * and <a href="http://www.cplusplus.com/reference/unordered_map/unordered_multimap/">unordered_multimap\<unsigned,void*\></a>
+ * that allow duplicates of the same key.
  *
- * \section zero_list Zero terminated next list
- * The half 0 terminated format of tommy_node::next is present to provide
- * a forward iterator terminating in 0.
+ * A more memory conservative approach is to not allow duplicated elements,
+ * removing the need of this list.
  *
- * A more efficient approach is to use a double circular list, as operating on
- * nodes in a circular list doesn't requires to manage the special terminating
- * case.
+ * \subsection data_pointer Data pointer
+ * The tommy_node::data field is present to allow search and remove functions to return
+ * directly a pointer to the element stored in the container.
  *
- * Although, it would have required more complexity at the user for a simple
- * iteration.
+ * A more memory conservative approach is to require the user to compute
+ * the element pointer from the embedded node with a fixed displacement.
+ * For an example, see the Linux Kernel declaration of
+ * <a href="http://lxr.free-electrons.com/ident?i=container_of">container_of()</a>.
  *
- * \section double_linked Double linked list for collisions
- * The linked list used for collision is a double linked list to allow
- * insertion of elements at the end of the list to keep the insertion order
- * of equal elements.
+ * \subsection insertion_order Insertion order
+ * The list used for collisions is double linked to allow
+ * insertion of elements at the end of the list to keep the
+ * insertion order of equal elements.
  *
  * A more memory conservative approach is to use a single linked list,
- * inserting elements only at the start of the list.
- * On the other hand, with with a double linked list we can concatenate
- * two lists in constant time, as using the previous circular element we
- * can get a tail pointer.
+ * inserting elements only at the start of the list, losing the
+ * original insertion order.
+ *
+ * \subsection zero_list Zero terminated list
+ * The 0 terminated format of tommy_node::next is present to provide
+ * a forward iterator terminating in 0. This allows the user to write a simple
+ * iteration loop over the list of elements in the same bucket.
+ *
+ * A more efficient approach is to use a circular list, as operating on
+ * nodes in a circular list doesn't require managing the special
+ * terminating case when adding or removing elements.
  *
  * \page license Tommy License
  * Tommy is released with a <i>2-clause BSD license</i>.
diff --git a/tommyds/tommyalloc.c b/tommyds/tommyalloc.c
index 8a0fa6b..2c51365 100644
--- a/tommyds/tommyalloc.c
+++ b/tommyds/tommyalloc.c
@@ -44,9 +44,8 @@ void tommy_allocator_init(tommy_allocator* alloc, tommy_size_t block_size, tommy
 		align_size = sizeof(void*);
 
 	/* ensure that the block_size keeps the alignment */
-	if (block_size % align_size != 0) {
+	if (block_size % align_size != 0)
 		block_size += align_size - block_size % align_size;
-	}
 
 	alloc->block_size = block_size;
 	alloc->align_size = align_size;
diff --git a/tommyds/tommychain.h b/tommyds/tommychain.h
index a9c05f8..71d3ccc 100644
--- a/tommyds/tommychain.h
+++ b/tommyds/tommychain.h
@@ -210,11 +210,10 @@ tommy_inline void tommy_chain_mergesort(tommy_chain* chain, tommy_compare_func*
 	mask = counter >> i;
 	while (mask != 1) {
 		mask >>= 1;
-		if (mask & 1) {
+		if (mask & 1)
 			tommy_chain_merge_degenerated(&bit[i + 1], &bit[i], cmp);
-		} else {
+		else
 			bit[i + 1] = bit[i];
-		}
 		++i;
 	}
 
diff --git a/tommyds/tommyhash.h b/tommyds/tommyhash.h
index 329ef9d..33317b4 100644
--- a/tommyds/tommyhash.h
+++ b/tommyds/tommyhash.h
@@ -48,7 +48,7 @@ typedef tommy_key_t tommy_hash_t;
  * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle().
  * \param init_val Initialization value.
  * Using a different initialization value, you can generate a completely different set of hash values.
- * Use 0 if not relevalt.
+ * Use 0 if not relevant.
  * \param void_key Pointer at the data to hash.
  * \param key_len Size of the data to hash.
  * \note
@@ -63,7 +63,7 @@ tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tom
  * from http://www.burtleburtle.net/bob/hash/doobs.html, function hashlittle2().
  * \param init_val Initialization value.
  * Using a different initialization value, you can generate a completely different set of hash values.
- * Use 0 if not relevalt.
+ * Use 0 if not relevant.
  * \param void_key Pointer at the data to hash.
  * \param key_len Size of the data to hash.
  * \note
@@ -73,7 +73,7 @@ tommy_uint32_t tommy_hash_u32(tommy_uint32_t init_val, const void* void_key, tom
 tommy_uint64_t tommy_hash_u64(tommy_uint64_t init_val, const void* void_key, tommy_size_t key_len);
 
 /**
- * Integer hash of 32 bits.
+ * Integer reversible hash function for 32 bits.
  * Implementation of the Robert Jenkins "4-byte Integer Hashing",
  * from http://burtleburtle.net/bob/hash/integer.html
  */
@@ -91,9 +91,9 @@ tommy_inline tommy_uint32_t tommy_inthash_u32(tommy_uint32_t key)
 }
 
 /**
- * Integer hash of 64 bits.
+ * Integer reversible hash function for 64 bits.
  * Implementation of the Thomas Wang "Integer Hash Function",
- * from http://www.cris.com/~Ttwang/tech/inthash.htm
+ * from http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
  */
 tommy_inline tommy_uint64_t tommy_inthash_u64(tommy_uint64_t key)
 {
diff --git a/tommyds/tommyhashdyn.c b/tommyds/tommyhashdyn.c
index 4da3fe6..02ef404 100644
--- a/tommyds/tommyhashdyn.c
+++ b/tommyds/tommyhashdyn.c
@@ -84,11 +84,11 @@ static void tommy_hashdyn_resize(tommy_hashdyn* hashdyn, tommy_count_t new_bucke
 			j = hashdyn->bucket[i];
 			while (j) {
 				tommy_hashdyn_node* j_next = j->next;
-				tommy_count_t index = j->key & new_bucket_mask;
-				if (new_bucket[index])
-					tommy_list_insert_tail_not_empty(new_bucket[index], j);
+				tommy_count_t pos = j->key & new_bucket_mask;
+				if (new_bucket[pos])
+					tommy_list_insert_tail_not_empty(new_bucket[pos], j);
 				else
-					tommy_list_insert_first(&new_bucket[index], j);
+					tommy_list_insert_first(&new_bucket[pos], j);
 				j = j_next;
 			}
 		}
@@ -120,9 +120,8 @@ static void tommy_hashdyn_resize(tommy_hashdyn* hashdyn, tommy_count_t new_bucke
 tommy_inline void hashdyn_grow_step(tommy_hashdyn* hashdyn)
 {
 	/* grow if more than 50% full */
-	if (hashdyn->count >= hashdyn->bucket_max / 2) {
+	if (hashdyn->count >= hashdyn->bucket_max / 2)
 		tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit + 1);
-	}
 }
 
 /**
@@ -131,9 +130,8 @@ tommy_inline void hashdyn_grow_step(tommy_hashdyn* hashdyn)
 tommy_inline void hashdyn_shrink_step(tommy_hashdyn* hashdyn)
 {
 	/* shrink if less than 12.5% full */
-	if (hashdyn->count <= hashdyn->bucket_max / 8 && hashdyn->bucket_bit > TOMMY_HASHDYN_BIT) {
+	if (hashdyn->count <= hashdyn->bucket_max / 8 && hashdyn->bucket_bit > TOMMY_HASHDYN_BIT)
 		tommy_hashdyn_resize(hashdyn, hashdyn->bucket_bit - 1);
-	}
 }
 
 void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void* data, tommy_hash_t hash)
@@ -164,7 +162,7 @@ void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node*
 
 void* tommy_hashdyn_remove(tommy_hashdyn* hashdyn, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
 {
-	tommy_count_t pos = hash % hashdyn->bucket_max;
+	tommy_count_t pos = hash & hashdyn->bucket_mask;
 	tommy_hashdyn_node* node = hashdyn->bucket[pos];
 
 	while (node) {
diff --git a/tommyds/tommyhashdyn.h b/tommyds/tommyhashdyn.h
index 11f2447..afbdfab 100644
--- a/tommyds/tommyhashdyn.h
+++ b/tommyds/tommyhashdyn.h
@@ -189,7 +189,6 @@ void tommy_hashdyn_insert(tommy_hashdyn* hashdyn, tommy_hashdyn_node* node, void
  * You have to provide a compare function and the hash of the element you want to remove.
  * If the element is not found, 0 is returned.
  * If more equal elements are present, the first one is removed.
- * This operation is faster than calling tommy_hashdyn_bucket() and tommy_hashdyn_remove_existing() separately.
  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
  * The function should return 0 for equal elements, anything other for different elements.
  * \param cmp_arg Compare argument passed as first argument of the compare function.
@@ -274,7 +273,7 @@ void* tommy_hashdyn_remove_existing(tommy_hashdyn* hashdyn, tommy_hashdyn_node*
 void tommy_hashdyn_foreach(tommy_hashdyn* hashdyn, tommy_foreach_func* func);
 
 /**
- * Calls the specified function with argument for each element in the hashtable.
+ * Calls the specified function with an argument for each element in the hashtable.
  */
 void tommy_hashdyn_foreach_arg(tommy_hashdyn* hashdyn, tommy_foreach_arg_func* func, void* arg);
 
diff --git a/tommyds/tommyhashlin.c b/tommyds/tommyhashlin.c
index bea9403..bcf7d98 100644
--- a/tommyds/tommyhashlin.c
+++ b/tommyds/tommyhashlin.c
@@ -40,6 +40,20 @@
 #define TOMMY_HASHLIN_STATE_GROW 1
 #define TOMMY_HASHLIN_STATE_SHRINK 2
 
+/**
+ * Set the hashtable in stable state.
+ */
+tommy_inline void tommy_hashlin_stable(tommy_hashlin* hashlin)
+{
+	hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
+
+	/* setup low_mask/max/split to allow tommy_hashlin_bucket_ref() */
+	/* and tommy_hashlin_foreach() to work regardless of whether we are in stable state */
+	hashlin->low_max = hashlin->bucket_max;
+	hashlin->low_mask = hashlin->bucket_mask;
+	hashlin->split = 0;
+}
+
 void tommy_hashlin_init(tommy_hashlin* hashlin)
 {
 	tommy_uint_t i;
@@ -53,7 +67,7 @@ void tommy_hashlin_init(tommy_hashlin* hashlin)
 		hashlin->bucket[i] = hashlin->bucket[0];
 
 	/* stable state */
-	hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
+	tommy_hashlin_stable(hashlin);
 
 	hashlin->count = 0;
 }
@@ -69,45 +83,6 @@ void tommy_hashlin_done(tommy_hashlin* hashlin)
 	}
 }
 
-/**
- * Return the bucket at the specified pos.
- */
-tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
-{
-	tommy_uint_t bsr;
-
-	/* get the highest bit set, in case of all 0, return 0 */
-	bsr = tommy_ilog2_u32(pos | 1);
-
-	return &hashlin->bucket[bsr][pos];
-}
-
-/**
- * Return the bucket to use.
- */
-tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ptr(tommy_hashlin* hashlin, tommy_hash_t hash)
-{
-	tommy_count_t pos;
-
-	/* if we are reallocating */
-	if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) {
-		/* compute the old position */
-		pos = hash & hashlin->low_mask;
-
-		/* if we have not reallocated this position yet */
-		if (pos >= hashlin->split) {
-
-			/* use it as it was before */
-			return tommy_hashlin_pos(hashlin, pos);
-		}
-	}
-
-	/* otherwise operates normally */
-	pos = hash & hashlin->bucket_mask;
-
-	return tommy_hashlin_pos(hashlin, pos);
-}
-
 /**
  * Grow one step.
  */
@@ -135,7 +110,7 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin)
 			/* cast to ptrdiff_t to ensure to get a negative value */
 			hashlin->bucket[hashlin->bucket_bit] = &segment[-(tommy_ptrdiff_t)hashlin->low_max];
 
-			/* grow the hash size and allocate */
+			/* grow the hash size */
 			++hashlin->bucket_bit;
 			hashlin->bucket_max = 1 << hashlin->bucket_bit;
 			hashlin->bucket_mask = hashlin->bucket_max - 1;
@@ -172,17 +147,17 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin)
 			*split[0] = 0;
 			*split[1] = 0;
 
-			/* compute the bit to identify the bucket */
-			mask = hashlin->bucket_mask & ~hashlin->low_mask;
+			/* the bit used to identify the bucket */
+			mask = hashlin->low_max;
 
 			/* flush the bucket */
 			while (j) {
 				tommy_hashlin_node* j_next = j->next;
-				tommy_count_t index = (j->key & mask) != 0;
-				if (*split[index])
-					tommy_list_insert_tail_not_empty(*split[index], j);
+				tommy_count_t pos = (j->key & mask) != 0;
+				if (*split[pos])
+					tommy_list_insert_tail_not_empty(*split[pos], j);
 				else
-					tommy_list_insert_first(split[index], j);
+					tommy_list_insert_first(split[pos], j);
 				j = j_next;
 			}
 
@@ -191,7 +166,8 @@ tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin)
 
 			/* if we have finished, change the state */
 			if (hashlin->split == hashlin->low_max) {
-				hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
+				/* go in stable mode */
+				tommy_hashlin_stable(hashlin);
 				break;
 			}
 		}
@@ -251,8 +227,6 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin)
 			if (hashlin->split == 0) {
 				tommy_hashlin_node** segment;
 
-				hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
-
 				/* shrink the hash size */
 				--hashlin->bucket_bit;
 				hashlin->bucket_max = 1 << hashlin->bucket_bit;
@@ -261,6 +235,9 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin)
 				/* free the last segment */
 				segment = hashlin->bucket[hashlin->bucket_bit];
 				tommy_free(&segment[((tommy_ptrdiff_t)1) << hashlin->bucket_bit]);
+
+				/* go in stable mode */
+				tommy_hashlin_stable(hashlin);
 				break;
 			}
 		}
@@ -269,7 +246,7 @@ tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin)
 
 void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash)
 {
-	tommy_list_insert_tail(tommy_hashlin_bucket_ptr(hashlin, hash), node, data);
+	tommy_list_insert_tail(tommy_hashlin_bucket_ref(hashlin, hash), node, data);
 
 	node->key = hash;
 
@@ -280,7 +257,7 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void
 
 void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node)
 {
-	tommy_list_remove_existing(tommy_hashlin_bucket_ptr(hashlin, node->key), node);
+	tommy_list_remove_existing(tommy_hashlin_bucket_ref(hashlin, node->key), node);
 
 	--hashlin->count;
 
@@ -289,14 +266,9 @@ void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node*
 	return node->data;
 }
 
-tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
-{
-	return *tommy_hashlin_bucket_ptr(hashlin, hash);
-}
-
 void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
 {
-	tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ptr(hashlin, hash);
+	tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ref(hashlin, hash);
 	tommy_hashlin_node* node = *let_ptr;
 
 	while (node) {
@@ -321,12 +293,8 @@ void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func)
 	tommy_count_t bucket_max;
 	tommy_count_t pos;
 
-	/* if we are reallocating */
-	if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) {
-		bucket_max = hashlin->low_max + hashlin->split;
-	} else {
-		bucket_max = hashlin->bucket_max;
-	}
+	/* number of valid buckets */
+	bucket_max = hashlin->low_max + hashlin->split;
 
 	for (pos = 0; pos < bucket_max; ++pos) {
 		tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
@@ -344,12 +312,8 @@ void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* f
 	tommy_count_t bucket_max;
 	tommy_count_t pos;
 
-	/* if we are reallocating */
-	if (hashlin->state != TOMMY_HASHLIN_STATE_STABLE) {
-		bucket_max = hashlin->low_max + hashlin->split;
-	} else {
-		bucket_max = hashlin->bucket_max;
-	}
+	/* number of valid buckets */
+	bucket_max = hashlin->low_max + hashlin->split;
 
 	for (pos = 0; pos < bucket_max; ++pos) {
 		tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
diff --git a/tommyds/tommyhashlin.h b/tommyds/tommyhashlin.h
index 0ba3a21..477b1e7 100644
--- a/tommyds/tommyhashlin.h
+++ b/tommyds/tommyhashlin.h
@@ -201,7 +201,6 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void
  * You have to provide a compare function and the hash of the element you want to remove.
  * If the element is not found, 0 is returned.
  * If more equal elements are present, the first one is removed.
- * This operation is faster than calling tommy_hashlin_bucket() and tommy_hashlin_remove_existing() separately.
  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
  * The function should return 0 for equal elements, anything other for different elements.
  * \param cmp_arg Compare argument passed as first argument of the compare function.
@@ -210,6 +209,48 @@ void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void
  */
 void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
 
+/** \internal
+ * Returns the bucket at the specified position.
+ */
+tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
+{
+	tommy_uint_t bsr;
+
+	/* get the highest bit set, in case of all 0, return 0 */
+	bsr = tommy_ilog2_u32(pos | 1);
+
+	return &hashlin->bucket[bsr][pos];
+}
+
+/** \internal
+ * Returns a pointer to the bucket of the specified hash.
+ */
+tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash)
+{
+	tommy_count_t pos;
+	tommy_count_t high_pos;
+
+	pos = hash & hashlin->low_mask;
+	high_pos = hash & hashlin->bucket_mask;
+
+	/* if this position is already allocated in the high half */
+	if (pos < hashlin->split) {
+		/* The following assignment is expected to be implemented */
+		/* with a conditional move instruction */
+		/* that results in slightly better and constant performance */
+		/* regardless of the split position. */
+		/* This mostly affects the worst case, when the split value */
+		/* is near its half, resulting in a condition that is totally */
+		/* unpredictable by the CPU. */
+		/* In such a case the use of the conditional move is generally faster. */
+
+		/* use also the high bit */
+		pos = high_pos;
+	}
+
+	return tommy_hashlin_pos(hashlin, pos);
+}
+
 /**
  * Gets the bucket of the specified hash.
  * The bucket is guaranteed to contain ALL the elements with the specified hash,
@@ -218,7 +259,10 @@ void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const
  * \param hash Hash of the element to find.
  * \return The head of the bucket, or 0 if empty.
  */
-tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash);
+tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
+{
+	return *tommy_hashlin_bucket_ref(hashlin, hash);
+}
 
 /**
  * Searches an element in the hashtable.
@@ -283,7 +327,7 @@ void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node*
 void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func);
 
 /**
- * Calls the specified function with argument for each element in the hashtable.
+ * Calls the specified function with an argument for each element in the hashtable.
  */
 void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg);
 
diff --git a/tommyds/tommyhashtbl.c b/tommyds/tommyhashtbl.c
index f8b8fd5..007b8ec 100644
--- a/tommyds/tommyhashtbl.c
+++ b/tommyds/tommyhashtbl.c
@@ -35,11 +35,10 @@
 
 void tommy_hashtable_init(tommy_hashtable* hashtable, tommy_count_t bucket_max)
 {
-	if (bucket_max < 16) {
+	if (bucket_max < 16)
 		bucket_max = 16;
-	} else {
+	else
 		bucket_max = tommy_roundup_pow2_u32(bucket_max);
-	}
 
 	hashtable->bucket_max = bucket_max;
 	hashtable->bucket_mask = hashtable->bucket_max - 1;
diff --git a/tommyds/tommyhashtbl.h b/tommyds/tommyhashtbl.h
index 75e6e4a..76e8062 100644
--- a/tommyds/tommyhashtbl.h
+++ b/tommyds/tommyhashtbl.h
@@ -173,7 +173,6 @@ void tommy_hashtable_insert(tommy_hashtable* hashtable, tommy_hashtable_node* no
  * You have to provide a compare function and the hash of the element you want to remove.
  * If the element is not found, 0 is returned.
  * If more equal elements are present, the first one is removed.
- * This operation is faster than calling tommy_hashtable_bucket() and tommy_hashtable_remove_existing() separately.
  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
  * The function should return 0 for equal elements, anything other for different elements.
  * \param cmp_arg Compare argument passed as first argument of the compare function.
@@ -258,7 +257,7 @@ void* tommy_hashtable_remove_existing(tommy_hashtable* hashtable, tommy_hashtabl
 void tommy_hashtable_foreach(tommy_hashtable* hashtable, tommy_foreach_func* func);
 
 /**
- * Calls the specified function with argument for each element in the hashtable.
+ * Calls the specified function with an argument for each element in the hashtable.
  */
 void tommy_hashtable_foreach_arg(tommy_hashtable* hashtable, tommy_foreach_arg_func* func, void* arg);
 
diff --git a/tommyds/tommylist.c b/tommyds/tommylist.c
index 891d655..ea85b08 100644
--- a/tommyds/tommylist.c
+++ b/tommyds/tommylist.c
@@ -34,9 +34,8 @@ void tommy_list_concat(tommy_list* first, tommy_list* second)
 	tommy_node* first_tail;
 	tommy_node* second_head;
 
-	if (tommy_list_empty(second)) {
+	if (tommy_list_empty(second))
 		return;
-	}
 
 	if (tommy_list_empty(first)) {
 		*first = *second;
diff --git a/tommyds/tommylist.h b/tommyds/tommylist.h
index 98e4303..8004a44 100644
--- a/tommyds/tommylist.h
+++ b/tommyds/tommylist.h
@@ -28,18 +28,18 @@
 /** \file
  * Double linked list for collisions into hashtables.
  *
- * This list is a double linked list mainly targetted for collisions into an hashtables,
- * but useable also as a generic list.
+ * This list is a double linked list mainly targeted at handling collisions
+ * in hashtables, but usable also as a generic list.
  *
  * The main feature of this list is to require only one pointer to represent the
- * list, compared to a classic implementation requiring an head an a tail pointers.
+ * list, compared to a classic implementation requiring both a head and a tail pointer.
  * This reduces the memory usage in hashtables.
  *
  * Another feature is to support the insertion at the end of the list. This allow to store
  * collisions in a stable order. Where for stable order we mean that equal elements keep
  * their insertion order.
  *
- * To initialize the list, you have to call tommy_list_init(), or simply assign
+ * To initialize the list, you have to call tommy_list_init(), or to simply assign
  * to it NULL, as an empty list is represented by the NULL value.
  *
  * \code
@@ -69,7 +69,7 @@
  * tommy_list_insert_tail(&list, &obj->node, obj); // inserts the object
  * \endcode
  *
- * To iterates over all the elements in the list you have to call
+ * To iterate over all the elements in the list you have to call
  * tommy_list_head() to get the head of the list and follow the
  * tommy_node::next pointer until NULL.
  *
@@ -84,18 +84,13 @@
  * }
  * \endcode
  *
- * To destroy the list you have only to remove all the elements, as the list is
- * completely inplace and it doesn't allocate memory.
+ * To destroy the list you have to remove all the elements,
+ * as the list is completely inplace and it doesn't allocate memory.
+ * This can be done with the tommy_list_foreach() function.
  *
  * \code
- * tommy_node* i = tommy_list_head(&list);
- * while (i) {
- *     tommy_node* i_next = i->next; // saves the next element before freeing
- *
- *     free(i->data); // frees the object allocated memory
- *
- *     i = i_next; // goes to the next element
- * }
+ * // deallocates all the objects iterating the list
+ * tommy_list_foreach(&list, free);
  * \endcode
  */
 
@@ -206,11 +201,10 @@ tommy_inline void tommy_list_insert_head(tommy_list* list, tommy_node* node, voi
 {
 	tommy_node* head = tommy_list_head(list);
 
-	if (head) {
+	if (head)
 		tommy_list_insert_head_not_empty(list, node);
-	} else {
+	else
 		tommy_list_insert_first(list, node);
-	}
 
 	node->data = data;
 }
@@ -224,11 +218,10 @@ tommy_inline void tommy_list_insert_tail(tommy_list* list, tommy_node* node, voi
 {
 	tommy_node* head = tommy_list_head(list);
 
-	if (head) {
+	if (head)
 		tommy_list_insert_tail_not_empty(head, node);
-	} else {
+	else
 		tommy_list_insert_first(list, node);
-	}
 
 	node->data = data;
 }
@@ -264,18 +257,16 @@ tommy_inline void* tommy_list_remove_existing(tommy_list* list, tommy_node* node
 	tommy_node* head = tommy_list_head(list);
 
 	/* remove from the "circular" prev list */
-	if (node->next) {
+	if (node->next)
 		node->next->prev = node->prev;
-	} else {
+	else
 		head->prev = node->prev; /* the last */
-	}
 
 	/* remove from the "0 terminated" next list */
-	if (head == node) {
+	if (head == node)
 		*list = node->next; /* the new head, in case 0 */
-	} else {
+	else
 		node->prev->next = node->next;
-	}
 
 	return node->data;
 }
@@ -294,7 +285,7 @@ void tommy_list_concat(tommy_list* first, tommy_list* second);
  * It's a stable merge sort with O(N*log(N)) worst complexity.
  * It's faster on degenerated cases like partially ordered lists.
  * \param cmp Compare function called with two elements.
- * The function should return <0 if the first element is less than the second, == 0 if equal, and > 0 if greather.
+ * The function should return <0 if the first element is less than the second, ==0 if equal, and >0 if greater.
  */
 void tommy_list_sort(tommy_list* list, tommy_compare_func* cmp);
 
@@ -307,6 +298,23 @@ tommy_inline tommy_bool_t tommy_list_empty(tommy_list* list)
 	return tommy_list_head(list) == 0;
 }
 
+/**
+ * Gets the number of elements.
+ * \note This operation is O(n).
+ */
+tommy_inline tommy_count_t tommy_list_count(tommy_list* list)
+{
+	tommy_count_t count = 0;
+	tommy_node* i = tommy_list_head(list);
+
+	while (i) {
+		++count;
+		i = i->next;
+	}
+
+	return count;
+}
+
 /**
  * Calls the specified function for each element in the list.
  *
@@ -347,7 +355,7 @@ tommy_inline void tommy_list_foreach(tommy_list* list, tommy_foreach_func* func)
 }
 
 /**
- * Calls the specified function with argument for each element in the list.
+ * Calls the specified function with an argument for each element in the list.
  */
 tommy_inline void tommy_list_foreach_arg(tommy_list* list, tommy_foreach_arg_func* func, void* arg)
 {
diff --git a/tommyds/tommytrie.c b/tommyds/tommytrie.c
index cd3ce42..35ac219 100644
--- a/tommyds/tommytrie.c
+++ b/tommyds/tommytrie.c
@@ -127,9 +127,8 @@ static void trie_bucket_insert(tommy_trie* trie, tommy_uint_t shift, tommy_trie_
 	*let_ptr = tommy_cast(tommy_trie_node*, trie_set_tree(tree));
 
 	/* initialize it */
-	for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i) {
+	for (i = 0; i < TOMMY_TRIE_TREE_MAX; ++i)
 		tree->map[i] = 0;
-	}
 
 	/* get the position of the two elements */
 	i = (node->key >> shift) & TOMMY_TRIE_TREE_MASK;
diff --git a/tommyds/tommytrie.h b/tommyds/tommytrie.h
index 86a4f43..6bf42a0 100644
--- a/tommyds/tommytrie.h
+++ b/tommyds/tommytrie.h
@@ -111,10 +111,9 @@
  * \endcode
  *
  * To destroy the trie you have to remove all the elements, and deinitialize
- * the trie calling tommy_trie_done() and the allocator using tommy_allocator_done().
+ * the allocator using tommy_allocator_done().
  *
  * \code
- * tommy_trie_done(&trie);
  * tommy_allocator_done(&alloc);
  * \endcode
  *
@@ -244,7 +243,7 @@ tommy_inline void* tommy_trie_search(tommy_trie* trie, tommy_key_t key)
 void* tommy_trie_remove_existing(tommy_trie* trie, tommy_trie_node* node);
 
 /**
- * Returns the number of elements.
+ * Gets the number of elements.
  */
 tommy_inline tommy_count_t tommy_trie_count(tommy_trie* trie)
 {
diff --git a/tommyds/tommytrieinp.c b/tommyds/tommytrieinp.c
index bfe6089..26ec2c3 100644
--- a/tommyds/tommytrieinp.c
+++ b/tommyds/tommytrieinp.c
@@ -81,18 +81,16 @@ tommy_inline void tommy_trie_inplace_list_remove(tommy_trie_inplace_node** let_p
 	tommy_trie_inplace_node* head = *let_ptr;
 
 	/* remove from the "circular" prev list */
-	if (node->next) {
+	if (node->next)
 		node->next->prev = node->prev;
-	} else {
+	else
 		head->prev = node->prev; /* the last */
-	}
 
 	/* remove from the "0 terminated" next list */
-	if (head == node) {
+	if (head == node)
 		*let_ptr = node->next; /* the new first */
-	} else {
+	else
 		node->prev->next = node->next;
-	}
 }
 
 void tommy_trie_inplace_init(tommy_trie_inplace* trie_inplace)
@@ -162,20 +160,22 @@ static tommy_trie_inplace_node* trie_inplace_bucket_remove(tommy_uint_t shift, t
 		return 0;
 
 	/* if the node to remove is not specified */
-	if (!remove) {
-		/* remove the first */
-		remove = node;
-	}
+	if (!remove)
+		remove = node; /* remove the first */
 
 	tommy_trie_inplace_list_remove(let_ptr, remove);
 
+	/* if there is no change in the node, nothing more to do */
+	if (*let_ptr == node)
+		return remove;
+
 	/* if we have a substitute */
 	if (*let_ptr != 0) {
 		/* copy the child pointers to the new one */
 		node = *let_ptr;
-		for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) {
+		for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i)
 			node->map[i] = remove->map[i];
-		}
+
 		return remove;
 	}
 
@@ -196,17 +196,15 @@ static tommy_trie_inplace_node* trie_inplace_bucket_remove(tommy_uint_t shift, t
 	}
 
 	/* if it's itself a leaf */
-	if (!leaf_let_ptr) {
+	if (!leaf_let_ptr)
 		return remove;
-	}
 
 	/* remove the leaf */
 	*leaf_let_ptr = 0;
 
 	/* copy the child pointers */
-	for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i) {
+	for (i = 0; i < TOMMY_TRIE_INPLACE_TREE_MAX; ++i)
 		leaf->map[i] = remove->map[i];
-	}
 
 	/* put it in place */
 	*let_ptr = leaf;
diff --git a/tommyds/tommytrieinp.h b/tommyds/tommytrieinp.h
index 317a80c..c30f676 100644
--- a/tommyds/tommytrieinp.h
+++ b/tommyds/tommytrieinp.h
@@ -222,7 +222,7 @@ tommy_inline void* tommy_trie_inplace_search(tommy_trie_inplace* trie_inplace, t
 void* tommy_trie_inplace_remove_existing(tommy_trie_inplace* trie_inplace, tommy_trie_inplace_node* node);
 
 /**
- * Returns the number of elements.
+ * Gets the number of elements.
  */
 tommy_inline tommy_count_t tommy_trie_inplace_count(tommy_trie_inplace* trie_inplace)
 {
diff --git a/tommyds/tommytypes.h b/tommyds/tommytypes.h
index 5c81f3c..45d3d1a 100644
--- a/tommyds/tommytypes.h
+++ b/tommyds/tommytypes.h
@@ -243,7 +243,7 @@ typedef int tommy_compare_func(const void* obj_a, const void* obj_b);
  * Search function for elements.
  * \param arg Pointer at the value to search.
  * \param obj Pointer at the object to compare to.
- * \return ==0 if the value matches the element. != 0 if different.
+ * \return ==0 if the value matches the element. !=0 if different.
  *
  * Note that the first argument is a pointer to the value to search and
  * the second one is a pointer to the object to compare.
@@ -310,9 +310,9 @@ typedef void tommy_foreach_arg_func(void* arg, void* obj);
  * Return the bit index of the most significant 1 bit.
  *
  * If no bit is set, the result is undefined.
- * To force a return 0 in this case, you can use tommy_ilog2(value | 1).
+ * To force a return 0 in this case, you can use tommy_ilog2_u32(value | 1).
  *
- * Other interesting ways for bitscan can be found at:
+ * Other interesting ways for bitscan are at:
  *
  * Bit Twiddling Hacks
  * http://graphics.stanford.edu/~seander/bithacks.html
@@ -321,7 +321,7 @@ typedef void tommy_foreach_arg_func(void* arg, void* obj);
  * http://chessprogramming.wikispaces.com/BitScan
  *
  * \param value Value to scan. 0 is not allowed.
- * \return The index of the most significan bit set.
+ * \return The index of the most significant bit set.
  */
 tommy_inline tommy_uint_t tommy_ilog2_u32(tommy_uint32_t value)
 {
@@ -336,7 +336,8 @@ tommy_inline tommy_uint_t tommy_ilog2_u32(tommy_uint32_t value)
 	 * Where "x ^ 31 = 31 - x", but gcc does not optimize "31 - __builtin_clz(x)" to bsr(x),
 	 * but generates 31 - (bsr(x) xor 31).
 	 *
-	 * So we write "__builtin_clz(x) ^ 31" instead of "31 - __builtin_clz(x)".
+	 * So we write "__builtin_clz(x) ^ 31" instead of "31 - __builtin_clz(x)",
+	 * to allow the double xor to be optimized out.
 	 */
 	return __builtin_clz(value) ^ 31;
 #else