From 08a850420920a80f9860c4c50949a046c1985319 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 20 Dec 2023 15:35:38 -0800 Subject: [PATCH 001/133] Create a builtin hash function for strings. --- runstring.in | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/runstring.in b/runstring.in index 415db5365..b9eafbcc1 100644 --- a/runstring.in +++ b/runstring.in @@ -10,6 +10,8 @@ stringarray2* => stringArray2() #include #include #include +#include +#include #include "array.h" @@ -37,6 +39,11 @@ static const size_t nTime=256; static char Time[nTime]; #endif +#ifdef RETURN_NESTED + #error "RETURN_NESTED macro is already defined." +#endif +#define RETURN_NESTED return + void checkformat(const char *ptr, bool intformat) { while(*ptr != '\0') { @@ -404,6 +411,27 @@ Int ascii(string s) return s.empty() ? -1 : (unsigned char) s[0]; } +Int hash(string s, bool try_consistent=false) +{ + static unsigned long long per_run_entropy = []() -> unsigned long long{ + // The following code will be run only once: + std::random_device rd; + std::uniform_int_distribution dist(0, 0x3fffffffffffffffULL); + RETURN_NESTED dist(rd); + }(); + unsigned long long hash_result = std::hash{}(s); + // Zero out the highest two bits to avoid conflicts with DefaultValue and Undefined: + long long retv = hash_result & 0x3fffffffffffffffULL; + // Mix in the highest two bits in case the hash distribution needs them. + retv ^= (hash_result >> 62); + // Xor with a random bitstring to ensure the hash behaves differently each + // run; see Hyrum's Law. 
+ if (not try_consistent) { + retv ^= per_run_entropy; + } + return retv; +} + string string(Int x) { ostringstream buf; @@ -454,3 +482,5 @@ Int seconds(string t=emptystring, string format=emptystring) return -1; #endif } + +#undef RETURN_NESTED \ No newline at end of file From e3ad96faa4fb703ad138f10d4b6b541428e36e33 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Fri, 22 Dec 2023 15:43:21 -0800 Subject: [PATCH 002/133] Unnest nested function in C++ hash --- runstring.in | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/runstring.in b/runstring.in index 509d8f1b3..0b59f61a2 100644 --- a/runstring.in +++ b/runstring.in @@ -39,11 +39,6 @@ static const size_t nTime=256; static char Time[nTime]; #endif -#ifdef RETURN_NESTED - #error "RETURN_NESTED macro is already defined." -#endif -#define RETURN_NESTED return - void checkformat(const char *ptr, bool intformat) { while(*ptr != '\0') { @@ -112,6 +107,17 @@ void checkformat(const char *ptr, bool intformat) } /* End of else statement */ } } + +// Generates one random 62-bit integer that is different every time asy is run. +// The function is not intended to be run more than once, but it will likely +// give different results every time it is run. +// **NOT CRYPTOGRAPHICALLY SECURE** +unsigned long long one_random_int() { + std::random_device rd; + std::uniform_int_distribution + dist(0, 0x3fffffffffffffffULL); + return dist(rd); +} // Autogenerated routines: @@ -414,14 +420,12 @@ Int ascii(string s) Int hash(string s, bool try_consistent=false) { - static unsigned long long per_run_entropy = []() -> unsigned long long{ - // The following code will be run only once: - std::random_device rd; - std::uniform_int_distribution dist(0, 0x3fffffffffffffffULL); - RETURN_NESTED dist(rd); - }(); + // A single random int that is generated the first time this function is + // run. 
+ static unsigned long long per_run_entropy = one_random_int(); unsigned long long hash_result = std::hash{}(s); - // Zero out the highest two bits to avoid conflicts with DefaultValue and Undefined: + // Zero out the highest two bits to avoid conflicts with DefaultValue and + // Undefined: long long retv = hash_result & 0x3fffffffffffffffULL; // Mix in the highest two bits in case the hash distribution needs them. retv ^= (hash_result >> 62); @@ -482,6 +486,4 @@ Int seconds(string t=emptystring, string format=emptystring) #else return -1; #endif -} - -#undef RETURN_NESTED \ No newline at end of file +} \ No newline at end of file From e6dda67927135d6d2debe8637645caaf215d6d35 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Fri, 22 Dec 2023 16:09:12 -0800 Subject: [PATCH 003/133] Minor cleanup --- runstring.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runstring.in b/runstring.in index 0b59f61a2..f0a3167d1 100644 --- a/runstring.in +++ b/runstring.in @@ -10,7 +10,6 @@ stringarray2* => stringArray2() #include #include #include -#include #include #include "array.h" @@ -486,4 +485,4 @@ Int seconds(string t=emptystring, string format=emptystring) #else return -1; #endif -} \ No newline at end of file +} From e72fdeb2d31b9be9bac96c3796be7c220e15699d Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Fri, 29 Dec 2023 10:12:33 -0800 Subject: [PATCH 004/133] dummy parse from Andy --- camp.y | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/camp.y b/camp.y index 01bd47ecb..030fda190 100644 --- a/camp.y +++ b/camp.y @@ -174,6 +174,10 @@ using mem::string; %type forupdate stmexplist %type explicitornot +// Make new classes for the following and add to union above. 
+%type decdec +%type decdeclist + /* There are four shift/reduce conflicts: * the dangling ELSE in IF (exp) IF (exp) stm ELSE stm * new ID @@ -248,6 +252,25 @@ dec: | INCLUDE ID ';' { $$ = new includedec($1, $2.sym); } | INCLUDE STRING ';' { $$ = new includedec($1, $2->getString()); } + +// Experimental - templated imports. +| TYPEDEF IMPORT decidlist ';' + { assert(false); } +| FROM name '(' decdeclist ')' UNRAVEL idpairlist ';' + { assert(false); } +; + +// List mapping dec to dec as in "Key=string, Value=int" +decdec: + decidstart '=' decidstart + { assert(false); } +; + +decdeclist: + decdec + { assert(false); } +| decdeclist ',' decdec + { assert(false); } ; idpair: From bd149c6ee137a065f73eb5a1a188a59beb27149e Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Thu, 4 Jan 2024 15:47:18 -0800 Subject: [PATCH 005/133] translate decdec to formal --- camp.y | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/camp.y b/camp.y index 030fda190..d20adbcc4 100644 --- a/camp.y +++ b/camp.y @@ -256,21 +256,22 @@ dec: // Experimental - templated imports. 
| TYPEDEF IMPORT decidlist ';' { assert(false); } -| FROM name '(' decdeclist ')' UNRAVEL idpairlist ';' +/* ACCESS name '(' decdeclist ')' 'as' ID */ +| ACCESS name '(' decdeclist ')' ID ID ';' { assert(false); } ; // List mapping dec to dec as in "Key=string, Value=int" decdec: - decidstart '=' decidstart - { assert(false); } + ID ASSIGN type + { $$ = new formal($1.pos, $3, new decidstart($1.pos, $1.sym)); } ; decdeclist: decdec - { assert(false); } + { $$ = new formals($1->getPos()); $$->add($1); } | decdeclist ',' decdec - { assert(false); } + { $$ = $1; $$->add($3); } ; idpair: From 79584602b2c6d9770322b712454f8b34819d09e3 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Mon, 8 Jan 2024 14:35:52 -0800 Subject: [PATCH 006/133] checkpoint --- camp.y | 17 ++++++++++++++++- dec.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/camp.y b/camp.y index d20adbcc4..2009b827c 100644 --- a/camp.y +++ b/camp.y @@ -50,6 +50,20 @@ bool checkKeyword(position pos, symbol sym) return true; } +// Check if the symbol given is "as". Returns true in this case and +// returns false and reports an error otherwise. +bool checkAs(position pos, symbol sym) +{ + if (sym != symbol::trans("as")) { + em.error(pos); + em << "expected 'as' here"; + + return false; + } + return true; +} + + namespace absyntax { file *root; } using namespace absyntax; @@ -258,7 +272,8 @@ dec: { assert(false); } /* ACCESS name '(' decdeclist ')' 'as' ID */ | ACCESS name '(' decdeclist ')' ID ID ';' - { assert(false); } + { checkAs($6.pos, $6.sym); + $$ = new templateAccessDec($2, $4, $7); } ; // List mapping dec to dec as in "Key=string, Value=int" diff --git a/dec.h b/dec.h index 5a8f8cdd0..351a064c1 100644 --- a/dec.h +++ b/dec.h @@ -543,6 +543,18 @@ class accessdec : public dec { void createSymMap(AsymptoteLsp::SymbolContext* symContext) override; }; +// Accesses the file with specified types added to the type environment. 
+class templateAccessDec : public dec { + symbol src; // The name of the module to access. + formals *args; + symbol dest; // What to call it in the local environment. + +public: + templateAccessDec(position pos, symbol src, formals *args, symbol dest) + : dec(pos), src(src), args(args), dest(test) {} +}; + + // Abstract base class for // from _ access _; (fromaccessdec) // and From b6d1dafdd95b501ad0afc58ed12110105a83103b Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sat, 3 Feb 2024 12:10:21 -0800 Subject: [PATCH 007/133] Queue implementations (wholly untested). --- tests/datastructures/queue.asy | 136 +++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/datastructures/queue.asy diff --git a/tests/datastructures/queue.asy b/tests/datastructures/queue.asy new file mode 100644 index 000000000..ea2f82175 --- /dev/null +++ b/tests/datastructures/queue.asy @@ -0,0 +1,136 @@ +typedef import(T); + +// This is supposed to be an interface. We should probably import it from +// somewhere outside the test folder. Also we should decide on a style for +// naming interfaces. 
+struct Queue { + void push(T value); + T peek(); + T pop(); + int size(); +} + +Queue makeNaiveQueue(T /*specify type for overloading*/) { + Queue queue = new Queue; + T[] data = new T[0]; + queue.push = new void(T value) { + data.push(value); + }; + queue.peek = new T() { + return data[0]; + }; + queue.pop = new T() { + T retv = data[0]; + data.delete(0); + return retv; + }; + queue.size = new int() { + return data.length; + }; + return queue; +} + +struct ArrayQueue { + T[] data = new T[8]; + data.cyclic = true; + int start = 0; + int size = 0; + + private void resize() { + T[] newData = new T[data.length * 2]; + newData.cyclic = true; + newData[:size] = data[start : start+size]; + data = newData; + start = 0; + } + + void push(T value) { + if (size == data.length) { + resize(); + } + data[start+size] = value; + ++size; + } + + T peek() { + return data[start]; + } + + T pop() { + T retv = data[start]; + ++start; + --size; + return retv; + } + + int size() { + return size; + } +} + +Queue cast(ArrayQueue queue) { + Queue queue_ = new Queue; + queue_.push = queue.push; + queue_.peek = queue.peek; + queue_.pop = queue.pop; + queue_.size = queue.size; + return queue_; +} + +Queue makeArrayQueue(T /*specify type for overloading*/) { + return new ArrayQueue; +} + +struct LinkedQueue { + struct Node { + T value; + Node next; + } + Node head; + Node tail; + int size = 0; + + void push(T value) { + Node node = new Node; + node.value = value; + if (size == 0) { + head = node; + tail = node; + } else { + tail.next = node; + tail = node; + } + ++size; + } + + T peek() { + return head.value; + } + + T pop() { + T retv = head.value; + head = head.next; + --size; + return retv; + } + + int size() { + return size; + } +} + +Queue cast(LinkedQueue queue) { + Queue queue_ = new Queue; + queue_.push = queue.push; + queue_.peek = queue.peek; + queue_.pop = queue.pop; + queue_.size = queue.size; + return queue_; +} + +Queue makeLinkedQueue(T /*specify type for overloading*/) { + 
return new LinkedQueue; +} + +// Specify a "default" queue implementation. +Queue makeQueue(T /*specify type for overloading*/) = makeArrayQueue; \ No newline at end of file From 18f9b5a38646ad86f79dbd2c6a4a346b4e93fa2e Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sun, 4 Feb 2024 13:31:14 -0800 Subject: [PATCH 008/133] linkedlist (untested, probably buggy) --- tests/datastructures/linkedlist.asy | 103 ++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/datastructures/linkedlist.asy diff --git a/tests/datastructures/linkedlist.asy b/tests/datastructures/linkedlist.asy new file mode 100644 index 000000000..56d9a4ffc --- /dev/null +++ b/tests/datastructures/linkedlist.asy @@ -0,0 +1,103 @@ +typedef import(T); + +struct LinkedIterator_T { + T next(); + bool hasNext(); + void delete(); +} + +struct LinkedList_T { + struct Node { + T data; + Node next; + void operator init(T data, Node next=null) { + this.data = data; + this.next = next; + } + }; + + private Node head = null; + private Node tail = null; + private int size = 0; + private int numChanges = 0; + + int size() { + return size; + } + + void add(T data) { + if (head == null) { + head = Node(data); + tail = head; + } else { + tail.next = Node(data); + tail = tail.next; + } + ++size; + ++numChanges; + } + + void insertAtBeginning(T data) { + Node newNode = Node(data, head); + head = newNode; + ++size; + ++numChanges; + } + + LinkedIterator_T iterator() { + Node next = head; + Node previous = new Node; + previous.next = head; + Node extraNode = previous; // This Node is not actually in the list. Remember it for bug checks. 
+ LinkedIterator_T it = new LinkedIterator_T; + int it_numChanges = numChanges; + bool canDelete = false; + it.next = new T() { + assert(next != null, "No more elements in the list"); + assert(it_numChanges == numChanges, "Concurrent modification detected"); + assert(next == previous.next, "Bug in iterator"); + T retv = next.data; + if (next.next != null) { // If we're not at the end of the list, advance previous. + previous = next; + } + next = next.next; + canDelete = true; + assert(next != extraNode, "Bug in iterator"); + return retv; + }; + it.hasNext = new bool() { + assert(it_numChanges == numChanges, "Concurrent modification detected"); + return next != null; + }; + it.delete = new void() { + assert(it_numChanges == numChanges, "Concurrent modification detected"); + assert(canDelete, "No element to delete"); + assert(previous != null, "Bug in iterator"); + assert(size > 0, "Bug in iterator"); + if (size == 1) { + // Delete the only element in the list. + head = null; + tail = null; + } else if (next == null) { + // Delete the tail. + assert(previous != extraNode, "Bug in iterator"); + tail = previous; // This works because we did not advance previous when we reached the end of the list. + tail.next = null; + } else { + assert(previous != extraNode, "Bug in iterator"); + // Copy next to previous. + previous.data = next.data; + previous.next = next.next; + + // Advance next. 
+ next = next.next; + } + + --size; + ++numChanges; + ++it_numChanges; + canDelete = false; + }; + return it; + } +} \ No newline at end of file From da9fd36479c2fb84f332a50307d94d63410ad4c2 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sun, 4 Feb 2024 14:08:21 -0800 Subject: [PATCH 009/133] improved queue naming convention and overloading --- tests/datastructures/queue.asy | 50 ++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/datastructures/queue.asy b/tests/datastructures/queue.asy index ea2f82175..e2a4cad57 100644 --- a/tests/datastructures/queue.asy +++ b/tests/datastructures/queue.asy @@ -3,16 +3,17 @@ typedef import(T); // This is supposed to be an interface. We should probably import it from // somewhere outside the test folder. Also we should decide on a style for // naming interfaces. -struct Queue { +struct Queue_T { void push(T value); T peek(); T pop(); int size(); } -Queue makeNaiveQueue(T /*specify type for overloading*/) { - Queue queue = new Queue; +Queue_T makeNaiveQueue(T[] initialData) { + Queue_T queue = new Queue_T; T[] data = new T[0]; + data.append(initialData); queue.push = new void(T value) { data.push(value); }; @@ -30,7 +31,7 @@ Queue makeNaiveQueue(T /*specify type for overloading*/) { return queue; } -struct ArrayQueue { +struct ArrayQueue_T { T[] data = new T[8]; data.cyclic = true; int start = 0; @@ -44,6 +45,23 @@ struct ArrayQueue { start = 0; } + void operator init(T[] initialData) { + if (initialData.length == 0 || alias(initialData, null)) { + return; + } + desiredLength = data.length; + // TODO: Do this computation using CLZ. 
+ while (desiredLength < initialData.length) { + desiredLength *= 2; + } + if (desiredLength != data.length) { + data = new T[desiredLength]; + data.cyclic = true; + } + size = initialData.length; + data[:size] = initialData; + } + void push(T value) { if (size == data.length) { resize(); @@ -68,8 +86,8 @@ struct ArrayQueue { } } -Queue cast(ArrayQueue queue) { - Queue queue_ = new Queue; +Queue_T cast(ArrayQueue_T queue) { + Queue_T queue_ = new Queue_T; queue_.push = queue.push; queue_.peek = queue.peek; queue_.pop = queue.pop; @@ -77,11 +95,11 @@ Queue cast(ArrayQueue queue) { return queue_; } -Queue makeArrayQueue(T /*specify type for overloading*/) { - return new ArrayQueue; +Queue_T makeArrayQueue(T[] initialData /*specify type for overloading*/) { + return ArrayQueue_T(initialData); } -struct LinkedQueue { +struct LinkedQueue_T { struct Node { T value; Node next; @@ -119,8 +137,8 @@ struct LinkedQueue { } } -Queue cast(LinkedQueue queue) { - Queue queue_ = new Queue; +Queue_T cast(LinkedQueue_T queue) { + Queue_T queue_ = new Queue_T; queue_.push = queue.push; queue_.peek = queue.peek; queue_.pop = queue.pop; @@ -128,9 +146,13 @@ Queue cast(LinkedQueue queue) { return queue_; } -Queue makeLinkedQueue(T /*specify type for overloading*/) { - return new LinkedQueue; +Queue_T makeLinkedQueue(T[] initialData) { + var queue = new LinkedQueue_T; + for (T value in initialData) { + queue.push(value); + } + return queue; } // Specify a "default" queue implementation. 
-Queue makeQueue(T /*specify type for overloading*/) = makeArrayQueue; \ No newline at end of file +Queue_T makeQueue(T[]) = makeArrayQueue; \ No newline at end of file From d0fb07406e6b9e85ff33302625c54ce0fb83759f Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Tue, 13 Feb 2024 17:36:44 -0800 Subject: [PATCH 010/133] Partially written test for queue --- tests/datastructures/queue.asy | 27 +++++++++++++--- tests/datastructures/queueTest.asy | 50 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 tests/datastructures/queueTest.asy diff --git a/tests/datastructures/queue.asy b/tests/datastructures/queue.asy index e2a4cad57..a317cd65e 100644 --- a/tests/datastructures/queue.asy +++ b/tests/datastructures/queue.asy @@ -8,6 +8,7 @@ struct Queue_T { T peek(); T pop(); int size(); + T[] toArray(); } Queue_T makeNaiveQueue(T[] initialData) { @@ -28,6 +29,9 @@ Queue_T makeNaiveQueue(T[] initialData) { queue.size = new int() { return data.length; }; + queue.toArray = new T[]() { + return copy(data); + }; return queue; } @@ -45,11 +49,15 @@ struct ArrayQueue_T { start = 0; } + T[] toArray() { + return data[start : start+size]; + } + void operator init(T[] initialData) { if (initialData.length == 0 || alias(initialData, null)) { return; } - desiredLength = data.length; + int desiredLength = data.length; // TODO: Do this computation using CLZ. 
while (desiredLength < initialData.length) { desiredLength *= 2; @@ -86,12 +94,13 @@ struct ArrayQueue_T { } } -Queue_T cast(ArrayQueue_T queue) { +Queue_T operator cast(ArrayQueue_T queue) { Queue_T queue_ = new Queue_T; queue_.push = queue.push; queue_.peek = queue.peek; queue_.pop = queue.pop; queue_.size = queue.size; + queue_.toArray = queue.toArray; return queue_; } @@ -108,6 +117,15 @@ struct LinkedQueue_T { Node tail; int size = 0; + T[] toArray() { + T[] retv = new T[]; + for (Node node = head; node != null; node = node.next) { + retv.push(node.value); + } + assert(retv.length == size, "Size mismatch in toArray"); + return retv; + } + void push(T value) { Node node = new Node; node.value = value; @@ -137,18 +155,19 @@ struct LinkedQueue_T { } } -Queue_T cast(LinkedQueue_T queue) { +Queue_T operator cast(LinkedQueue_T queue) { Queue_T queue_ = new Queue_T; queue_.push = queue.push; queue_.peek = queue.peek; queue_.pop = queue.pop; queue_.size = queue.size; + queue_.toArray = queue.toArray; return queue_; } Queue_T makeLinkedQueue(T[] initialData) { var queue = new LinkedQueue_T; - for (T value in initialData) { + for (T value : initialData) { queue.push(value); } return queue; diff --git a/tests/datastructures/queueTest.asy b/tests/datastructures/queueTest.asy new file mode 100644 index 000000000..8ab8ed1bf --- /dev/null +++ b/tests/datastructures/queueTest.asy @@ -0,0 +1,50 @@ +from queue(T=int) access + Queue_T as Queue_int, + makeNaiveQueue, + makeArrayQueue, + makeLinkedQueue, + makeQueue, + operator cast; + + +struct ActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int PUSH = next(); + static restricted int POP = next(); +} + +// Shouldn't this be builtin? 
+int[][] transpose(int[][] a) { + int n = a.length; + int m = a[0].length; + int[][] b = new int[m][n]; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < m; ++j) { + b[j][i] = a[i][j]; + } + } + return b; +} + +string differences(Queue_int a, Queue_int b) { + if (a.size() != b.size()) { + return 'Different sizes: ' + string(a.size()) + ' vs ' + string(b.size()); + } + if (a.size() != 0) { + if (a.peek() != b.peek()) { + return 'Different peek: ' + string(a.peek()) + ' vs ' + string(b.peek()); + } + } + if (!all(a.toArray() == b.toArray())) { + write(transpose(new int[][]{a.toArray(), b.toArray()})); + return 'Different contents'; + } + return ''; +} + +typedef void Action(Queue_int); + +var actions = new Action[ActionEnum.numActions]; \ No newline at end of file From 972a1a0df30e61e78efc1e09331fd95620b234cf Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Tue, 13 Feb 2024 17:37:58 -0800 Subject: [PATCH 011/133] Added naive version of linkedlist to help with testing. --- tests/datastructures/linkedlist.asy | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/datastructures/linkedlist.asy b/tests/datastructures/linkedlist.asy index 56d9a4ffc..704f30ef1 100644 --- a/tests/datastructures/linkedlist.asy +++ b/tests/datastructures/linkedlist.asy @@ -100,4 +100,47 @@ struct LinkedList_T { }; return it; } +} + +struct NaiveList_T { + T[] data = new T[0]; + + int size() { + return data.length; + } + + void add(T elem) { + data.push(elem); + } + + void insertAtBeginning(T elem) { + data.insert(0, elem); + } + + LinkedIterator_T iterator() { + int i = 0; + int[] lastSeen = new int[]; + LinkedIterator_T it = new LinkedIterator_T; + it.next = new T() { + assert(i < data.length, "No more elements in the list"); + T retv = data[i]; + if (lastSeen.length > 0) { + lastSeen[0] = i; + } else { + lastSeen.push(i); + } + assert(lastSeen.length == 1); + ++i; + return retv; + }; + it.hasNext = new bool() { + return i < data.length; + }; + 
it.delete = new void() {} + assert(lastSeen.length == 1, "No element to delete"); + assert(lastSeen.pop() == --i); + data.delete(i); + }; + return it; + } } \ No newline at end of file From c34c691ac943886cf6f8f14f0acd6ed0a881003d Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Tue, 13 Feb 2024 18:29:17 -0800 Subject: [PATCH 012/133] Successful test of Queue --- {tests/datastructures => base}/queue.asy | 4 +- base/zip.asy | 5 ++ tests/datastructures/queueTest.asy | 87 ++++++++++++++++++++---- 3 files changed, 79 insertions(+), 17 deletions(-) rename {tests/datastructures => base}/queue.asy (98%) create mode 100644 base/zip.asy diff --git a/tests/datastructures/queue.asy b/base/queue.asy similarity index 98% rename from tests/datastructures/queue.asy rename to base/queue.asy index a317cd65e..71d46e6a6 100644 --- a/tests/datastructures/queue.asy +++ b/base/queue.asy @@ -113,8 +113,8 @@ struct LinkedQueue_T { T value; Node next; } - Node head; - Node tail; + Node head = null; + Node tail = null; int size = 0; T[] toArray() { diff --git a/base/zip.asy b/base/zip.asy new file mode 100644 index 000000000..4d02f98b6 --- /dev/null +++ b/base/zip.asy @@ -0,0 +1,5 @@ +typedef import(T); + +T[][] zip(...T[][] arrays) { + return transpose(arrays); +} \ No newline at end of file diff --git a/tests/datastructures/queueTest.asy b/tests/datastructures/queueTest.asy index 8ab8ed1bf..4debd1218 100644 --- a/tests/datastructures/queueTest.asy +++ b/tests/datastructures/queueTest.asy @@ -1,3 +1,7 @@ +import TestLib; + +StartTest("Queue"); + from queue(T=int) access Queue_T as Queue_int, makeNaiveQueue, @@ -16,18 +20,7 @@ struct ActionEnum { static restricted int POP = next(); } -// Shouldn't this be builtin? 
-int[][] transpose(int[][] a) { - int n = a.length; - int m = a[0].length; - int[][] b = new int[m][n]; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < m; ++j) { - b[j][i] = a[i][j]; - } - } - return b; -} +from zip(T=int) access zip; string differences(Queue_int a, Queue_int b) { if (a.size() != b.size()) { @@ -39,12 +32,76 @@ string differences(Queue_int a, Queue_int b) { } } if (!all(a.toArray() == b.toArray())) { - write(transpose(new int[][]{a.toArray(), b.toArray()})); + write(zip(a.toArray(), b.toArray())); return 'Different contents'; } return ''; } -typedef void Action(Queue_int); +string string(int[] a) { + string result = '['; + for (int i = 0; i < a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += string(a[i]); + } + result += ']'; + return result; +} + +typedef void Action(...Queue_int[]); + +Action[] actions = new Action[ActionEnum.numActions]; +actions[ActionEnum.PUSH] = new void(...Queue_int[] qs) { + int toPush = rand(); + for (Queue_int q : qs) { + q.push(toPush); + } +}; +actions[ActionEnum.POP] = new void(...Queue_int[] qs) { + int[] results = new int[]; + for (Queue_int q : qs) { + if (q.size() > 0) { + results.push(q.pop()); + } + } + if (results.length > 0) { + int expected = results[0]; + for (int r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } +}; + +real[] increasingProbs = new real[ActionEnum.numActions]; +increasingProbs[ActionEnum.PUSH] = 0.7; +increasingProbs[ActionEnum.POP] = 0.3; + +real[] decreasingProbs = new real[ActionEnum.numActions]; +decreasingProbs[ActionEnum.PUSH] = 0.3; +decreasingProbs[ActionEnum.POP] = 0.7; + +Queue_int naive = makeNaiveQueue(new int[]); +Queue_int array = makeArrayQueue(new int[]); +Queue_int linked = makeLinkedQueue(new int[]); + +for (int i = 0; i < 2000; ++i) { + // if (i % 100 == 0) { + // write('Step ' + string(i)); + // write('Naive: ' + string(naive.toArray())); + // write('Array: ' + string(array.toArray())); + // write('Linked: ' + 
string(linked.toArray())); + // } + real[] probs = i < 800 ? increasingProbs : decreasingProbs; + int choice = (unitrand() < probs[ActionEnum.PUSH] + ? ActionEnum.PUSH + : ActionEnum.POP); + actions[choice](naive, array, linked); + string diffs = differences(naive, array); + assert(diffs == '', 'Naive vs array: \n' + diffs); + diffs = differences(naive, linked); + assert(diffs == '', 'Naive vs linked: \n' + diffs); +} -var actions = new Action[ActionEnum.numActions]; \ No newline at end of file +EndTest(); \ No newline at end of file From 81e6924fe840d9f49302ad852ec78100d06d7cc0 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 14:22:30 -0800 Subject: [PATCH 013/133] Debugging linkedlist. --- tests/datastructures/linkedlist.asy | 58 +++++- tests/datastructures/linkedlistTest.asy | 252 ++++++++++++++++++++++++ 2 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 tests/datastructures/linkedlistTest.asy diff --git a/tests/datastructures/linkedlist.asy b/tests/datastructures/linkedlist.asy index 704f30ef1..06dcb2fe5 100644 --- a/tests/datastructures/linkedlist.asy +++ b/tests/datastructures/linkedlist.asy @@ -5,6 +5,15 @@ struct LinkedIterator_T { bool hasNext(); void delete(); } +typedef LinkedIterator_T Iter; // for qualified access + +struct List_T { + int size(); + void add(T elem); + void insertAtBeginning(T elem); + LinkedIterator_T iterator(); +} +typedef List_T L; // for qualified access struct LinkedList_T { struct Node { @@ -81,10 +90,13 @@ struct LinkedList_T { } else if (next == null) { // Delete the tail. assert(previous != extraNode, "Bug in iterator"); + write('Deleting tail'); tail = previous; // This works because we did not advance previous when we reached the end of the list. tail.next = null; } else { + assert(previous != extraNode, "Bug in iterator"); + write('Deleting middle'); // Copy next to previous. 
previous.data = next.data; previous.next = next.next; @@ -101,6 +113,24 @@ struct LinkedList_T { return it; } } +typedef LinkedList_T Linked; + +List_T makeLinked(T[] initialData) { + List_T list = new List_T; + LinkedList_T linked = new LinkedList_T; + list.add = linked.add; + list.size = linked.size; + list.insertAtBeginning = linked.insertAtBeginning; + list.iterator = linked.iterator; + for (T elem : initialData) { + list.add(elem); + } + return list; +} + +L make() { // for qualified access + return makeLinked(new T[0]); +} struct NaiveList_T { T[] data = new T[0]; @@ -136,11 +166,37 @@ struct NaiveList_T { it.hasNext = new bool() { return i < data.length; }; - it.delete = new void() {} + it.delete = new void() { assert(lastSeen.length == 1, "No element to delete"); assert(lastSeen.pop() == --i); data.delete(i); }; return it; } +} +typedef NaiveList_T Naive; // for qualified access + +List_T makeNaive(T[] initialData) { + List_T list = new List_T; + NaiveList_T naive = new NaiveList_T; + list.add = naive.add; + list.size = naive.size; + list.insertAtBeginning = naive.insertAtBeginning; + list.iterator = naive.iterator; + for (T elem: initialData) { + list.add(elem); + } + return list; +} + +L makeNaive() { // for qualified access + return makeNaive(new T[0]); +} + +T[] toArray(List_T list) { + T[] retv = new T[]; + for (Iter it = list.iterator(); it.hasNext(); ) { + retv.push(it.next()); + } + return retv; } \ No newline at end of file diff --git a/tests/datastructures/linkedlistTest.asy b/tests/datastructures/linkedlistTest.asy new file mode 100644 index 000000000..f9e086de1 --- /dev/null +++ b/tests/datastructures/linkedlistTest.asy @@ -0,0 +1,252 @@ +import TestLib; + +StartTest("LinkedList"); + +access "datastructures/linkedlist"(T=int) as list_int; + +struct ListActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int ADD = next(); + static restricted int INSERT = next(); + 
static restricted int ITERATE = next(); + restricted int choice; + void operator init(int choice) { + assert(choice < numActions, 'Invalid ListActionEnum choice: ' + string(choice)); + assert(choice >= 0, 'Invalid ListActionEnum choice: ' + string(choice)); + this.choice = choice; + } + static ListActionEnum add = ListActionEnum(ADD); + static ListActionEnum insert = ListActionEnum(INSERT); + static ListActionEnum iterate = ListActionEnum(ITERATE); +} +bool operator == (ListActionEnum a, ListActionEnum b) { + return a.choice == b.choice; +} +bool operator == (ListActionEnum a, int b) { + return a.choice == b; +} +string operator ecast(ListActionEnum a) { + if (a == ListActionEnum.add) { + return 'ADD'; + } else if (a == ListActionEnum.insert) { + return 'INSERT'; + } else if (a == ListActionEnum.iterate) { + return 'ITERATE'; + } + assert(false); + return ''; +} + +// Actions that can be taken using an iterator. +struct IterActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int TRY_NEXT = next(); + static restricted int TRY_DELETE = next(); + static restricted int END_EARLY = next(); + restricted int choice; + void operator init(int choice) { + assert(choice < numActions, 'Invalid IterActionEnum choice: ' + string(choice)); + assert(choice >= 0, 'Invalid IterActionEnum choice: ' + string(choice)); + this.choice = choice; + } + static IterActionEnum tryNext = IterActionEnum(TRY_NEXT); + static IterActionEnum tryDelete = IterActionEnum(TRY_DELETE); + static IterActionEnum endEarly = IterActionEnum(END_EARLY); +} +bool operator == (IterActionEnum a, IterActionEnum b) { + return a.choice == b.choice; +} +bool operator == (IterActionEnum a, int b) { + return a.choice == b; +} +string operator ecast(IterActionEnum a) { + if (a == IterActionEnum.tryNext) { + return 'TRY_NEXT'; + } else if (a == IterActionEnum.tryDelete) { + return 'TRY_DELETE'; + } else if (a == IterActionEnum.endEarly) { + 
return 'END_EARLY'; + } + assert(false); + return ''; +} + +from zip(T=int) access zip; + +void writeArrays(int unused); +void writeArrays(...int[][] arrays) { + write(zip(...arrays)); +} + +string differences(list_int.L a, list_int.L b) { + if (a.size() != b.size()) { + return 'Different sizes: ' + string(a.size()) + ' vs ' + string(b.size()); + } + if (!all(list_int.toArray(a) == list_int.toArray(b))) { + writeArrays(list_int.toArray(a), list_int.toArray(b)); + return 'Different contents'; + } + return ''; +} + +string string(int[] a) { + string result = '['; + for (int i = 0; i < a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += string(a[i]); + } + result += ']'; + return result; +} + +typedef void ListAction(...list_int.L[]); +list_int.Iter[] iters; +void endIters() { + iters.delete(); +} +bool inIterMode() { + return iters.length > 0; +} +bool canDelete; +int numDeletions = 0; + +ListAction[] listActions = new ListAction[ListActionEnum.numActions]; +listActions[ListActionEnum.ADD] = new void(...list_int.L[] lists) { + int toAdd = rand() % 100; + for (list_int.L list : lists) { + list.add(toAdd); + } +}; +listActions[ListActionEnum.INSERT] = new void(...list_int.L[] lists) { + int toAdd = rand() % 100 - 100; + for (list_int.L list : lists) { + list.insertAtBeginning(toAdd); + } +}; +listActions[ListActionEnum.ITERATE] = new void(...list_int.L[] lists) { + for (list_int.L list : lists) { + iters.push(list.iterator()); + } + canDelete = false; +}; + +typedef void IterAction(); +IterAction[] iterActions = new IterAction[IterActionEnum.numActions]; +iterActions[IterActionEnum.TRY_NEXT] = new void() { + if (iters.length == 0) { + return; + } + int[] nexts; + if (!iters[0].hasNext()) { + write('no next'); + for (list_int.Iter iter : iters) { + if (iter.hasNext()) { + writeArrays(0); + write('hasNext should be false'); + assert(false); + } + } + endIters(); + return; + } + for (list_int.Iter iter : iters) { + write('examining next'); + if 
(!iter.hasNext()) { + writeArrays(0); + write('hasNext should be true'); + assert(false); + } + nexts.push(iter.next()); + } + canDelete = true; + int val = nexts[0]; + write('next: ' + string(val)); + if (!all(nexts == val)) { + writeArrays(0); + write('Nexts should all be ' + string(val)); + write('Nexts: ' + string(nexts)); + assert(false); + } +}; +iterActions[IterActionEnum.TRY_DELETE] = new void() { + if (!canDelete) { + return; + } + for (list_int.Iter iter : iters) { + iter.delete(); + } + write('deleted item'); + ++numDeletions; + canDelete = false; +}; +iterActions[IterActionEnum.END_EARLY] = endIters; + +real clamp(real x, real keyword min, real keyword max) { + if (x < min) { + return min; + } + if (x > max) { + return max; + } + return x; +} + +ListActionEnum nextListAction(real desiredLength, int length) { + real lengthenProb = clamp(0.1 + 0.01 * (desiredLength - length), min=0, max=1); + if (unitrand() < lengthenProb) { + if (rand() % 2 == 0) { + return ListActionEnum.add; + } else { + return ListActionEnum.insert; + } + } + return ListActionEnum.iterate; +} +IterActionEnum nextIterAction(real desiredLength, int length) { + real deleteProb = clamp(0.1 + 0.01 * (length - desiredLength), min=0, max=1); + if (unitrand() < deleteProb) + return IterActionEnum.tryDelete; + real endProb = 1 / (2 * length); + if (unitrand() < endProb) + return IterActionEnum.endEarly; + return IterActionEnum.tryNext; +} + +list_int.L naive = list_int.makeNaive(); +list_int.L linked = list_int.make(); + +writeArrays = new void(int unused) { + writeArrays(list_int.toArray(naive), list_int.toArray(linked)); +}; + +for (int i = 0; i < 2000; ++i) { + write(i); + if (i > 200) { + write('Step ' + string(i)); + writeArrays(0); + } + int desiredLength = (i < 800 ? 
10 : -1); + if (inIterMode()) { + IterActionEnum action = nextIterAction(desiredLength, iters.length); + if (i > 200) + write('next action: ' + (string)action); + iterActions[action.choice](); + } else { + ListActionEnum action = nextListAction(desiredLength, linked.size()); + if (i > 200) + write('next action: ' + (string)action); + listActions[action.choice](naive, linked); + } + string diffs = differences(naive, linked); + assert(diffs == '', diffs); +} + +EndTest(); \ No newline at end of file From e8de9d321c5e9857f7ffba921a949bc1da4c444f Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 15:06:59 -0800 Subject: [PATCH 014/133] LinkedList appears to be fixed. --- tests/datastructures/linkedlist.asy | 75 ++++++++++--------------- tests/datastructures/linkedlistTest.asy | 9 +-- 2 files changed, 36 insertions(+), 48 deletions(-) diff --git a/tests/datastructures/linkedlist.asy b/tests/datastructures/linkedlist.asy index 06dcb2fe5..3a8d6606f 100644 --- a/tests/datastructures/linkedlist.asy +++ b/tests/datastructures/linkedlist.asy @@ -35,12 +35,13 @@ struct LinkedList_T { } void add(T data) { + Node newNode = Node(data); if (head == null) { - head = Node(data); - tail = head; + head = newNode; + tail = newNode; } else { - tail.next = Node(data); - tail = tail.next; + tail.next = newNode; + tail = newNode; } ++size; ++numChanges; @@ -55,24 +56,23 @@ struct LinkedList_T { LinkedIterator_T iterator() { Node next = head; - Node previous = new Node; - previous.next = head; - Node extraNode = previous; // This Node is not actually in the list. Remember it for bug checks. 
+ Node current = null; + Node previous = null; LinkedIterator_T it = new LinkedIterator_T; int it_numChanges = numChanges; - bool canDelete = false; + bool canDelete() { + return current != null; + } it.next = new T() { assert(next != null, "No more elements in the list"); assert(it_numChanges == numChanges, "Concurrent modification detected"); - assert(next == previous.next, "Bug in iterator"); - T retv = next.data; - if (next.next != null) { // If we're not at the end of the list, advance previous. - previous = next; + // Advance previous, current, next: + if (current != null) { + previous = current; } + current = next; next = next.next; - canDelete = true; - assert(next != extraNode, "Bug in iterator"); - return retv; + return current.data; }; it.hasNext = new bool() { assert(it_numChanges == numChanges, "Concurrent modification detected"); @@ -80,35 +80,22 @@ struct LinkedList_T { }; it.delete = new void() { assert(it_numChanges == numChanges, "Concurrent modification detected"); - assert(canDelete, "No element to delete"); - assert(previous != null, "Bug in iterator"); + assert(canDelete(), "No element to delete"); assert(size > 0, "Bug in iterator"); - if (size == 1) { - // Delete the only element in the list. - head = null; - tail = null; - } else if (next == null) { - // Delete the tail. - assert(previous != extraNode, "Bug in iterator"); - write('Deleting tail'); - tail = previous; // This works because we did not advance previous when we reached the end of the list. - tail.next = null; + if (current == tail) { + tail = previous; + } + if (previous != null) { + previous.next = next; + current = null; } else { - - assert(previous != extraNode, "Bug in iterator"); - write('Deleting middle'); - // Copy next to previous. - previous.data = next.data; - previous.next = next.next; - - // Advance next. 
- next = next.next; + assert(current == head, "Bug in iterator"); + head = next; + current = null; } - --size; ++numChanges; ++it_numChanges; - canDelete = false; }; return it; } @@ -118,12 +105,12 @@ typedef LinkedList_T Linked; List_T makeLinked(T[] initialData) { List_T list = new List_T; LinkedList_T linked = new LinkedList_T; - list.add = linked.add; list.size = linked.size; + list.add = linked.add; list.insertAtBeginning = linked.insertAtBeginning; list.iterator = linked.iterator; - for (T elem : initialData) { - list.add(elem); + for (int i = initialData.length - 1; i >= 0; --i) { + list.insertAtBeginning(initialData[i]); } return list; } @@ -179,12 +166,12 @@ typedef NaiveList_T Naive; // for qualified access List_T makeNaive(T[] initialData) { List_T list = new List_T; NaiveList_T naive = new NaiveList_T; - list.add = naive.add; list.size = naive.size; + list.add = naive.add; list.insertAtBeginning = naive.insertAtBeginning; list.iterator = naive.iterator; - for (T elem: initialData) { - list.add(elem); + for (int i = initialData.length - 1; i >= 0; --i) { + list.insertAtBeginning(initialData[i]); } return list; } diff --git a/tests/datastructures/linkedlistTest.asy b/tests/datastructures/linkedlistTest.asy index f9e086de1..3e5781f29 100644 --- a/tests/datastructures/linkedlistTest.asy +++ b/tests/datastructures/linkedlistTest.asy @@ -31,7 +31,8 @@ bool operator == (ListActionEnum a, int b) { string operator ecast(ListActionEnum a) { if (a == ListActionEnum.add) { return 'ADD'; - } else if (a == ListActionEnum.insert) { + } else + if (a == ListActionEnum.insert) { return 'INSERT'; } else if (a == ListActionEnum.iterate) { return 'ITERATE'; @@ -200,7 +201,7 @@ real clamp(real x, real keyword min, real keyword max) { } ListActionEnum nextListAction(real desiredLength, int length) { - real lengthenProb = clamp(0.1 + 0.01 * (desiredLength - length), min=0, max=1); + real lengthenProb = clamp(0.1 + 0.1 * (desiredLength - length), min=0, max=1); if (unitrand() < 
lengthenProb) { if (rand() % 2 == 0) { return ListActionEnum.add; @@ -211,7 +212,7 @@ ListActionEnum nextListAction(real desiredLength, int length) { return ListActionEnum.iterate; } IterActionEnum nextIterAction(real desiredLength, int length) { - real deleteProb = clamp(0.1 + 0.01 * (length - desiredLength), min=0, max=1); + real deleteProb = clamp(0.1 + 0.1 * (length - desiredLength), min=0, max=1); if (unitrand() < deleteProb) return IterActionEnum.tryDelete; real endProb = 1 / (2 * length); @@ -233,7 +234,7 @@ for (int i = 0; i < 2000; ++i) { write('Step ' + string(i)); writeArrays(0); } - int desiredLength = (i < 800 ? 10 : -1); + int desiredLength = (i < 800 ? 100 : 1); if (inIterMode()) { IterActionEnum action = nextIterAction(desiredLength, iters.length); if (i > 200) From 3ce65140c603ff347462cbe4884dd8f396b6b771 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 15:22:09 -0800 Subject: [PATCH 015/133] Made linkedlistTest less verbose --- tests/datastructures/linkedlistTest.asy | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/datastructures/linkedlistTest.asy b/tests/datastructures/linkedlistTest.asy index 3e5781f29..b400629a6 100644 --- a/tests/datastructures/linkedlistTest.asy +++ b/tests/datastructures/linkedlistTest.asy @@ -147,7 +147,7 @@ iterActions[IterActionEnum.TRY_NEXT] = new void() { } int[] nexts; if (!iters[0].hasNext()) { - write('no next'); + // write('no next'); for (list_int.Iter iter : iters) { if (iter.hasNext()) { writeArrays(0); @@ -159,7 +159,7 @@ iterActions[IterActionEnum.TRY_NEXT] = new void() { return; } for (list_int.Iter iter : iters) { - write('examining next'); + // write('examining next'); if (!iter.hasNext()) { writeArrays(0); write('hasNext should be true'); @@ -169,7 +169,7 @@ iterActions[IterActionEnum.TRY_NEXT] = new void() { } canDelete = true; int val = nexts[0]; - write('next: ' + string(val)); + // write('next: ' + string(val)); if (!all(nexts == 
val)) { writeArrays(0); write('Nexts should all be ' + string(val)); @@ -184,7 +184,7 @@ iterActions[IterActionEnum.TRY_DELETE] = new void() { for (list_int.Iter iter : iters) { iter.delete(); } - write('deleted item'); + // write('deleted item'); ++numDeletions; canDelete = false; }; @@ -229,21 +229,16 @@ writeArrays = new void(int unused) { }; for (int i = 0; i < 2000; ++i) { - write(i); - if (i > 200) { - write('Step ' + string(i)); - writeArrays(0); - } + // write('Step ' + string(i)); + // writeArrays(0); int desiredLength = (i < 800 ? 100 : 1); if (inIterMode()) { IterActionEnum action = nextIterAction(desiredLength, iters.length); - if (i > 200) - write('next action: ' + (string)action); + // write('next action: ' + (string)action); iterActions[action.choice](); } else { ListActionEnum action = nextListAction(desiredLength, linked.size()); - if (i > 200) - write('next action: ' + (string)action); + // write('next action: ' + (string)action); listActions[action.choice](naive, linked); } string diffs = differences(naive, linked); From 8289e953851341142a560a1c9c8a4fa677a47f20 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 15:24:53 -0800 Subject: [PATCH 016/133] Moved linkedlist to base/. 
--- {tests/datastructures => base}/linkedlist.asy | 0 tests/datastructures/linkedlistTest.asy | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename {tests/datastructures => base}/linkedlist.asy (100%) diff --git a/tests/datastructures/linkedlist.asy b/base/linkedlist.asy similarity index 100% rename from tests/datastructures/linkedlist.asy rename to base/linkedlist.asy diff --git a/tests/datastructures/linkedlistTest.asy b/tests/datastructures/linkedlistTest.asy index b400629a6..8bb1531d9 100644 --- a/tests/datastructures/linkedlistTest.asy +++ b/tests/datastructures/linkedlistTest.asy @@ -2,7 +2,7 @@ import TestLib; StartTest("LinkedList"); -access "datastructures/linkedlist"(T=int) as list_int; +access linkedlist(T=int) as list_int; struct ListActionEnum { static restricted int numActions = 0; From ad802222df9a606b085eec47d7cff255e097cbaf Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 15:25:35 -0800 Subject: [PATCH 017/133] Remove outdated comment. --- base/queue.asy | 3 --- 1 file changed, 3 deletions(-) diff --git a/base/queue.asy b/base/queue.asy index 71d46e6a6..07870a101 100644 --- a/base/queue.asy +++ b/base/queue.asy @@ -1,8 +1,5 @@ typedef import(T); -// This is supposed to be an interface. We should probably import it from -// somewhere outside the test folder. Also we should decide on a style for -// naming interfaces. 
struct Queue_T { void push(T value); T peek(); From a0b297b96b3d5ad79bdcd052bdc1dda42a61cb2e Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Wed, 14 Feb 2024 15:27:50 -0800 Subject: [PATCH 018/133] Add datastructures tests to makefile --- tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index d26b65050..53591a399 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,6 @@ .NOTPARALLEL: -TESTDIRS = string arith frames types imp array pic gs io template +TESTDIRS = string arith frames types imp array pic gs io template datastructures EXTRADIRS = gsl output From d35fe42c9db4560f2a30f3dfc5da64d4dce3473f Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Thu, 15 Feb 2024 08:31:46 -0800 Subject: [PATCH 019/133] Add some generated files to gitignore. --- .gitignore | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.gitignore b/.gitignore index afb93c096..90a2eea75 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,9 @@ GUI/*/__pycache__ /config.status /configure /doc/asy-latex.i* +/doc/asy-latex.hd /doc/asy.1 +/doc/asymptote_html/ /glrender.d.54461 /gsl.symbols.h /keywords.cc @@ -67,6 +69,10 @@ GUI/*/__pycache__ /types.symbols.h *.dSYM .DS_Store +/errors.temp +/base/webgl/asygl.js +/v3dheadertypes.py +/v3dtypes.py ### TeX-related ## Core latex/pdflatex auxiliary files: @@ -94,6 +100,9 @@ GUI/*/__pycache__ /doc/**/asymptote.* !/doc/asymptote.texi /doc/options +/doc/latexusage-?.asy +/doc/latexusage-?.tex +/doc/latexusage-*.pbsdat .asy_* ## Bibliography auxiliary files (bibtex/biblatex/biber): @@ -159,6 +168,7 @@ renderDocSettings *.html !index.html +/doc/png/index.html !webgl/WebGL*.html v3dheadertypes.h From da1d93c1a2ad4a683cc5db137c99b01c98838b24 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sat, 17 Feb 2024 16:12:50 -0800 Subject: [PATCH 020/133] Add hash function for integer arrays. 
--- runarray.in | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/runarray.in b/runarray.in index 6748f7a89..f7dcc3948 100644 --- a/runarray.in +++ b/runarray.in @@ -20,6 +20,10 @@ triplearray2* => tripleArray2() callableReal* => realRealFunction() +#include +#include // for std::hash +#include + #include "array.h" #include "arrayop.h" #include "triple.h" @@ -27,6 +31,15 @@ callableReal* => realRealFunction() #include "Delaunay.h" #include "glrender.h" +#ifdef __has_include +# if __has_include() +# include +# endif +#endif +#ifndef __cpp_lib_string_view +# define __cpp_lib_string_view 0 +#endif + #ifdef HAVE_LIBFFTW3 #include "fftw++.h" static const char *rectangular="matrix must be rectangular"; @@ -932,6 +945,32 @@ Intarray* complement(Intarray *a, Int n) return r; } +Int hash(Intarray *a) +{ + size_t asize=checkArray(a); +#if COMPACT + char* dataPointer = static_cast(static_cast(a->data())); + size_t size = (sizeof(item) / sizeof(char)) * asize; +#else + std::vector v{asize}; + for (size_t i = 0; i < asize; ++i) { + v[i] = static_cast(read(a,i)); + } + char* dataPointer = static_cast(static_cast(v->data())); + size_t size = (sizeof(uint64_t) / sizeof(char)) * asize; +#endif +#if __cpp_lib_string_view + std::string_view dataView = {dataPointer, size}; + uint64_t hashResult = std::hash{}(dataView); +#else + std::string dataView = {dataPointer, size}; + uint64_t hashResult = std::hash{}(dataView); +#endif + // Zero out the highest two bits to avoid conflicts with DefaultValue and + // Undefined: + return hashResult & UINT64_C(0x3fffffffffffffff); +} + // Generate the sequence {f(i) : i=0,1,...n-1} given a function f and integer n Intarray* :arraySequence(callable *f, Int n) { From 8aa3bf3c2c7a0b3c05738077274446f924776882 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Mon, 19 Feb 2024 16:07:36 -0800 Subject: [PATCH 021/133] Partial spaytree implementation. 
--- base/splaytree.asy | 250 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 base/splaytree.asy diff --git a/base/splaytree.asy b/base/splaytree.asy new file mode 100644 index 000000000..b6de6bb65 --- /dev/null +++ b/base/splaytree.asy @@ -0,0 +1,250 @@ +typedef import(T); + +private struct treenode { + treenode leftchild; + treenode rightchild; + T value; + void operator init(T value) { + this.value = value; + } + + void inOrder(void run(T)) { + if (leftchild != null) leftchild.inOrder(run); + run(value); + if (rightchild != null) rightchild.inOrder(run); + } +} + +private treenode splay(treenode[] ancestors, bool lessthan(T a, T b)) { + bool operator < (T a, T b) = lessthan; + + if (ancestors.length == 0) return null; + + treenode root = ancestors[0]; + treenode current = ancestors.pop(); + + while (ancestors.length >= 2) { + treenode parent = ancestors.pop(); + treenode grandparent = ancestors.pop(); + + if (ancestors.length > 0) { + treenode greatparent = ancestors[-1]; + if (greatparent.leftchild == grandparent) { + greatparent.leftchild = current; + } else greatparent.rightchild = current; + } + + bool currentside = (parent.leftchild == current); + bool grandside = (grandparent.leftchild == parent); + + if (currentside == grandside) { // zig-zig + if (currentside) { // both left + treenode B = current.rightchild; + treenode C = parent.rightchild; + + current.rightchild = parent; + parent.leftchild = B; + parent.rightchild = grandparent; + grandparent.leftchild = C; + } else { // both right + treenode B = parent.leftchild; + treenode C = current.leftchild; + + current.leftchild = parent; + parent.leftchild = grandparent; + parent.rightchild = C; + grandparent.rightchild = B; + } + } else { // zig-zag + if (grandside) { // left-right + treenode B = current.leftchild; + treenode C = current.rightchild; + + current.leftchild = parent; + current.rightchild = grandparent; + parent.rightchild = B; + 
grandparent.leftchild = C; + } else { //right-left + treenode B = current.leftchild; + treenode C = current.rightchild; + + current.leftchild = grandparent; + current.rightchild = parent; + grandparent.rightchild = B; + parent.leftchild = C; + } + } + } + + if (ancestors.length > 0) { + ancestors.pop(); + if (current == root.leftchild) { + treenode B = current.rightchild; + current.rightchild = root; + root.leftchild = B; + } else { + treenode B = current.leftchild; + current.leftchild = root; + root.rightchild = B; + } + } + + return current; +} + +struct splaytree_T { + treenode root = null; + restricted int size = 0; + private bool operator < (T a, T b); + + void operator init(bool lessthan(T,T)) { + operator< = lessthan; + } + + bool contains(T value) { + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return false; + } + if (value < current.value) { + parentStack.push(current.leftchild); + } else if (current.value < value) { + parentStack.push(current.rightchild); + } else break; + } + root = splay(parentStack, operator<); + return true; + } + + /* + * returns true iff the tree was modified + */ + bool add(T value) { + if (root == null) { + root = treenode(value); + ++size; + return true; + } + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + ancestors.push(root); + + bool toReturn = false; + + while (!toReturn) { + treenode current = ancestors[-1]; + if (value < current.value) { + if (current.leftchild == null) { + current.leftchild = treenode(value); + toReturn = true; + } + ancestors.push(current.leftchild); + } else if (current.value < value) { + if (current.rightchild == null) { + current.rightchild = treenode(value); + toReturn = true; + } + ancestors.push(current.rightchild); + } else { + root = splay(ancestors, operator<); + return false; + } + 
} + + root = splay(ancestors, operator<); + ++size; + return true; + } + + /* + * returns true iff the tree was modified + */ + bool delete(T value) { + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; // Makes ancestors[-1] refer to the last entry. + ancestors.push(root); + + while (true) { + treenode current = ancestors[-1]; + if (current == null) { + ancestors.pop(); + root = splay(ancestors, operator<); + return false; + } + if (value < current.value) + ancestors.push(current.leftchild); + else if (current.value < value) + ancestors.push(current.rightchild); + else break; + } + + treenode toDelete = ancestors.pop(); + treenode parent = null; + if (ancestors.length > 0) parent = ancestors[-1]; + + if (toDelete.leftchild == null) { + if (parent != null) { + if (parent.rightchild == toDelete) { + parent.rightchild = toDelete.rightchild; + } else { + parent.leftchild = toDelete.rightchild; + } + } else root = toDelete.rightchild; + } else if (toDelete.rightchild == null) { + if (parent == null) { + root = toDelete.leftchild; + } else if (parent.rightchild == toDelete) { + parent.rightchild = toDelete.leftchild; + } else parent.leftchild = toDelete.leftchild; + } else { + treenode[] innerStack = new treenode[0]; + innerStack.cyclic = true; + treenode current = toDelete.rightchild; + while (current != null) { + innerStack.push(current); + current = current.leftchild; + } + toDelete.rightchild = splay(innerStack, operator<); + toDelete.value = toDelete.rightchild.value; + toDelete.rightchild = toDelete.rightchild.rightchild; + } + + if (parent != null) root = splay(ancestors, operator<); + --size; + return true; + } + + T popMax(T default) { + if (root == null) return default; + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + treenode current = root; + while (current != null) { + ancestors.push(current); + current = current.rightchild; + } + root = splay(ancestors, operator<); + T toReturn = root.value; + // TODO(optimization): 
Refactor deleteRoot out of the delete function, + // and call deleteRoot instead of delete. + delete(toReturn); + return toReturn; + } + + bool empty() { + assert((root == null) == (size == 0)); + return root == null; + } + + void forEach(void run(T)) { + if (root == null) return; + root.inOrder(run); + } + +} From c4a218e4b9cb9706ee071fda2c7780b54de06b4b Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Tue, 20 Feb 2024 18:21:53 -0800 Subject: [PATCH 022/133] Add SortedSet interface and reference implementation. --- base/sortedset.asy | 137 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 base/sortedset.asy diff --git a/base/sortedset.asy b/base/sortedset.asy new file mode 100644 index 000000000..71bffa503 --- /dev/null +++ b/base/sortedset.asy @@ -0,0 +1,137 @@ +typedef import(T); + +struct SortedSet_T { + int size(); + bool empty() { return size() == 0; } + bool contains(T item); + // Returns the least element > item, or emptyresponse if there is no such + // element. + T after(T item); + // Returns the greatest element < item, or emptyresponse if there is no such + // element. + T before(T item); + T firstGEQ(T item) { return contains(item) ? item : after(item); } + T firstLEQ(T item) { return contains(item) ? item : before(item); } + T min(); // Returns emptyresponse if collection is empty. + T popMin(); // Returns emptyresponse if collection is empty. + T max(); // Returns emptyresponse if collection is empty. + T popMax(); // Returns emptyresponse if collection is empty. + bool insert(T item); // Returns true iff the collection is modified. + T get(T item); // Returns the item in the collection that is + // equivalent to item, or emptyresponse if there is no + // such item. + bool delete(T item); // Returns true iff the collection is modified. + // Calls process on each item in the collection, in ascending order, + // until process returns false. 
+ void foreach(bool process(T item)); +} + +// For testing purposes, we provide a naive implementation of SortedSet_T. +// This implementation is highly inefficient, but it is correct, and can be +// used to test other implementations of SortedSet_T. +struct NaiveSortedSet_T { + private bool lt(T a, T b); + private T[] buffer = new T[0]; + private T emptyresponse; + + private bool leq(T a, T b) { + return !lt(b, a); + } + private bool gt(T a, T b) { + return lt(b, a); + } + private bool geq(T a, T b) { + return leq(b, a); + } + private bool equiv(T a, T b) { + return leq(a, b) && leq(b, a); + } + + void operator init(bool lessThan(T, T), T emptyresponse) { + this.lt = lessThan; + this.emptyresponse = emptyresponse; + } + + int size() { + return buffer.length; + } + + bool contains(T item) { + for (T possibility in buffer) { + if (equiv(possibility, item)) return true; + } + return false; + } + + T after(T item) { + for (T possibility in buffer) { + if (gt(possibility, item)) return possibility; + } + return emptyresponse; + } + + T before(T item) { + for (int ii = buffer.length - 1; ii >= 0; --ii) { + T possibility = buffer[ii]; + if (lt(possibility, item)) return possibility; + } + return emptyresponse; + } + + T min() { + if (buffer.length == 0) return emptyresponse; + return buffer[0]; + } + + T popMin() { + if (buffer.length == 0) return emptyresponse; + T toreturn = buffer[0]; + buffer.delete(0); + return toreturn; + } + + T max() { + if (buffer.length == 0) return emptyresponse; + return buffer[buffer.length - 1]; + } + + T popMax() { + if (buffer.length == 0) return emptyresponse; + return buffer.pop(); + } + + bool insert(T item) { + for (int i = 0; i < buffer.length; ++i) { + if (equiv(buffer[i], item)) return false; + else if (gt(buffer[i], item)) { + buffer.insert(i, item); + return true; + } + } + buffer.push(item); + return true; + } + + T get(T item) { + for (T possibility in buffer) { + if (equiv(possibility, item)) return possibility; + } + return 
emptyresponse; + } + + bool delete(T item) { + for (int i = 0; i < buffer.length; ++i) { + if (equiv(buffer[i], item)) { + buffer.delete(i); + return true; + } + } + return false; + } + + void foreach(bool process(T item)) { + for (T item in buffer) { + if (!process(item)) break; + } + } +} From cca198312ca4ef055d33a434bfee8e26afac1967 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sat, 24 Feb 2024 22:16:26 -0800 Subject: [PATCH 023/133] Test NaiveSortedSet (and fix bugs). --- base/pureset.asy | 116 +++++++++++ base/sortedset.asy | 83 +++++++- base/wrapper.asy | 12 ++ tests/datastructures/sortedsetTest.asy | 260 +++++++++++++++++++++++++ 4 files changed, 465 insertions(+), 6 deletions(-) create mode 100644 base/pureset.asy create mode 100644 base/wrapper.asy create mode 100644 tests/datastructures/sortedsetTest.asy diff --git a/base/pureset.asy b/base/pureset.asy new file mode 100644 index 000000000..472a37ccb --- /dev/null +++ b/base/pureset.asy @@ -0,0 +1,116 @@ +typedef import(T); + +struct Set_T { + int size(); + bool empty() { + return size() == 0; + } + bool contains(T item); + bool insert(T item); + T replace(T item); // Inserts item, and returns the item that was + // replaced, or emptyresponse if no item was replaced. + T get(T item); + bool delete(T item); + // Calls process on each item in the collection until process returns false. 
+ void forEach(bool process(T item)); +} + +struct NaiveSet_T { + private T[] buffer = new T[0]; + private T emptyresponse; + private bool equiv(T a, T b); + + void operator init(bool equiv(T a, T b), T emptyresponse) { + this.equiv = equiv; + this.emptyresponse = emptyresponse; + } + + int size() { + return buffer.length; + } + + bool contains(T item) { + for (T a : buffer) { + if (equiv(a, item)) { + return true; + } + } + return false; + } + + bool insert(T item) { + if (contains(item)) { + return false; + } + buffer.push(item); + return true; + } + + T replace(T item) { + for (int i = 0; i < buffer.length; ++i) { + if (equiv(buffer[i], item)) { + T old = buffer[i]; + buffer[i] = item; + return old; + } + } + buffer.push(item); + return emptyresponse; + } + + T get(T item) { + for (T a : buffer) { + if (equiv(a, item)) { + return a; + } + } + return emptyresponse; + } + + bool delete(T item) { + for (int i = 0; i < buffer.length; ++i) { + if (equiv(buffer[i], item)) { + buffer[i] = buffer[buffer.length - 1]; + buffer.pop(); + return true; + } + } + return false; + } + + void forEach(bool process(T item)) { + for (T a : buffer) { + if (!process(a)) { + return; + } + } + } + +} + +Set_T operator cast(NaiveSet_T naiveSet) { + Set_T set = new Set_T; + set.size = naiveSet.size; + set.contains = naiveSet.contains; + set.insert = naiveSet.insert; + set.replace = naiveSet.replace; + set.get = naiveSet.get; + set.delete = naiveSet.delete; + set.forEach = naiveSet.forEach; + return set; +} + +T[] operator cast(Set_T set) { + T[] buffer = new T[set.size()]; + int i = 0; + set.forEach(new bool(T item) { + buffer[i] = item; + ++i; + return true; + }); + return buffer; +} + +Set_T makeNaiveSet(bool equiv(T, T), T emptyresponse) { + return NaiveSet_T(equiv, emptyresponse); +} \ No newline at end of file diff --git a/base/sortedset.asy b/base/sortedset.asy index 71bffa503..bae09958b 100644 --- a/base/sortedset.asy +++ b/base/sortedset.asy @@ -1,5 +1,7 @@ typedef import(T); 
+from pureset(T=T) access Set_T, operator cast, makeNaiveSet; + struct SortedSet_T { int size(); bool empty() { return size() == 0; } @@ -17,15 +19,41 @@ struct SortedSet_T { T max(); // Returns emptyresponse if collection is empty. T popMax(); // Returns emptyresponse if collection is empty. bool insert(T item); // Returns true iff the collection is modified. + T replace(T item); // Inserts item, and returns the item that was + // replaced, or emptyresponse if no item was replaced. T get(T item); // Returns the item in the collection that is // equivalent to item, or emptyresponse if there is no // such item. bool delete(T item); // Returns true iff the collection is modified. // Calls process on each item in the collection, in ascending order, // until process returns false. - void foreach(bool process(T item)); + void forEach(bool process(T item)); +} + +T[] operator cast(SortedSet_T set) { + T[] result; + set.forEach(new bool(T item) { + result.push(item); + return true; + }); + return result; } +Set_T unSort(SortedSet_T sorted_set) { + Set_T set = new Set_T; + set.size = sorted_set.size; + set.empty = sorted_set.empty; + set.contains = sorted_set.contains; + set.insert = sorted_set.insert; + set.replace = sorted_set.replace; + set.get = sorted_set.get; + set.delete = sorted_set.delete; + set.forEach = sorted_set.forEach; + return set; +} + +Set_T operator cast(SortedSet_T) = unSort; + // For testing purposes, we provide a naive implementation of SortedSet_T. // This implementation is highly inefficient, but it is correct, and can be // used to test other implementations of SortedSet_T. 
@@ -57,14 +85,14 @@ struct NaiveSortedSet_T { } bool contains(T item) { - for (T possibility in buffer) { + for (T possibility : buffer) { if (equiv(possibility, item)) return true; } return false; } T after(T item) { - for (T possibility in buffer) { + for (T possibility : buffer) { if (gt(possibility, item)) return possibility; } return emptyresponse; @@ -112,8 +140,24 @@ struct NaiveSortedSet_T { return true; } + T replace(T item) { + for (int i = 0; i < buffer.length; ++i) { + if (equiv(buffer[i], item)) { + T toreturn = buffer[i]; + buffer[i] = item; + return toreturn; + } + else if (gt(buffer[i], item)) { + buffer.insert(i, item); + return emptyresponse; + } + } + buffer.push(item); + return emptyresponse; + } + T get(T item) { - for (T possibility in buffer) { + for (T possibility : buffer) { if (equiv(possibility, item)) return possibility; } return emptyresponse; @@ -129,9 +173,36 @@ struct NaiveSortedSet_T { return false; } - void foreach(bool process(T item)) { - for (T item in buffer) { + void forEach(bool process(T item)) { + for (T item : buffer) { if (!process(item)) break; } } } + +SortedSet_T operator cast(NaiveSortedSet_T naive) { + SortedSet_T toreturn; + toreturn.size = naive.size; + toreturn.contains = naive.contains; + toreturn.after = naive.after; + toreturn.before = naive.before; + toreturn.min = naive.min; + toreturn.popMin = naive.popMin; + toreturn.max = naive.max; + toreturn.popMax = naive.popMax; + toreturn.insert = naive.insert; + toreturn.replace = naive.replace; + toreturn.get = naive.get; + toreturn.delete = naive.delete; + toreturn.forEach = naive.forEach; + return toreturn; +} + +// Compose cast operators, since implicit casting is not transitive. 
+T[] operator cast(NaiveSortedSet_T naive) { + return (SortedSet_T)naive; +} + +SortedSet_T makeNaiveSortedSet(bool lessThan(T, T), T emptyresponse) { + return NaiveSortedSet_T(lessThan, emptyresponse); +} \ No newline at end of file diff --git a/base/wrapper.asy b/base/wrapper.asy new file mode 100644 index 000000000..1e44cdd79 --- /dev/null +++ b/base/wrapper.asy @@ -0,0 +1,12 @@ +typedef import(T); + +struct Wrapper_T { + T t; + void operator init(T t) { + this.t = t; + } +} + +Wrapper_T wrap(T t) { + return Wrapper_T(t); +} \ No newline at end of file diff --git a/tests/datastructures/sortedsetTest.asy b/tests/datastructures/sortedsetTest.asy new file mode 100644 index 000000000..c22b4e620 --- /dev/null +++ b/tests/datastructures/sortedsetTest.asy @@ -0,0 +1,260 @@ +import TestLib; + +StartTest("NaiveSortedSet"); + +from wrapper(T=int) access + Wrapper_T as wrapped_int, + wrap, + alias; + +bool operator < (wrapped_int a, wrapped_int b) { + return a.t < b.t; +} + +bool operator == (wrapped_int a, wrapped_int b) { + return a.t == b.t; +} + +// ISSUE: We have to import these from sortedset. If we import directly from +// pureset instead, identical types are not recognized as such when resolving +// function calls and implicit casts. 
+// from pureset(T=wrapped_int) access +// Set_T as Set_wrapped_int, +// makeNaiveSet, +// operator cast; + +from sortedset(T=wrapped_int) access + Set_T as Set_wrapped_int, + makeNaiveSet, + SortedSet_T as SortedSet_wrapped_int, + makeNaiveSortedSet, + operator cast, + unSort; + +struct ActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int INSERT = next(); + static restricted int REPLACE = next(); + static restricted int DELETE = next(); + static restricted int CONTAINS = next(); + static restricted int DELETE_CONTAINS = next(); +} + +from zip(T=int) access zip; +//from sort(T=wrapped_int) access mergeSort as sort; +from mapArray(Src=wrapped_int, Dst=int) access map; +int get(wrapped_int a) { + return a.t; +} + +int[] operator cast(wrapped_int[] a) { + for (wrapped_int x : a) { + assert(!alias(x, null), 'Null element in array'); + } + return map(get, a); +} + +string differences(Set_wrapped_int a, Set_wrapped_int b) { + if (a.size() != b.size()) { + return 'Different sizes: ' + string(a.size()) + ' vs ' + string(b.size()); + } + wrapped_int[] aArray = sort(a, operator<); + int[] aIntArray = map(get, aArray); + wrapped_int[] bArray = sort(b, operator<); + int[] bIntArray = map(get, bArray); + string arrayValues = '[\n'; + bool different = false; + for (int i = 0; i < aIntArray.length; ++i) { + arrayValues += ' [' + format('%5d', aIntArray[i]) + ',' + + format('%5d', bIntArray[i]) + ']'; + if (!alias(aArray[i], bArray[i])) { + arrayValues += ' <---'; + different = true; + } + arrayValues += '\n'; + } + arrayValues += ']'; + // write(arrayValues + '\n'); + if (different) { + return arrayValues; + } + return ''; +} + +string string(int[] a) { + string result = '['; + for (int i = 0; i < a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += string(a[i]); + } + result += ']'; + return result; +} + +string string(bool[] a) { + string result = '['; + for (int i = 0; i < 
a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += a[i] ? 'true' : 'false'; + } + result += ']'; + return result; +} + +typedef void Action(int ...Set_wrapped_int[]); + +Action[] actions = new Action[ActionEnum.numActions]; +actions[ActionEnum.INSERT] = new void(int maxItem ...Set_wrapped_int[] sets) { + wrapped_int toInsert = wrap(rand() % maxItem); + // write('Inserting ' + string(toInsert.t) + '\n'); + for (Set_wrapped_int s : sets) { + s.insert(toInsert); + } +}; +actions[ActionEnum.REPLACE] = new void(int maxItem ...Set_wrapped_int[] sets) { + wrapped_int toReplace = wrap(rand() % maxItem); + // write('Replacing ' + string(toReplace.t) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (Set_wrapped_int s : sets) { + results.push(s.replace(toReplace)); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.DELETE] = new void(int maxItem ...Set_wrapped_int[] sets) { + wrapped_int toDelete = wrap(rand() % maxItem); + // write('Deleting ' + string(toDelete.t) + '\n'); + bool[] results = new bool[]; + for (Set_wrapped_int s : sets) { + results.push(s.delete(toDelete)); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } +}; +actions[ActionEnum.CONTAINS] = new void(int maxItem ...Set_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('Checking ' + string(toCheck) + '\n'); + bool[] results = new bool[]; + for (Set_wrapped_int s : sets) { + results.push(s.contains(wrap(toCheck))); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } +}; +actions[ActionEnum.DELETE_CONTAINS] = new void(int ...Set_wrapped_int[] sets) { + if (sets.length == 0) { + 
return; + } + int initialSize = sets[0].size(); + if (initialSize == 0) { + return; + } + int indexToDelete = rand() % initialSize; + int i = 0; + wrapped_int toDelete = null; + bool process(wrapped_int a) { + if (i == indexToDelete) { + toDelete = wrap(a.t); + return false; + } + ++i; + return true; + } + sets[0].forEach(process); + assert(i < initialSize, 'Index out of range'); + // write('Deleting ' + string(toDelete.t) + '\n'); + int i = 0; + for (Set_wrapped_int s : sets) { + assert(s.contains(toDelete), 'Contains failed ' + string(i)); + assert(s.delete(toDelete), 'Delete failed'); + assert(!s.contains(toDelete), 'Contains failed'); + assert(s.size() == initialSize - 1, 'Size failed'); + ++i; + } +}; +real[] increasingProbs = new real[ActionEnum.numActions]; +increasingProbs[ActionEnum.INSERT] = 0.7; +increasingProbs[ActionEnum.REPLACE] = 0.1; +increasingProbs[ActionEnum.DELETE] = 0.05; +increasingProbs[ActionEnum.CONTAINS] = 0.1; +increasingProbs[ActionEnum.DELETE_CONTAINS] = 0.05; +assert(sum(increasingProbs) == 1, 'Probabilities do not sum to 1'); + +real[] decreasingProbs = new real[ActionEnum.numActions]; +decreasingProbs[ActionEnum.INSERT] = 0.1; +decreasingProbs[ActionEnum.REPLACE] = 0.1; +decreasingProbs[ActionEnum.DELETE] = 0.4; +decreasingProbs[ActionEnum.CONTAINS] = 0.1; +decreasingProbs[ActionEnum.DELETE_CONTAINS] = 0.3; +assert(sum(decreasingProbs) == 1, 'Probabilities do not sum to 1'); + +Set_wrapped_int pure_set = makeNaiveSet(operator ==, (wrapped_int)null); +SortedSet_wrapped_int sorted_set = + makeNaiveSortedSet(operator <, (wrapped_int)null); + +int chooseAction(real[] probs) { + real r = unitrand(); + real sum = 0; + for (int i = 0; i < probs.length; ++i) { + sum += probs[i]; + if (r < sum) { + return i; + } + } + return probs.length - 1; +} + +bool isStrictlySorted(wrapped_int[] arr) { + for (int i = 1; i < arr.length; ++i) { + if (!(arr[i - 1] < arr[i])) { + return false; + } + } + return true; +} + +int maxSize = 0; +for (int i = 0; i 
< 2000; ++i) { + real[] probs = i < 800 ? increasingProbs : decreasingProbs; + int choice = chooseAction(probs); + actions[choice](100, pure_set, sorted_set); + string diffs = differences(pure_set, sorted_set); + assert(diffs == '', 'Pure vs sorted: \n' + diffs); + assert(isStrictlySorted(sorted_set), 'Not sorted'); + maxSize = max(maxSize, pure_set.size()); +} +// write('Max size: ' + string(maxSize) + '\n'); + +// int maxSize = 0; +// for (int i = 0; i < 2000; ++i) { +// real[] probs = i < 800 ? increasingProbs : decreasingProbs; +// int choice = chooseAction(probs); +// actions[choice](1000, pure_set, unSort(sorted_set)); +// string diffs = differences(pure_set, sorted_set); +// assert(diffs == '', 'Pure vs sorted: \n' + diffs); +// maxSize = max(maxSize, pure_set.size()); +// } +// write('Max size: ' + string(maxSize) + '\n'); + +EndTest(); \ No newline at end of file From 869763463a044658f460c82e684df690b19aaade Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Sun, 25 Feb 2024 13:27:13 -0800 Subject: [PATCH 024/133] Set of small positive ints (using bool[]). 
--- base/set_smallpositiveint.asy | 95 +++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 base/set_smallpositiveint.asy diff --git a/base/set_smallpositiveint.asy b/base/set_smallpositiveint.asy new file mode 100644 index 000000000..394f4692f --- /dev/null +++ b/base/set_smallpositiveint.asy @@ -0,0 +1,95 @@ +from pureset(int) access + Set_T as set_int, + operator cast; + +struct Set_smallPositiveInt { + bool[] buffer = new bool[]; + + int size() { + return sum(buffer); + } + + bool empty() { + return all(!buffer); + } + + bool contains(int item) { + if (item < 0 || item >= buffer.length) { + return false; + } + return buffer[item]; + } + + bool insert(int item) { + if (item < 0) { + return false; + } + while (item >= buffer.length) { + buffer.push(false); + } + if (buffer[item]) { + return false; + } + buffer[item] = true; + return true; + } + + int replace(int item) { + if (item < 0) { + return -1; + } + while (item >= buffer.length) { + buffer.push(false); + } + if (buffer[item]) { + return item; + } + buffer[item] = true; + return -1; + } + + int get(int item) { + if (item < 0 || item >= buffer.length) { + return -1; + } + if (buffer[item]) { + return item; + } + return -1; + } + + bool delete(int item) { + if (item < 0 || item >= buffer.length) { + return false; + } + if (buffer[item]) { + buffer[item] = false; + return true; + } + return false; + } + + void foreach(bool process(int item)) { + for (int i = 0; i < buffer.length; ++i) { + if (buffer[i]) { + if (!process(i)) { + return; + } + } + } + } + +} + +Set_int operator cast(Set_smallPositiveInt set) { + Set_int result = new Set_int; + result.size = set.size; + result.empty = set.empty; + result.contains = set.contains; + result.insert = set.insert; + result.replace = set.replace; + result.get = set.get; + result.delete = set.delete; + result.foreach = set.foreach; + return result; +} \ No newline at end of file From 71480ff136424fe589af48873dc67c24efe78b51 Mon Sep 17 
00:00:00 2001 From: Charles Staats III Date: Mon, 26 Feb 2024 13:51:15 -0800 Subject: [PATCH 025/133] Splaytree with preliminary testing. --- base/splaytree.asy | 361 ++++++++++++++++++++++--- tests/datastructures/splaytreeTest.asy | 241 +++++++++++++++++ 2 files changed, 570 insertions(+), 32 deletions(-) create mode 100644 tests/datastructures/splaytreeTest.asy diff --git a/base/splaytree.asy b/base/splaytree.asy index b6de6bb65..885f9a006 100644 --- a/base/splaytree.asy +++ b/base/splaytree.asy @@ -1,5 +1,8 @@ typedef import(T); +from sortedset(T=T) access Set_T, SortedSet_T, operator cast, + makeNaiveSortedSet; // needed for bug workaround: https://github.com/vectorgraphics/asymptote/issues/429 + private struct treenode { treenode leftchild; treenode rightchild; @@ -8,13 +11,65 @@ private struct treenode { this.value = value; } - void inOrder(void run(T)) { - if (leftchild != null) leftchild.inOrder(run); - run(value); - if (rightchild != null) rightchild.inOrder(run); + bool inOrder(bool run(T)) { + if (leftchild != null) { + if (!leftchild.inOrder(run)) return false; + } + if (!run(value)) return false; + if (rightchild != null) { + if (!rightchild.inOrder(run)) return false; + } + return true; + } +} + +private struct NodeProgressEnum { + restricted static int num = 0; + private static int make() { + return (++num - 1); } + static int NOT_STARTED = make(); + static int LEFT_DONE = make(); + static int SELF_DONE = make(); + static int RIGHT_DONE = make(); } +private struct NodeInProgress { + treenode node; + int progress = NodeProgressEnum.NOT_STARTED; + void operator init(treenode node) { + this.node = node; + } +} + +void inOrderNonRecursive(treenode root, bool run(T)) { + if (root == null) return; + NodeInProgress[] stack = new NodeInProgress[0]; + stack.cyclic = true; + stack.push(NodeInProgress(root)); + while (stack.length > 0) { + NodeInProgress current = stack[-1]; + if (current.progress == NodeProgressEnum.NOT_STARTED) { + if (current.node.leftchild 
!= null) { + stack.push(NodeInProgress(current.node.leftchild)); + } + current.progress = NodeProgressEnum.LEFT_DONE; + } else if (current.progress == NodeProgressEnum.LEFT_DONE) { + if (!run(current.node.value)) return; + current.progress = NodeProgressEnum.SELF_DONE; + } else if (current.progress == NodeProgressEnum.SELF_DONE) { + if (current.node.rightchild != null) { + stack.push(NodeInProgress(current.node.rightchild)); + } + current.progress = NodeProgressEnum.RIGHT_DONE; + } else { + assert(current.progress == NodeProgressEnum.RIGHT_DONE); + stack.pop(); + } + } +} + + private treenode splay(treenode[] ancestors, bool lessthan(T a, T b)) { bool operator < (T a, T b) = lessthan; @@ -92,13 +147,24 @@ private treenode splay(treenode[] ancestors, bool lessthan(T a, T b)) { return current; } -struct splaytree_T { - treenode root = null; +struct SplayTree_T { + private treenode root = null; restricted int size = 0; private bool operator < (T a, T b); + private T emptyresponse; - void operator init(bool lessthan(T,T)) { + void operator init(bool lessthan(T,T), T emptyresponse) { operator< = lessthan; + this.emptyresponse = emptyresponse; + } + + int size() { + return size; + } + + bool empty() { + assert((root == null) == (size == 0)); + return root == null; } bool contains(T value) { @@ -122,10 +188,172 @@ struct splaytree_T { return true; } + T after(T item) { + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + T strictUpperBound = emptyresponse; + bool found = false; + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return strictUpperBound; + } + if (found || item < current.value) { + strictUpperBound = current.value; + parentStack.push(current.leftchild); + } else { + parentStack.push(current.rightchild); + if (!(current.value < item)) + found = true; + } + } + assert(false, "Unreachable code"); + return emptyresponse; + 
} + + T before(T item) { + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + T strictLowerBound = emptyresponse; + bool found = false; + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return strictLowerBound; + } + if (found || current.value < item) { + strictLowerBound = current.value; + parentStack.push(current.rightchild); + } else { + parentStack.push(current.leftchild); + if (!(item < current.value)) + found = true; + } + } + assert(false, "Unreachable code"); + return emptyresponse; + } + + T firstGEQ(T item) { + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + T upperBound = emptyresponse; + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return upperBound; + } + if (current.value < item) { + parentStack.push(current.rightchild); + } else if (item < current.value) { + upperBound = current.value; + parentStack.push(current.leftchild); + } else { + root = splay(parentStack, operator<); + return current.value; + } + } + assert(false, "Unreachable code"); + return emptyresponse; + } + + T firstLEQ(T item) { + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + T lowerBound = emptyresponse; + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return lowerBound; + } + if (item < current.value) { + parentStack.push(current.leftchild); + } else if (current.value < item) { + lowerBound = current.value; + parentStack.push(current.rightchild); + } else { + root = splay(parentStack, operator<); + return current.value; + } + } + assert(false, "Unreachable code"); + return emptyresponse; + } + + T min() { + if (root == null) return emptyresponse; + 
treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + treenode current = root; + while (current != null) { + ancestors.push(current); + current = current.leftchild; + } + root = splay(ancestors, operator<); + return root.value; + } + + T popMin() { + if (root == null) return emptyresponse; + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + treenode current = root; + while (current != null) { + ancestors.push(current); + current = current.leftchild; + } + root = splay(ancestors, operator<); + T toReturn = root.value; + root = root.rightchild; + --size; + return toReturn; + } + + T max() { + if (root == null) return emptyresponse; + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + treenode current = root; + while (current != null) { + ancestors.push(current); + current = current.rightchild; + } + root = splay(ancestors, operator<); + return root.value; + } + + T popMax() { + if (root == null) return emptyresponse; + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + treenode current = root; + while (current != null) { + ancestors.push(current); + current = current.rightchild; + } + root = splay(ancestors, operator<); + T toReturn = root.value; + root = root.leftchild; + --size; + return toReturn; + } + /* * returns true iff the tree was modified */ - bool add(T value) { + bool insert(T value) { if (root == null) { root = treenode(value); ++size; @@ -162,6 +390,69 @@ struct splaytree_T { return true; } + T replace(T item) { + if (root == null) { + insert(item); + return emptyresponse; + } + treenode[] ancestors = new treenode[0]; + ancestors.cyclic = true; + ancestors.push(root); + treenode current = root; + while (true) { + if (item < current.value) { + if (current.leftchild == null) { + current.leftchild = treenode(item); + ancestors.push(current.leftchild); + break; + } + ancestors.push(current.leftchild); + current = current.leftchild; + } else if (current.value < item) { + if 
(current.rightchild == null) { + current.rightchild = treenode(item); + ancestors.push(current.rightchild); + break; + } + ancestors.push(current.rightchild); + current = current.rightchild; + } else { + T toReturn = current.value; + current.value = item; + root = splay(ancestors, operator<); + return toReturn; + } + } + root = splay(ancestors, operator<); + ++size; + return emptyresponse; + } + + T get(T item) { + if (root == null) return emptyresponse; + treenode[] parentStack = new treenode[0]; + parentStack.cyclic = true; + parentStack.push(root); + while (true) { + treenode current = parentStack[-1]; + if (current == null) { + parentStack.pop(); + root = splay(parentStack, operator<); + return emptyresponse; + } + if (item < current.value) { + parentStack.push(current.leftchild); + } else if (current.value < item) { + parentStack.push(current.rightchild); + } else { + root = splay(parentStack, operator<); + return current.value; + } + } + assert(false, "Unreachable code"); + return emptyresponse; + } + /* * returns true iff the tree was modified */ @@ -220,31 +511,37 @@ struct splaytree_T { return true; } - T popMax(T default) { - if (root == null) return default; - treenode[] ancestors = new treenode[0]; - ancestors.cyclic = true; - treenode current = root; - while (current != null) { - ancestors.push(current); - current = current.rightchild; - } - root = splay(ancestors, operator<); - T toReturn = root.value; - // TODO(optimization): Refactor deleteRoot out of the delete function, - // and call deleteRoot instead of delete. 
- delete(toReturn); - return toReturn; + void forEach(bool run(T)) { + inOrderNonRecursive(root, run); } + +} - bool empty() { - assert((root == null) == (size == 0)); - return root == null; - } +SortedSet_T operator cast(SplayTree_T splaytree) { + SortedSet_T result = new SortedSet_T; + result.size = splaytree.size; + result.empty = splaytree.empty; + result.contains = splaytree.contains; + result.after = splaytree.after; + result.before = splaytree.before; + result.firstGEQ = splaytree.firstGEQ; + result.firstLEQ = splaytree.firstLEQ; + result.min = splaytree.min; + result.popMin = splaytree.popMin; + result.max = splaytree.max; + result.popMax = splaytree.popMax; + result.insert = splaytree.insert; + result.replace = splaytree.replace; + result.get = splaytree.get; + result.delete = splaytree.delete; + result.forEach = splaytree.forEach; + return result; +} - void forEach(void run(T)) { - if (root == null) return; - root.inOrder(run); - } - +Set_T operator cast(SplayTree_T splaytree) { + return (SortedSet_T)splaytree; } + +T[] operator cast(SplayTree_T splaytree) { + return (SortedSet_T)splaytree; +} \ No newline at end of file diff --git a/tests/datastructures/splaytreeTest.asy b/tests/datastructures/splaytreeTest.asy new file mode 100644 index 000000000..0eb6fb0b6 --- /dev/null +++ b/tests/datastructures/splaytreeTest.asy @@ -0,0 +1,241 @@ + +import TestLib; + +StartTest("SplayTree_as_Set"); + +from wrapper(T=int) access + Wrapper_T as wrapped_int, + wrap, + alias; + +bool operator < (wrapped_int a, wrapped_int b) { + return a.t < b.t; +} + +bool operator == (wrapped_int a, wrapped_int b) { + return a.t == b.t; +} + +from splaytree(T=wrapped_int) access + makeNaiveSortedSet, + SortedSet_T as SortedSet_wrapped_int, + SplayTree_T as SplayTree_wrapped_int, + operator cast; + +struct ActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int INSERT = next(); + static restricted int 
REPLACE = next(); + static restricted int DELETE = next(); + static restricted int CONTAINS = next(); + static restricted int DELETE_CONTAINS = next(); +} + +from mapArray(Src=wrapped_int, Dst=int) access map; +int get(wrapped_int a) { + return a.t; +} + +int[] operator cast(wrapped_int[] a) { + for (wrapped_int x : a) { + assert(!alias(x, null), 'Null element in array'); + } + return map(get, a); +} + +string differences(SortedSet_wrapped_int a, SortedSet_wrapped_int b) { + if (a.size() != b.size()) { + return 'Different sizes: ' + string(a.size()) + ' vs ' + string(b.size()); + } + wrapped_int[] aArray = a; + int[] aIntArray = aArray; + wrapped_int[] bArray = b; + int[] bIntArray = bArray; + string arrayValues = '[\n'; + bool different = false; + for (int i = 0; i < aIntArray.length; ++i) { + arrayValues += ' [' + format('%5d', aIntArray[i]) + ',' + + format('%5d', bIntArray[i]) + ']'; + if (!alias(aArray[i], bArray[i])) { + arrayValues += ' <---'; + different = true; + } + arrayValues += '\n'; + } + arrayValues += ']'; + // write(arrayValues + '\n'); + if (different) { + return arrayValues; + } + return ''; +} + +string string(int[] a) { + string result = '['; + for (int i = 0; i < a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += string(a[i]); + } + result += ']'; + return result; +} + +string string(bool[] a) { + string result = '['; + for (int i = 0; i < a.length; ++i) { + if (i > 0) { + result += ', '; + } + result += a[i] ? 
'true' : 'false'; + } + result += ']'; + return result; +} + +typedef void Action(int ...SortedSet_wrapped_int[]); + +Action[] actions = new Action[ActionEnum.numActions]; +actions[ActionEnum.INSERT] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toInsert = wrap(rand() % maxItem); + // write('Inserting ' + string(toInsert.t) + '\n'); + for (SortedSet_wrapped_int s : sets) { + s.insert(toInsert); + } +}; +actions[ActionEnum.REPLACE] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toReplace = wrap(rand() % maxItem); + // write('Replacing ' + string(toReplace.t) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.replace(toReplace)); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.DELETE] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toDelete = wrap(rand() % maxItem); + // write('Deleting ' + string(toDelete.t) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.delete(toDelete)); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } +}; +actions[ActionEnum.CONTAINS] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('Checking ' + string(toCheck) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.contains(wrap(toCheck))); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } +}; +actions[ActionEnum.DELETE_CONTAINS] = new void(int ...SortedSet_wrapped_int[] sets) { + if (sets.length == 0) { + return; + } + 
int initialSize = sets[0].size(); + if (initialSize == 0) { + return; + } + int indexToDelete = rand() % initialSize; + int i = 0; + wrapped_int toDelete = null; + bool process(wrapped_int a) { + if (i == indexToDelete) { + toDelete = wrap(a.t); + return false; + } + ++i; + return true; + } + sets[0].forEach(process); + assert(i < initialSize, 'Index out of range'); + // write('Deleting ' + string(toDelete.t) + '\n'); + int i = 0; + for (SortedSet_wrapped_int s : sets) { + assert(s.contains(toDelete), 'Contains failed ' + string(i)); + assert(s.delete(toDelete), 'Delete failed'); + assert(!s.contains(toDelete), 'Contains failed'); + assert(s.size() == initialSize - 1, 'Size failed'); + ++i; + } +}; +real[] increasingProbs = new real[ActionEnum.numActions]; +increasingProbs[ActionEnum.INSERT] = 0.7; +increasingProbs[ActionEnum.REPLACE] = 0.1; +increasingProbs[ActionEnum.DELETE] = 0.05; +increasingProbs[ActionEnum.CONTAINS] = 0.1; +increasingProbs[ActionEnum.DELETE_CONTAINS] = 0.05; +assert(sum(increasingProbs) == 1, 'Probabilities do not sum to 1'); + +real[] decreasingProbs = new real[ActionEnum.numActions]; +decreasingProbs[ActionEnum.INSERT] = 0.1; +decreasingProbs[ActionEnum.REPLACE] = 0.1; +decreasingProbs[ActionEnum.DELETE] = 0.4; +decreasingProbs[ActionEnum.CONTAINS] = 0.1; +decreasingProbs[ActionEnum.DELETE_CONTAINS] = 0.3; +assert(sum(decreasingProbs) == 1, 'Probabilities do not sum to 1'); + +SortedSet_wrapped_int sorted_set = + makeNaiveSortedSet(operator <, (wrapped_int)null); +SplayTree_wrapped_int splayset = + SplayTree_wrapped_int(operator <, (wrapped_int)null); + +int chooseAction(real[] probs) { + real r = unitrand(); + real sum = 0; + for (int i = 0; i < probs.length; ++i) { + sum += probs[i]; + if (r < sum) { + return i; + } + } + return probs.length - 1; +} + +bool isStrictlySorted(wrapped_int[] arr) { + for (int i = 1; i < arr.length; ++i) { + if (!(arr[i - 1] < arr[i])) { + return false; + } + } + return true; +} + +int maxSize = 0; +for (int i 
= 0; i < 2000; ++i) { + real[] probs = i < 800 ? increasingProbs : decreasingProbs; + int choice = chooseAction(probs); + actions[choice](100, sorted_set, splayset); + string diffs = differences(sorted_set, splayset); + assert(diffs == '', 'Naive vs splayset: \n' + diffs); + assert(isStrictlySorted(splayset), 'Not sorted'); + maxSize = max(maxSize, splayset.size()); +} +EndTest(); + +StartTest("SplayTree_as_SortedSet"); +write("TODO: Implement this test."); +EndTest(); \ No newline at end of file From b0408a8e44c08674702fef8bbb467b28ac6fb7e3 Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Thu, 29 Feb 2024 20:07:20 -0800 Subject: [PATCH 026/133] More thorough splaytree testing. --- base/sortedset.asy | 10 +- tests/datastructures/sortedsetTest.asy | 3 +- tests/datastructures/splaytreeTest.asy | 474 ++++++++++++++++++++----- 3 files changed, 399 insertions(+), 88 deletions(-) diff --git a/base/sortedset.asy b/base/sortedset.asy index bae09958b..7a0b05dc5 100644 --- a/base/sortedset.asy +++ b/base/sortedset.asy @@ -6,14 +6,17 @@ struct SortedSet_T { int size(); bool empty() { return size() == 0; } bool contains(T item); + T get(T item); // Returns the item in the collection that is + // equivalent to item, or emptyresponse if there is no + // such item. // Returns the least element > item, or emptyresponse if there is no such // element. T after(T item); // Returns the greatest element < item, or emptyresponse if there is no such // element. T before(T item); - T firstGEQ(T item) { return contains(item) ? item : after(item); } - T firstLEQ(T item) { return contains(item) ? item : before(item); } + T firstGEQ(T item) { return contains(item) ? get(item) : after(item); } + T firstLEQ(T item) { return contains(item) ? get(item) : before(item); } T min(); // Returns emptyresponse if collection is empty. T popMin(); // Returns emptyresponse if collection is empty. T max(); // Returns emptyresponse if collection is empty. 
@@ -21,9 +24,6 @@ struct SortedSet_T { bool insert(T item); // Returns true iff the collection is modified. T replace(T item); // Inserts item, and returns the item that was // replaced, or emptyresponse if no item was replaced. - T get(T item); // Returns the item in the collection that is - // equivalent to item, or emptyresponse if there is no - // such item. bool delete(T item); // Returns true iff the collection is modified. // Calls process on each item in the collection, in ascending order, // until process returns false. diff --git a/tests/datastructures/sortedsetTest.asy b/tests/datastructures/sortedsetTest.asy index c22b4e620..9713c101d 100644 --- a/tests/datastructures/sortedsetTest.asy +++ b/tests/datastructures/sortedsetTest.asy @@ -148,7 +148,8 @@ actions[ActionEnum.DELETE] = new void(int maxItem ...Set_wrapped_int[] sets) { } } }; -actions[ActionEnum.CONTAINS] = new void(int maxItem ...Set_wrapped_int[] sets) { +actions[ActionEnum.CONTAINS] = new void(int maxItem ...Set_wrapped_int[] sets) +{ int toCheck = rand() % maxItem; // write('Checking ' + string(toCheck) + '\n'); bool[] results = new bool[]; diff --git a/tests/datastructures/splaytreeTest.asy b/tests/datastructures/splaytreeTest.asy index 0eb6fb0b6..cb5bf6295 100644 --- a/tests/datastructures/splaytreeTest.asy +++ b/tests/datastructures/splaytreeTest.asy @@ -100,88 +100,93 @@ string string(bool[] a) { typedef void Action(int ...SortedSet_wrapped_int[]); Action[] actions = new Action[ActionEnum.numActions]; -actions[ActionEnum.INSERT] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { - wrapped_int toInsert = wrap(rand() % maxItem); - // write('Inserting ' + string(toInsert.t) + '\n'); - for (SortedSet_wrapped_int s : sets) { - s.insert(toInsert); - } -}; -actions[ActionEnum.REPLACE] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { - wrapped_int toReplace = wrap(rand() % maxItem); - // write('Replacing ' + string(toReplace.t) + '\n'); - wrapped_int[] results = new 
wrapped_int[]; - for (SortedSet_wrapped_int s : sets) { - results.push(s.replace(toReplace)); - } - if (results.length > 0) { - wrapped_int expected = results[0]; - for (wrapped_int r : results) { - if (!alias(r, expected)) { - assert(false, 'Different results: ' + string(results)); +actions[ActionEnum.INSERT] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toInsert = wrap(rand() % maxItem); + // write('Inserting ' + string(toInsert.t) + '\n'); + for (SortedSet_wrapped_int s : sets) { + s.insert(toInsert); } - } - } -}; -actions[ActionEnum.DELETE] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { - wrapped_int toDelete = wrap(rand() % maxItem); - // write('Deleting ' + string(toDelete.t) + '\n'); - bool[] results = new bool[]; - for (SortedSet_wrapped_int s : sets) { - results.push(s.delete(toDelete)); - } - if (results.length > 0) { - bool expected = results[0]; - for (bool r : results) { - assert(r == expected, 'Different results: ' + string(results)); - } - } -}; -actions[ActionEnum.CONTAINS] = new void(int maxItem ...SortedSet_wrapped_int[] sets) { - int toCheck = rand() % maxItem; - // write('Checking ' + string(toCheck) + '\n'); - bool[] results = new bool[]; - for (SortedSet_wrapped_int s : sets) { - results.push(s.contains(wrap(toCheck))); - } - if (results.length > 0) { - bool expected = results[0]; - for (bool r : results) { - assert(r == expected, 'Different results: ' + string(results)); - } - } -}; -actions[ActionEnum.DELETE_CONTAINS] = new void(int ...SortedSet_wrapped_int[] sets) { - if (sets.length == 0) { - return; - } - int initialSize = sets[0].size(); - if (initialSize == 0) { - return; - } - int indexToDelete = rand() % initialSize; - int i = 0; - wrapped_int toDelete = null; - bool process(wrapped_int a) { - if (i == indexToDelete) { - toDelete = wrap(a.t); - return false; - } - ++i; - return true; - } - sets[0].forEach(process); - assert(i < initialSize, 'Index out of range'); - // write('Deleting ' + 
string(toDelete.t) + '\n'); - int i = 0; - for (SortedSet_wrapped_int s : sets) { - assert(s.contains(toDelete), 'Contains failed ' + string(i)); - assert(s.delete(toDelete), 'Delete failed'); - assert(!s.contains(toDelete), 'Contains failed'); - assert(s.size() == initialSize - 1, 'Size failed'); - ++i; - } -}; + }; +actions[ActionEnum.REPLACE] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toReplace = wrap(rand() % maxItem); + // write('Replacing ' + string(toReplace.t) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.replace(toReplace)); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.DELETE] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toDelete = wrap(rand() % maxItem); + // write('Deleting ' + string(toDelete.t) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.delete(toDelete)); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } + }; +actions[ActionEnum.CONTAINS] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('Checking ' + string(toCheck) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.contains(wrap(toCheck))); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } + }; +actions[ActionEnum.DELETE_CONTAINS] = + new void(int ...SortedSet_wrapped_int[] sets) { + if (sets.length == 0) { + return; + } + int initialSize = sets[0].size(); + if (initialSize == 0) { + return; + } + int indexToDelete = 
rand() % initialSize; + int i = 0; + wrapped_int toDelete = null; + bool process(wrapped_int a) { + if (i == indexToDelete) { + toDelete = wrap(a.t); + return false; + } + ++i; + return true; + } + sets[0].forEach(process); + assert(i < initialSize, 'Index out of range'); + // write('Deleting ' + string(toDelete.t) + '\n'); + int i = 0; + for (SortedSet_wrapped_int s : sets) { + assert(s.contains(toDelete), 'Contains failed ' + string(i)); + assert(s.delete(toDelete), 'Delete failed'); + assert(!s.contains(toDelete), 'Contains failed'); + assert(s.size() == initialSize - 1, 'Size failed'); + ++i; + } + }; real[] increasingProbs = new real[ActionEnum.numActions]; increasingProbs[ActionEnum.INSERT] = 0.7; increasingProbs[ActionEnum.REPLACE] = 0.1; @@ -237,5 +242,310 @@ for (int i = 0; i < 2000; ++i) { EndTest(); StartTest("SplayTree_as_SortedSet"); -write("TODO: Implement this test."); + +struct ActionEnum { + static restricted int numActions = 0; + static private int next() { + return ++numActions - 1; + } + static restricted int CONTAINS = next(); + static restricted int AFTER = next(); + static restricted int BEFORE = next(); + static restricted int FIRST_GEQ = next(); + static restricted int FIRST_LEQ = next(); + static restricted int MIN = next(); + static restricted int POP_MIN = next(); + static restricted int MAX = next(); + static restricted int POP_MAX = next(); + static restricted int INSERT = next(); + static restricted int REPLACE = next(); + static restricted int GET = next(); + static restricted int DELETE = next(); + static restricted int DELETE_CONTAINS = next(); +} + +Action[] actions = new Action[ActionEnum.numActions]; +actions[ActionEnum.CONTAINS] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('Checking ' + string(toCheck) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.contains(wrap(toCheck))); + } + if (results.length > 0) { + bool 
expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } + }; +actions[ActionEnum.AFTER] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('After ' + string(toCheck) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.after(wrap(toCheck))); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.BEFORE] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('Before ' + string(toCheck) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.before(wrap(toCheck))); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.FIRST_GEQ] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('First greater or equal ' + string(toCheck) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.firstGEQ(wrap(toCheck))); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.FIRST_LEQ] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + int toCheck = rand() % maxItem; + // write('First less or equal ' + string(toCheck) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.firstLEQ(wrap(toCheck))); + } + if (results.length 
> 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.MIN] = new void(int ...SortedSet_wrapped_int[] sets) { + // write('Min\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.min()); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.POP_MIN] = new void(int ...SortedSet_wrapped_int[] sets) { + // write('Pop min\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.popMin()); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.MAX] = new void(int ...SortedSet_wrapped_int[] sets) { + // write('Max\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.max()); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.POP_MAX] = new void(int ...SortedSet_wrapped_int[] sets) { + // write('Pop max\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.popMax()); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.INSERT] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toInsert = wrap(rand() % maxItem); + // 
write('Inserting ' + string(toInsert.t) + '\n'); + for (SortedSet_wrapped_int s : sets) { + s.insert(toInsert); + } + }; +actions[ActionEnum.REPLACE] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toReplace = wrap(rand() % maxItem); + // write('Replacing ' + string(toReplace.t) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.replace(toReplace)); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } + }; +actions[ActionEnum.GET] = new void(int maxItem ...SortedSet_wrapped_int[] sets) +{ + wrapped_int toGet = wrap(rand() % maxItem); + // write('Getting ' + string(toGet) + '\n'); + wrapped_int[] results = new wrapped_int[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.get(toGet)); + } + if (results.length > 0) { + wrapped_int expected = results[0]; + for (wrapped_int r : results) { + if (!alias(r, expected)) { + assert(false, 'Different results: ' + string(results)); + } + } + } +}; +actions[ActionEnum.DELETE] = + new void(int maxItem ...SortedSet_wrapped_int[] sets) { + wrapped_int toDelete = wrap(rand() % maxItem); + // write('Deleting ' + string(toDelete.t) + '\n'); + bool[] results = new bool[]; + for (SortedSet_wrapped_int s : sets) { + results.push(s.delete(toDelete)); + } + if (results.length > 0) { + bool expected = results[0]; + for (bool r : results) { + assert(r == expected, 'Different results: ' + string(results)); + } + } + }; +actions[ActionEnum.DELETE_CONTAINS] = + new void(int ...SortedSet_wrapped_int[] sets) { + if (sets.length == 0) { + return; + } + int initialSize = sets[0].size(); + if (initialSize == 0) { + return; + } + int indexToDelete = rand() % initialSize; + int i = 0; + wrapped_int toDelete = null; + bool process(wrapped_int a) { + if (i == indexToDelete) { + toDelete = wrap(a.t); + return 
false; + } + ++i; + return true; + } + sets[0].forEach(process); + assert(i < initialSize, 'Index out of range'); + // write('Deleting ' + string(toDelete.t) + '\n'); + int i = 0; + for (SortedSet_wrapped_int s : sets) { + assert(s.delete(toDelete), 'Delete failed'); + assert(!s.contains(toDelete), 'Contains failed'); + assert(s.size() == initialSize - 1, 'Size failed'); + ++i; + } + }; + +real[] increasingProbs = array(n=ActionEnum.numActions, value=0.0); +// Actions that don't modify the set (except for rebalancing): +increasingProbs[ActionEnum.CONTAINS] = 1 / 2^5; +increasingProbs[ActionEnum.AFTER] = 1 / 2^5; +increasingProbs[ActionEnum.BEFORE] = 1 / 2^5; +increasingProbs[ActionEnum.FIRST_GEQ] = 1 / 2^5; +increasingProbs[ActionEnum.FIRST_LEQ] = 1 / 2^5; +increasingProbs[ActionEnum.MIN] = 1 / 2^5; +increasingProbs[ActionEnum.MAX] = 1 / 2^5; +increasingProbs[ActionEnum.GET] = 1 / 2^5; +// 1/4 probability of this sort of action: +assert(sum(increasingProbs) == 8 / 2^5); +// Actions that might add an element: +increasingProbs[ActionEnum.INSERT] = 1 / 4; +increasingProbs[ActionEnum.REPLACE] = 1 / 4; +assert(sum(increasingProbs) == 3/4); +// Actions that might remove an element: +increasingProbs[ActionEnum.POP_MIN] = 1 / 16; +increasingProbs[ActionEnum.POP_MAX] = 1 / 16; +increasingProbs[ActionEnum.DELETE] = 1 / 16; +increasingProbs[ActionEnum.DELETE_CONTAINS] = 1 / 16; +assert(sum(increasingProbs) == 1, 'Probabilities do not sum to 1'); + +real[] decreasingProbs = copy(increasingProbs); +// Actions that might add an element: +decreasingProbs[ActionEnum.INSERT] = 1 / 8; +decreasingProbs[ActionEnum.REPLACE] = 1 / 8; +// Actions that might remove an element: +decreasingProbs[ActionEnum.POP_MIN] = 1 / 8; +decreasingProbs[ActionEnum.POP_MAX] = 1 / 8; +decreasingProbs[ActionEnum.DELETE] = 1 / 8; +decreasingProbs[ActionEnum.DELETE_CONTAINS] = 1 / 8; +assert(sum(decreasingProbs) == 1, 'Probabilities do not sum to 1'); + +SortedSet_wrapped_int sorted_set = + 
makeNaiveSortedSet(operator <, (wrapped_int)null); +SplayTree_wrapped_int splayset = + SplayTree_wrapped_int(operator <, (wrapped_int)null); + + +int maxSize = 0; +for (int i = 0; i < 2000; ++i) { + real[] probs = i < 800 ? increasingProbs : decreasingProbs; + int choice = chooseAction(probs); + actions[choice](100, sorted_set, splayset); + string diffs = differences(sorted_set, splayset); + assert(diffs == '', 'Naive vs splayset: \n' + diffs); + assert(isStrictlySorted(splayset), 'Not sorted'); + maxSize = max(maxSize, splayset.size()); +} + EndTest(); \ No newline at end of file From 4c385fa31d75be1faebf40a246d57a10b944c6fb Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Mon, 11 Mar 2024 13:45:01 -0700 Subject: [PATCH 027/133] Preliminary (non-working) hashing code --- .vscode/c_cpp_properties.json | 3 +- Makefile.in | 2 +- hashing.cc | 128 ++++++++++++++++++++++++++++++++++ hashing.h | 18 +++++ runarray.in | 44 ++++++------ runmath.in | 35 +++++++++- runstring.in | 40 ++++------- 7 files changed, 220 insertions(+), 50 deletions(-) create mode 100644 hashing.cc create mode 100644 hashing.h diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index fe18329bc..841289a2a 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -5,7 +5,8 @@ "includePath": [ "${workspaceFolder}/**", "/usr/include/gc", - "/usr/include/tirpc" + "/usr/include/tirpc", + "/usr/local/opt/boost", ], "defines": [ "USEGC", diff --git a/Makefile.in b/Makefile.in index 15b2a7c6b..c21c1f13c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -51,7 +51,7 @@ COREFILES = $(CAMP) $(SYMBOL_FILES) env genv stm dec errormsg \ @getopt@ locate parser program application varinit fundec refaccess \ envcompleter process constructor array Delaunay predicates \ $(PRC) glrender tr shaders jsfile v3dfile tinyexr EXRFiles GLTextures \ - lspserv symbolmaps + lspserv symbolmaps hashing FILES = $(COREFILES) main diff --git a/hashing.cc b/hashing.cc new file mode 
100644 index 000000000..3e7bb0673 --- /dev/null +++ b/hashing.cc @@ -0,0 +1,128 @@ +#include "hashing.h" + +#include // For Debugging ONLY +#include +#include + +namespace hashing { + +uint64_t random_bits(int8_t bits) { + static std::random_device *rd = new std::random_device(); + static auto *gen = new std::mt19937_64((*rd)()); + std::uniform_int_distribution dist(0, (UINT64_C(1) << bits) - 1); + return dist(*gen); +} + +uint64_t random_odd(int8_t bits) { + if (bits == 0) { + // There's no odd number with 0 bits, so we return 1. + return 1; + } + uint64_t r = random_bits(bits - 1); + return (r << 1) | 1; +} + +bool checkCycleLength(uint64_t m, int minAllowedLength = 1<<20) { + uint64_t a = UINT64_C(1); + int i; + for (i = 0; i < minAllowedLength; ++i) { + a *= m; + if (a == UINT64_C(1)) return false; + } + return true; +} + +// Checks a simple bit-distribution condition: no byte can be all 0s or all 1s. +bool checkBits(uint64_t a) { + uint8_t* start = reinterpret_cast(&a); + for (size_t i = 0; i < sizeof(uint64_t); ++i) { + uint8_t currentByte = start[i]; + if (currentByte == 0 || currentByte == 0xff) { + return false; + } + } + return true; +} + +// Note: checkCycle is expensive, so it's disabled by default. +uint64_t niceRandomOdd(bool checkCycle = false, int8_t bits = 64) { + uint64_t m; + do { + m = random_odd(bits); + } while (!checkBits(m) || (checkCycle && !checkCycleLength(m))); + return m; +} + +// The id is used to generate different hash functions with the same code +// (by generating different multipliers and bias). 
+template +uint32_t hash64Tuple(const span tuple) { + static const std::vector *multipliers = []() { + std::vector *v = new std::vector(64); + for (int8_t i = 0; i < 64; ++i) { + (*v)[i] = niceRandomOdd(); + } + return v; + }(); + static const uint64_t bias = niceRandomOdd(); + uint64_t result = bias; + auto tupleIt = tuple.begin(); + auto multiplierIt = multipliers->begin(); + for (; tupleIt != tuple.end() && multiplierIt != multipliers->end(); + ++tupleIt, ++multiplierIt) { + result += (*tupleIt) * (*multiplierIt); + } + return static_cast(result >> 32); +} + +uint64_t hash32Tuple(const span tuple) { + span tuple32 = { + reinterpret_cast(tuple.data()), + tuple.size() * sizeof(uint64_t) / sizeof(uint32_t) + }; + return (static_cast(hash64Tuple<0>(tuple32)) << 32) | + hash64Tuple<1>(tuple32); +} + +uint32_t hashSpan(span s, int8_t bits) { + static constexpr uint64_t p = UINT64_C(1) << 61 - 1; + static const uint64_t coefficient = niceRandomOdd(true, 61); + uint64_t result = 0; + +} + + +uint64_t hashSpan(span s, int8_t bits) { + auto condensedSize = (s.size() + 31) >> 5; // Divide by 32, rounding up. 
+ std::vector condensed{condensedSize}; + for (int i = 0; i < condensedSize; ++i) { + condensed[i] = hash64Tuple(s.subspan(i << 6)); + } + static const uint64_t coefficient = niceRandomOdd(true); + uint64_t result = 0; + for (uint64_t a : condensed) { + result = result * coefficient + a; + } + return result >> (64 - bits); +} + + +uint64_t hashInt(uint64_t h, int8_t bits) { + static const std::vector *multipliers = []() { + std::vector *v = new std::vector(64); + for (int8_t i = 0; i < 64; ++i) { + (*v)[i] = niceRandomOdd(); + } + return v; + }(); + // std::cout << "h: " << h << " bits: " << (int)bits << std::endl; + uint64_t a = (*multipliers)[bits]; + // std::cout << "a: " << a << std::endl; + // std::cout << "h * a: " << (h * a) << std::endl; + // std::cout << "64 - bits: " << (64 - bits) << std::endl; + uint64_t result = (h * a) >> (64 - bits); + // std::cout << "result: " << result << std::endl; + return result; +} + +} // namespace hashing \ No newline at end of file diff --git a/hashing.h b/hashing.h new file mode 100644 index 000000000..748f8462e --- /dev/null +++ b/hashing.h @@ -0,0 +1,18 @@ +#include + +#include "common.h" + +#if __cplusplus < 202002L +#include +using boost::span; +#else +#include +using std::span; +#endif + +namespace hashing { + +uint64_t hashInt(uint64_t h, int8_t bits); +uint64_t hashSpan(span s, int8_t bits); + +} // namespace hashing \ No newline at end of file diff --git a/runarray.in b/runarray.in index f7dcc3948..8b5b2a596 100644 --- a/runarray.in +++ b/runarray.in @@ -30,16 +30,17 @@ callableReal* => realRealFunction() #include "path3.h" #include "Delaunay.h" #include "glrender.h" +#include "hashing.h" -#ifdef __has_include -# if __has_include() -# include -# endif -#endif -#ifndef __cpp_lib_string_view -# define __cpp_lib_string_view 0 +#if __cplusplus < 202002L +#include +using boost::span; +#else +#include +using std::span; #endif + #ifdef HAVE_LIBFFTW3 #include "fftw++.h" static const char *rectangular="matrix must be 
rectangular"; @@ -945,30 +946,29 @@ Intarray* complement(Intarray *a, Int n) return r; } -Int hash(Intarray *a) +Int hash(Intarray *a, Int bits) { + if (bits > 62) { + ostringstream buf; + buf << "hash can produce at most 62 bits, requested " << bits; + error(buf); + } else if (bits < 0) { + ostringstream buf; + buf << "hash cannot produce negative bits, requested " << bits; + error(buf); + } + int8_t bits8 = static_cast(bits); size_t asize=checkArray(a); #if COMPACT - char* dataPointer = static_cast(static_cast(a->data())); - size_t size = (sizeof(item) / sizeof(char)) * asize; + const uint64_t *start = reinterpret_cast(a->data()); + return hashing::hashSpan({start, asize}, bits8); #else std::vector v{asize}; for (size_t i = 0; i < asize; ++i) { v[i] = static_cast(read(a,i)); } - char* dataPointer = static_cast(static_cast(v->data())); - size_t size = (sizeof(uint64_t) / sizeof(char)) * asize; -#endif -#if __cpp_lib_string_view - std::string_view dataView = {dataPointer, size}; - uint64_t hashResult = std::hash{}(dataView); -#else - std::string dataView = {dataPointer, size}; - uint64_t hashResult = std::hash{}(dataView); + return hashing::hashSpan(v, bits8); #endif - // Zero out the highest two bits to avoid conflicts with DefaultValue and - // Undefined: - return hashResult & UINT64_C(0x3fffffffffffffff); } // Generate the sequence {f(i) : i=0,1,...n-1} given a function f and integer n diff --git a/runmath.in b/runmath.in index 4beba44fc..9eabcc702 100644 --- a/runmath.in +++ b/runmath.in @@ -9,8 +9,9 @@ pair => primPair() realarray* => realArray() pairarray* => pairArray() -#include +#include +#include "hashing.h" #include "mathop.h" #include "path.h" @@ -444,3 +445,35 @@ Int bitreverse(Int a, Int bits) unsigned int bytes=(bits+7)/8; return B[bytes-1]((unsigned long long) a) >> (8*bytes-bits); } + +Int hash(Int h, Int bits) +{ + if (bits > 62) { + ostringstream buf; + buf << "binHash can produce at most 62 bits, requested " << bits; + error(buf); + } else if 
(bits < 0) { + ostringstream buf; + buf << "binHash cannot produce negative bits, requested " << bits; + error(buf); + } + int8_t bits8 = static_cast(bits); + return hashing::hashInt(static_cast(h), bits8); +} + +Int hash(real h, Int bits) +{ + static_assert(sizeof(real) == sizeof(uint64_t), + "To hash a real, it must be a 64-bit float."); + if (bits > 62) { + ostringstream buf; + buf << "binHash can produce at most 62 bits, requested " << bits; + error(buf); + } else if (bits < 0) { + ostringstream buf; + buf << "binHash cannot produce negative bits, requested " << bits; + error(buf); + } + uint64_t hAsInt = *reinterpret_cast(&h); + return hashing::hashInt(hAsInt, static_cast(bits)); +} \ No newline at end of file diff --git a/runstring.in b/runstring.in index f0a3167d1..ebe72fb9b 100644 --- a/runstring.in +++ b/runstring.in @@ -8,11 +8,13 @@ stringarray2* => stringArray2() #include +#include #include #include #include #include "array.h" +#include "hashing.h" using namespace camp; using namespace vm; @@ -107,17 +109,6 @@ void checkformat(const char *ptr, bool intformat) } } -// Generates one random 62-bit integer that is different every time asy is run. -// The function is not intended to be run more than once, but it will likely -// give different results every time it is run. -// **NOT CRYPTOGRAPHICALLY SECURE** -unsigned long long one_random_int() { - std::random_device rd; - std::uniform_int_distribution - dist(0, 0x3fffffffffffffffULL); - return dist(rd); -} - // Autogenerated routines: @@ -417,23 +408,22 @@ Int ascii(string s) return s.empty() ? -1 : (unsigned char) s[0]; } -Int hash(string s, bool try_consistent=false) +Int hash(string s, Int bits) { - // A single random int that is generated the first time this function is - // run. 
- static unsigned long long per_run_entropy = one_random_int(); - unsigned long long hash_result = std::hash{}(s); + if (bits > 62) { + ostringstream buf; + buf << "hash can produce at most 62 bits, requested " << bits; + error(buf); + } else if (bits < 0) { + ostringstream buf; + buf << "hash cannot produce negative bits, requested " << bits; + error(buf); + } + int8_t bits8 = static_cast(bits); + uint64_t hashResult = std::hash{}(s); // Zero out the highest two bits to avoid conflicts with DefaultValue and // Undefined: - long long retv = hash_result & 0x3fffffffffffffffULL; - // Mix in the highest two bits in case the hash distribution needs them. - retv ^= (hash_result >> 62); - // Xor with a random bitstring to ensure the hash behaves differently each - // run; see Hyrum's Law. - if (not try_consistent) { - retv ^= per_run_entropy; - } - return retv; + return hashing::hashInt(hashResult, bits8); } string string(Int x) From f19d7a652960bd3f35f6a104bf5017bd99602d9b Mon Sep 17 00:00:00 2001 From: Charles Staats III Date: Mon, 11 Mar 2024 13:45:19 -0700 Subject: [PATCH 028/133] git subrepo clone (merge) https://github.com/google/highwayhash.git highwayhash subrepo: subdir: "highwayhash" merged: "5ad3bf84" upstream: origin: "https://github.com/google/highwayhash.git" branch: "master" commit: "5ad3bf84" git-subrepo: version: "0.4.6" origin: "???" commit: "???" 
--- highwayhash/.gitignore | 13 + highwayhash/.gitrepo | 12 + highwayhash/.travis.yml | 10 + highwayhash/CMakeLists.txt | 244 ++++ highwayhash/CONTRIBUTING | 27 + highwayhash/LICENSE | 203 ++++ highwayhash/Makefile | 140 +++ highwayhash/README.md | 404 +++++++ highwayhash/c/highwayhash.c | 261 +++++ highwayhash/c/highwayhash.h | 100 ++ highwayhash/c/highwayhash_test.c | 70 ++ .../google3/third_party/highwayhash/WORKSPACE | 1 + highwayhash/highwayhash.3 | 107 ++ highwayhash/highwayhash/arch_specific.cc | 193 +++ highwayhash/highwayhash/arch_specific.h | 179 +++ highwayhash/highwayhash/benchmark.cc | 331 ++++++ highwayhash/highwayhash/c_bindings.cc | 35 + highwayhash/highwayhash/c_bindings.h | 57 + highwayhash/highwayhash/compiler_specific.h | 90 ++ highwayhash/highwayhash/data_parallel.h | 341 ++++++ .../highwayhash/data_parallel_benchmark.cc | 157 +++ highwayhash/highwayhash/data_parallel_test.cc | 175 +++ highwayhash/highwayhash/endianess.h | 108 ++ highwayhash/highwayhash/example.cc | 40 + highwayhash/highwayhash/hh_avx2.cc | 19 + highwayhash/highwayhash/hh_avx2.h | 381 ++++++ highwayhash/highwayhash/hh_buffer.h | 116 ++ highwayhash/highwayhash/hh_neon.cc | 22 + highwayhash/highwayhash/hh_neon.h | 336 ++++++ highwayhash/highwayhash/hh_portable.cc | 19 + highwayhash/highwayhash/hh_portable.h | 302 +++++ highwayhash/highwayhash/hh_sse41.cc | 19 + highwayhash/highwayhash/hh_sse41.h | 336 ++++++ highwayhash/highwayhash/hh_types.h | 50 + highwayhash/highwayhash/hh_vsx.cc | 22 + highwayhash/highwayhash/hh_vsx.h | 335 ++++++ highwayhash/highwayhash/highwayhash.h | 216 ++++ highwayhash/highwayhash/highwayhash_fuzzer.cc | 25 + highwayhash/highwayhash/highwayhash_target.cc | 104 ++ highwayhash/highwayhash/highwayhash_target.h | 91 ++ highwayhash/highwayhash/highwayhash_test.cc | 391 +++++++ .../highwayhash/highwayhash_test_avx2.cc | 19 + .../highwayhash/highwayhash_test_neon.cc | 22 + .../highwayhash/highwayhash_test_portable.cc | 19 + 
.../highwayhash/highwayhash_test_sse41.cc | 19 + .../highwayhash/highwayhash_test_target.cc | 220 ++++ .../highwayhash/highwayhash_test_target.h | 90 ++ .../highwayhash/highwayhash_test_vsx.cc | 22 + highwayhash/highwayhash/iaca.h | 63 + highwayhash/highwayhash/instruction_sets.cc | 144 +++ highwayhash/highwayhash/instruction_sets.h | 118 ++ highwayhash/highwayhash/load3.h | 144 +++ highwayhash/highwayhash/nanobenchmark.cc | 451 +++++++ highwayhash/highwayhash/nanobenchmark.h | 189 +++ .../highwayhash/nanobenchmark_example.cc | 58 + highwayhash/highwayhash/os_mac.cc | 44 + highwayhash/highwayhash/os_mac.h | 62 + highwayhash/highwayhash/os_specific.cc | 260 +++++ highwayhash/highwayhash/os_specific.h | 54 + highwayhash/highwayhash/profiler.h | 762 ++++++++++++ highwayhash/highwayhash/profiler_example.cc | 97 ++ highwayhash/highwayhash/robust_statistics.h | 135 +++ highwayhash/highwayhash/scalar.h | 352 ++++++ .../highwayhash/scalar_sip_tree_hash.cc | 183 +++ .../highwayhash/scalar_sip_tree_hash.h | 37 + highwayhash/highwayhash/sip_hash.cc | 33 + highwayhash/highwayhash/sip_hash.h | 171 +++ highwayhash/highwayhash/sip_hash_fuzzer.cc | 20 + highwayhash/highwayhash/sip_hash_test.cc | 148 +++ highwayhash/highwayhash/sip_tree_hash.cc | 227 ++++ highwayhash/highwayhash/sip_tree_hash.h | 52 + highwayhash/highwayhash/state_helpers.h | 130 +++ highwayhash/highwayhash/tsc_timer.h | 208 ++++ highwayhash/highwayhash/vector128.h | 796 +++++++++++++ highwayhash/highwayhash/vector256.h | 758 ++++++++++++ highwayhash/highwayhash/vector_neon.h | 1037 +++++++++++++++++ highwayhash/highwayhash/vector_test.cc | 66 ++ highwayhash/highwayhash/vector_test_avx2.cc | 19 + highwayhash/highwayhash/vector_test_neon.cc | 19 + .../highwayhash/vector_test_portable.cc | 19 + highwayhash/highwayhash/vector_test_sse41.cc | 19 + highwayhash/highwayhash/vector_test_target.cc | 225 ++++ highwayhash/highwayhash/vector_test_target.h | 37 + highwayhash/highwayhash_logo.png | Bin 0 -> 45234 bytes 
.../com/google/highwayhash/HighwayHash.java | 309 +++++ .../google/highwayhash/HighwayHashTest.java | 76 ++ highwayhash/msvc/Benchmark.vcxproj | 165 +++ highwayhash/msvc/HighwayHash.sln | 78 ++ highwayhash/msvc/HighwayHash.vcxproj | 164 +++ highwayhash/msvc/NanobenchmarkExample.vcxproj | 134 +++ highwayhash/msvc/ProfilerExample.vcxproj | 134 +++ highwayhash/msvc/SipHashTest.vcxproj | 141 +++ highwayhash/msvc/VectorTest.vcxproj | 141 +++ 93 files changed, 14972 insertions(+) create mode 100644 highwayhash/.gitignore create mode 100644 highwayhash/.gitrepo create mode 100644 highwayhash/.travis.yml create mode 100644 highwayhash/CMakeLists.txt create mode 100644 highwayhash/CONTRIBUTING create mode 100644 highwayhash/LICENSE create mode 100644 highwayhash/Makefile create mode 100644 highwayhash/README.md create mode 100644 highwayhash/c/highwayhash.c create mode 100644 highwayhash/c/highwayhash.h create mode 100644 highwayhash/c/highwayhash_test.c create mode 100644 highwayhash/google3/third_party/highwayhash/WORKSPACE create mode 100644 highwayhash/highwayhash.3 create mode 100644 highwayhash/highwayhash/arch_specific.cc create mode 100644 highwayhash/highwayhash/arch_specific.h create mode 100644 highwayhash/highwayhash/benchmark.cc create mode 100644 highwayhash/highwayhash/c_bindings.cc create mode 100644 highwayhash/highwayhash/c_bindings.h create mode 100644 highwayhash/highwayhash/compiler_specific.h create mode 100644 highwayhash/highwayhash/data_parallel.h create mode 100644 highwayhash/highwayhash/data_parallel_benchmark.cc create mode 100644 highwayhash/highwayhash/data_parallel_test.cc create mode 100644 highwayhash/highwayhash/endianess.h create mode 100644 highwayhash/highwayhash/example.cc create mode 100644 highwayhash/highwayhash/hh_avx2.cc create mode 100644 highwayhash/highwayhash/hh_avx2.h create mode 100644 highwayhash/highwayhash/hh_buffer.h create mode 100644 highwayhash/highwayhash/hh_neon.cc create mode 100644 
highwayhash/highwayhash/hh_neon.h create mode 100644 highwayhash/highwayhash/hh_portable.cc create mode 100644 highwayhash/highwayhash/hh_portable.h create mode 100644 highwayhash/highwayhash/hh_sse41.cc create mode 100644 highwayhash/highwayhash/hh_sse41.h create mode 100644 highwayhash/highwayhash/hh_types.h create mode 100644 highwayhash/highwayhash/hh_vsx.cc create mode 100644 highwayhash/highwayhash/hh_vsx.h create mode 100644 highwayhash/highwayhash/highwayhash.h create mode 100644 highwayhash/highwayhash/highwayhash_fuzzer.cc create mode 100644 highwayhash/highwayhash/highwayhash_target.cc create mode 100644 highwayhash/highwayhash/highwayhash_target.h create mode 100644 highwayhash/highwayhash/highwayhash_test.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_avx2.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_neon.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_portable.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_sse41.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_target.cc create mode 100644 highwayhash/highwayhash/highwayhash_test_target.h create mode 100644 highwayhash/highwayhash/highwayhash_test_vsx.cc create mode 100644 highwayhash/highwayhash/iaca.h create mode 100644 highwayhash/highwayhash/instruction_sets.cc create mode 100644 highwayhash/highwayhash/instruction_sets.h create mode 100644 highwayhash/highwayhash/load3.h create mode 100644 highwayhash/highwayhash/nanobenchmark.cc create mode 100644 highwayhash/highwayhash/nanobenchmark.h create mode 100644 highwayhash/highwayhash/nanobenchmark_example.cc create mode 100644 highwayhash/highwayhash/os_mac.cc create mode 100644 highwayhash/highwayhash/os_mac.h create mode 100644 highwayhash/highwayhash/os_specific.cc create mode 100644 highwayhash/highwayhash/os_specific.h create mode 100644 highwayhash/highwayhash/profiler.h create mode 100644 highwayhash/highwayhash/profiler_example.cc create mode 100644 
highwayhash/highwayhash/robust_statistics.h create mode 100644 highwayhash/highwayhash/scalar.h create mode 100644 highwayhash/highwayhash/scalar_sip_tree_hash.cc create mode 100644 highwayhash/highwayhash/scalar_sip_tree_hash.h create mode 100644 highwayhash/highwayhash/sip_hash.cc create mode 100644 highwayhash/highwayhash/sip_hash.h create mode 100644 highwayhash/highwayhash/sip_hash_fuzzer.cc create mode 100644 highwayhash/highwayhash/sip_hash_test.cc create mode 100644 highwayhash/highwayhash/sip_tree_hash.cc create mode 100644 highwayhash/highwayhash/sip_tree_hash.h create mode 100644 highwayhash/highwayhash/state_helpers.h create mode 100644 highwayhash/highwayhash/tsc_timer.h create mode 100644 highwayhash/highwayhash/vector128.h create mode 100644 highwayhash/highwayhash/vector256.h create mode 100644 highwayhash/highwayhash/vector_neon.h create mode 100644 highwayhash/highwayhash/vector_test.cc create mode 100644 highwayhash/highwayhash/vector_test_avx2.cc create mode 100644 highwayhash/highwayhash/vector_test_neon.cc create mode 100644 highwayhash/highwayhash/vector_test_portable.cc create mode 100644 highwayhash/highwayhash/vector_test_sse41.cc create mode 100644 highwayhash/highwayhash/vector_test_target.cc create mode 100644 highwayhash/highwayhash/vector_test_target.h create mode 100644 highwayhash/highwayhash_logo.png create mode 100644 highwayhash/java/com/google/highwayhash/HighwayHash.java create mode 100644 highwayhash/java/com/google/highwayhash/HighwayHashTest.java create mode 100644 highwayhash/msvc/Benchmark.vcxproj create mode 100644 highwayhash/msvc/HighwayHash.sln create mode 100644 highwayhash/msvc/HighwayHash.vcxproj create mode 100644 highwayhash/msvc/NanobenchmarkExample.vcxproj create mode 100644 highwayhash/msvc/ProfilerExample.vcxproj create mode 100644 highwayhash/msvc/SipHashTest.vcxproj create mode 100644 highwayhash/msvc/VectorTest.vcxproj diff --git a/highwayhash/.gitignore b/highwayhash/.gitignore new file mode 100644 index 
000000000..1da3cef1b --- /dev/null +++ b/highwayhash/.gitignore @@ -0,0 +1,13 @@ +bin +lib +obj +deps.mk +OWNERS +*.a +*.o +benchmark +nanobenchmark_example +profiler_example +sip_hash_test +vector_test +highwayhash_test diff --git a/highwayhash/.gitrepo b/highwayhash/.gitrepo new file mode 100644 index 000000000..58fee452f --- /dev/null +++ b/highwayhash/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = https://github.com/google/highwayhash.git + branch = master + commit = 5ad3bf8444cfc663b11bf367baaa31f36e7ff7c8 + parent = 4c385fa31d75be1faebf40a246d57a10b944c6fb + method = merge + cmdver = 0.4.6 diff --git a/highwayhash/.travis.yml b/highwayhash/.travis.yml new file mode 100644 index 000000000..e05097581 --- /dev/null +++ b/highwayhash/.travis.yml @@ -0,0 +1,10 @@ +language: cpp + +dist: trusty + +compiler: + - clang + - gcc + +script: + - make diff --git a/highwayhash/CMakeLists.txt b/highwayhash/CMakeLists.txt new file mode 100644 index 000000000..c8ae55126 --- /dev/null +++ b/highwayhash/CMakeLists.txt @@ -0,0 +1,244 @@ + +project(highwayhash C CXX) + +cmake_minimum_required(VERSION 3.18) + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build library as shared." OFF) + +# Force PIC on unix when building shared libs +# see: https://en.wikipedia.org/wiki/Position-independent_code +if(BUILD_SHARED_LIBS AND UNIX) + option(CMAKE_POSITION_INDEPENDENT_CODE "Build with Position Independant Code." 
ON) +endif() + + +set(PROCESSOR_IS_ARM FALSE) +set(PROCESSOR_IS_AARCH64 FALSE) +set(PROCESSOR_IS_X86 FALSE) +set(PROCESSOR_IS_POWER FALSE) + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)") + set(PROCESSOR_IS_AARCH64 TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set(PROCESSOR_IS_ARM TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") + set(PROCESSOR_IS_X86 TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)") + set(PROCESSOR_IS_POWER TRUE) +endif() + + +if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -O3 -fPIC -pthread -Wno-maybe-uninitialized") + if(PROCESSOR_IS_ARM) + # aarch64 and ARM use the same code, although ARM usually needs an extra flag for NEON. + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -march=armv7-a -mfpu=neon") + endif() +endif() + + +# +# library : highwayhash +# + +set(HH_INCLUDES + ${PROJECT_SOURCE_DIR}/highwayhash/c_bindings.h + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash.h +) + +set(HH_SOURCES + ${PROJECT_SOURCE_DIR}/highwayhash/c_bindings.cc + ${PROJECT_SOURCE_DIR}/highwayhash/hh_portable.cc + ${PROJECT_SOURCE_DIR}/highwayhash/arch_specific.cc + + ${PROJECT_SOURCE_DIR}/highwayhash/scalar_sip_tree_hash.cc + ${PROJECT_SOURCE_DIR}/highwayhash/sip_hash.cc + ${PROJECT_SOURCE_DIR}/highwayhash/sip_tree_hash.cc + + ${PROJECT_SOURCE_DIR}/highwayhash/hh_portable.h + ${PROJECT_SOURCE_DIR}/highwayhash/state_helpers.h + + ${PROJECT_SOURCE_DIR}/highwayhash/arch_specific.h + ${PROJECT_SOURCE_DIR}/highwayhash/compiler_specific.h + ${PROJECT_SOURCE_DIR}/highwayhash/load3.h + ${PROJECT_SOURCE_DIR}/highwayhash/vector128.h + ${PROJECT_SOURCE_DIR}/highwayhash/vector256.h + ${PROJECT_SOURCE_DIR}/highwayhash/endianess.h + ${PROJECT_SOURCE_DIR}/highwayhash/iaca.h + ${PROJECT_SOURCE_DIR}/highwayhash/hh_types.h + ${PROJECT_SOURCE_DIR}/highwayhash/hh_buffer.h + + ${PROJECT_SOURCE_DIR}/highwayhash/scalar_sip_tree_hash.h + 
${PROJECT_SOURCE_DIR}/highwayhash/sip_hash.h + ${PROJECT_SOURCE_DIR}/highwayhash/sip_tree_hash.h +) + +if(PROCESSOR_IS_ARM OR PROCESSOR_IS_AARCH64) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_neon.cc) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_neon.h) + +elseif(PROCESSOR_IS_POWER) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_vsx.cc) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_vsx.h) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/benchmark.cc + PROPERTIES COMPILE_FLAGS -mvsx) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/hh_vsx.cc + PROPERTIES COMPILE_FLAGS -mvsx) + +elseif(PROCESSOR_IS_X86) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_avx2.cc) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_sse41.cc) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_avx2.h) + list(APPEND HH_SOURCES ${PROJECT_SOURCE_DIR}/highwayhash/hh_sse41.h) + + # TODO: Portability: Have AVX2 be optional so benchmarking can be done on older machines. + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/benchmark.cc + PROPERTIES COMPILE_FLAGS -mavx2) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/sip_tree_hash.cc + PROPERTIES COMPILE_FLAGS -mavx2) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/hh_avx2.cc + PROPERTIES COMPILE_FLAGS -mavx2) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/hh_sse41.cc + PROPERTIES COMPILE_FLAGS -msse4.1) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/hh_portable.cc + PROPERTIES COMPILE_FLAGS -DHH_TARGET_NAME=Portable) + +else() + # Unknown architecture. 
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHH_DISABLE_TARGET_SPECIFIC") +endif() + + +add_library(highwayhash ${HH_INCLUDES} ${HH_SOURCES}) +set_target_properties(highwayhash PROPERTIES PUBLIC_HEADER "${HH_INCLUDES}") + +target_include_directories(highwayhash + PUBLIC $ +) +target_include_directories(highwayhash + PUBLIC $ +) + +if(NOT WIN32 AND NOT ANDROID) + target_link_libraries(highwayhash pthread) +endif() + +add_library(highwayhash::highwayhash ALIAS highwayhash) + + +# +# Tests & Similar +# + +add_library(nanobenchmark OBJECT + ${PROJECT_SOURCE_DIR}/highwayhash/nanobenchmark.h + ${PROJECT_SOURCE_DIR}/highwayhash/nanobenchmark.cc + + ${PROJECT_SOURCE_DIR}/highwayhash/instruction_sets.h + ${PROJECT_SOURCE_DIR}/highwayhash/os_specific.h + ${PROJECT_SOURCE_DIR}/highwayhash/profiler.h + ${PROJECT_SOURCE_DIR}/highwayhash/tsc_timer.h + + ${PROJECT_SOURCE_DIR}/highwayhash/instruction_sets.cc + ${PROJECT_SOURCE_DIR}/highwayhash/os_specific.cc +) +target_include_directories(nanobenchmark PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + +add_executable(highwayhash_test) +target_sources(highwayhash_test PRIVATE + + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test.cc + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_portable.cc + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_target.h +) +target_link_libraries(highwayhash_test highwayhash nanobenchmark) + + +add_executable(vector_test) +target_sources(vector_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test.cc + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_portable.cc + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_target.h +) +target_link_libraries(vector_test highwayhash nanobenchmark) + + +if(PROCESSOR_IS_ARM OR PROCESSOR_IS_AARCH64) + target_sources(highwayhash_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_neon.cc + ) + target_sources(vector_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_neon.cc + ) + +elseif(PROCESSOR_IS_X86) + target_sources(highwayhash_test PRIVATE + 
${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_avx2.cc + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_sse41.cc + ) + target_sources(vector_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_avx2.cc + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_sse41.cc + ) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_avx2.cc + PROPERTIES COMPILE_FLAGS -mavx2) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_sse41.cc + PROPERTIES COMPILE_FLAGS -msse4.1) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_avx2.cc + PROPERTIES COMPILE_FLAGS -mavx2) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test_sse41.cc + PROPERTIES COMPILE_FLAGS -msse4.1) + +elseif(PROCESSOR_IS_POWER) + target_sources(highwayhash_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_vsx.cc + ) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/highwayhash_test_vsx.cc + PROPERTIES COMPILE_FLAGS -mvsx) + + set_source_files_properties( + ${PROJECT_SOURCE_DIR}/highwayhash/vector_test.cc + PROPERTIES COMPILE_FLAGS -DHH_DISABLE_TARGET_SPECIFIC) + +endif() + + +add_executable(sip_hash_test) +target_sources(sip_hash_test PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/sip_hash_test.cc +) +target_link_libraries(sip_hash_test highwayhash) + + +add_executable(example) +target_sources(example PRIVATE + ${PROJECT_SOURCE_DIR}/highwayhash/example.cc + ) +target_link_libraries(example highwayhash) + diff --git a/highwayhash/CONTRIBUTING b/highwayhash/CONTRIBUTING new file mode 100644 index 000000000..bd6072591 --- /dev/null +++ b/highwayhash/CONTRIBUTING @@ -0,0 +1,27 @@ +Want to contribute? Great! First, read this page (including the small print at the end). 
+ +### Before you contribute +Before we can use your code, you must sign the +[Google Individual Contributor License Agreement] +(https://cla.developers.google.com/about/google-individual) +(CLA), which you can do online. The CLA is necessary mainly because you own the +copyright to your changes, even after your contribution becomes part of our +codebase, so we need your permission to use and distribute your code. We also +need to be sure of various other things-for instance that you'll tell us if you +know that your code infringes on other people's patents. You don't have to sign +the CLA until after you've submitted your code for review and a member has +approved it, but you must do it before we can put your code into our codebase. +Before you start working on a larger contribution, you should get in touch with +us first through the issue tracker with your idea so that we can help out and +possibly guide you. Coordinating up front makes it much easier to avoid +frustration later on. + +### Code reviews +All submissions, including submissions by project members, require review. We +use Github pull requests for this purpose. + +### The small print +Contributions made by corporations are covered by a different agreement than +the one above, the +[Software Grant and Corporate Contributor License Agreement] +(https://cla.developers.google.com/about/google-corporate). diff --git a/highwayhash/LICENSE b/highwayhash/LICENSE new file mode 100644 index 000000000..6b0b1270f --- /dev/null +++ b/highwayhash/LICENSE @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/highwayhash/Makefile b/highwayhash/Makefile new file mode 100644 index 000000000..a312bc263 --- /dev/null +++ b/highwayhash/Makefile @@ -0,0 +1,140 @@ +# We assume X64 unless HH_POWER, HH_ARM, or HH_AARCH64 are defined. + +override CPPFLAGS += -I. 
+override CXXFLAGS += -std=c++11 -Wall -O3 -fPIC -pthread +override LDFLAGS += -pthread + +PREFIX ?= /usr/local +INCDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib + +SIP_OBJS := $(addprefix obj/, \ + sip_hash.o \ + sip_tree_hash.o \ + scalar_sip_tree_hash.o \ +) + +DISPATCHER_OBJS := $(addprefix obj/, \ + arch_specific.o \ + instruction_sets.o \ + nanobenchmark.o \ + os_specific.o \ +) + +HIGHWAYHASH_OBJS := $(DISPATCHER_OBJS) obj/hh_portable.o +HIGHWAYHASH_TEST_OBJS := $(DISPATCHER_OBJS) obj/highwayhash_test_portable.o +VECTOR_TEST_OBJS := $(DISPATCHER_OBJS) obj/vector_test_portable.o + +# aarch64 and ARM use the same code, although ARM usually needs an extra flag for NEON. +ifdef HH_ARM +CXXFLAGS += -mfloat-abi=hard -march=armv7-a -mfpu=neon +HH_AARCH64 = 1 +endif + +ifdef HH_AARCH64 +HH_X64 = +HIGHWAYHASH_OBJS += obj/hh_neon.o +HIGHWAYHASH_TEST_OBJS += obj/highwayhash_test_neon.o +VECTOR_TEST_OBJS += obj/vector_test_neon.o +else +ifdef HH_POWER +HH_X64 = +HIGHWAYHASH_OBJS += obj/hh_vsx.o +HIGHWAYHASH_TEST_OBJS += obj/highwayhash_test_vsx.o +else +HH_X64 = 1 +HIGHWAYHASH_OBJS += obj/hh_avx2.o obj/hh_sse41.o +HIGHWAYHASH_TEST_OBJS += obj/highwayhash_test_avx2.o obj/highwayhash_test_sse41.o +VECTOR_TEST_OBJS += obj/vector_test_avx2.o obj/vector_test_sse41.o +endif +endif + +# In case highwayhash_test defines PRINT_RESULTS. 
+HIGHWAYHASH_TEST_OBJS += $(HIGHWAYHASH_OBJS) + +all: $(addprefix bin/, \ + profiler_example nanobenchmark_example vector_test sip_hash_test \ + highwayhash_test benchmark) lib/libhighwayhash.a + +obj/%.o: highwayhash/%.cc + @mkdir -p -- $(dir $@) + $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@ + +bin/%: obj/%.o + @mkdir -p -- $(dir $@) + $(CXX) $(LDFLAGS) $^ -o $@ + +.DELETE_ON_ERROR: +deps.mk: $(wildcard highwayhash/*.cc) $(wildcard highwayhash/*.h) Makefile + set -eu; for file in highwayhash/*.cc; do \ + target=obj/$${file##*/}; target=$${target%.*}.o; \ + [ "$$target" = "obj/highwayhash_target.o" ] || \ + [ "$$target" = "obj/data_parallel_benchmark.o" ] || \ + [ "$$target" = "obj/data_parallel_test.o" ] || \ + $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -DHH_DISABLE_TARGET_SPECIFIC -MM -MT \ + "$$target" "$$file"; \ + done | sed -e ':b' -e 's-../[^./]*/--' -e 'tb' >$@ +-include deps.mk + +bin/profiler_example: $(DISPATCHER_OBJS) + +bin/nanobenchmark_example: $(DISPATCHER_OBJS) obj/nanobenchmark.o + +ifdef HH_X64 +# TODO: Portability: Have AVX2 be optional so benchmarking can be done on older machines. 
+obj/sip_tree_hash.o: CXXFLAGS+=-mavx2 +# (Compiled from same source file with different compiler flags) +obj/highwayhash_test_avx2.o: CXXFLAGS+=-mavx2 +obj/highwayhash_test_sse41.o: CXXFLAGS+=-msse4.1 +obj/hh_avx2.o: CXXFLAGS+=-mavx2 +obj/hh_sse41.o: CXXFLAGS+=-msse4.1 +obj/vector_test_avx2.o: CXXFLAGS+=-mavx2 +obj/vector_test_sse41.o: CXXFLAGS+=-msse4.1 + +obj/benchmark.o: CXXFLAGS+=-mavx2 +endif + +ifdef HH_POWER +obj/highwayhash_test_vsx.o: CXXFLAGS+=-mvsx +obj/hh_vsx.o: CXXFLAGS+=-mvsx +obj/benchmark.o: CXXFLAGS+=-mvsx +# Skip file - vector library/test not supported on PPC +obj/vector_test_target.o: CXXFLAGS+=-DHH_DISABLE_TARGET_SPECIFIC +obj/vector_test.o: CXXFLAGS+=-DHH_DISABLE_TARGET_SPECIFIC +endif + +lib/libhighwayhash.a: $(SIP_OBJS) $(HIGHWAYHASH_OBJS) obj/c_bindings.o + @mkdir -p -- $(dir $@) + $(AR) rcs $@ $^ + +lib/libhighwayhash.so: $(SIP_OBJS) $(HIGHWAYHASH_OBJS) obj/c_bindings.o + @mkdir -p -- $(dir $@) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared $^ -o $@.0 -Wl,-soname,libhighwayhash.so.0 + @cd $(dir $@); ln -s libhighwayhash.so.0 libhighwayhash.so + +bin/highwayhash_test: $(HIGHWAYHASH_TEST_OBJS) + +bin/benchmark: obj/benchmark.o $(HIGHWAYHASH_TEST_OBJS) +bin/benchmark: $(SIP_OBJS) $(HIGHWAYHASH_OBJS) +bin/vector_test: $(VECTOR_TEST_OBJS) + +clean: + [ ! -d obj ] || $(RM) -r -- obj/ + +distclean: clean + [ ! -d bin ] || $(RM) -r -- bin/ + [ ! -d lib ] || $(RM) -r -- lib/ + +# Mode bits are from issue #58, thanks to yurivict for suggesting. +# Also added owner-write for stripping the .so in post-install. 
+install: lib/libhighwayhash.a lib/libhighwayhash.so + mkdir -p $(DESTDIR)/$(LIBDIR) + mkdir -p $(DESTDIR)/$(INCDIR)/highwayhash + install -m0444 lib/libhighwayhash.a $(DESTDIR)/$(LIBDIR) + install -m0755 lib/libhighwayhash.so $(DESTDIR)/$(LIBDIR) + install -m0444 highwayhash/*.h $(DESTDIR)/$(INCDIR)/highwayhash/ + +post-install: + ${STRIP_CMD} $(DESTDIR)/$(LIBDIR)/libhighwayhash.so + +.PHONY: clean distclean all install post-install diff --git a/highwayhash/README.md b/highwayhash/README.md new file mode 100644 index 000000000..d59f7ab6d --- /dev/null +++ b/highwayhash/README.md @@ -0,0 +1,404 @@ +Strong (well-distributed and unpredictable) hashes: + +* Portable implementation of + [SipHash](https://www.131002.net/siphash/siphash.pdf) +* HighwayHash, a 5x faster SIMD hash with [security + claims](https://arxiv.org/abs/1612.06257) + +## Quick Start + +To build on a Linux or Mac platform, simply run `make`. For Windows, we provide +a Visual Studio 2015 project in the `msvc` subdirectory. + +Run `benchmark` for speed measurements. `sip_hash_test` and `highwayhash_test` +ensure the implementations return known-good values for a given set of inputs. 
+ +64-bit SipHash for any CPU: + +``` + #include "highwayhash/sip_hash.h" + using namespace highwayhash; + HH_ALIGNAS(16) const HH_U64 key2[2] = {1234, 5678}; + char in[8] = {1}; + return SipHash(key2, in, 8); +``` + +64, 128 or 256 bit HighwayHash for the CPU determined by compiler flags: + +``` + #include "highwayhash/highwayhash.h" + using namespace highwayhash; + HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4}; + char in[8] = {1}; + HHResult64 result; // or HHResult128 or HHResult256 + HHStateT state(key); + HighwayHashT(&state, in, 8, &result); +``` + +64, 128 or 256 bit HighwayHash for the CPU on which we're currently running: + +``` + #include "highwayhash/highwayhash_target.h" + #include "highwayhash/instruction_sets.h" + using namespace highwayhash; + HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4}; + char in[8] = {1}; + HHResult64 result; // or HHResult128 or HHResult256 + InstructionSets::Run(key, in, 8, &result); +``` + +C-callable 64-bit HighwayHash for the CPU on which we're currently running: + + #include "highwayhash/c_bindings.h" + const uint64_t key[4] = {1, 2, 3, 4}; + char in[8] = {1}; + return HighwayHash64(key, in, 8); + +Printing a 256-bit result in a hexadecimal format similar to sha1sum: + + HHResult256 result; + printf("%016"PRIx64"%016"PRIx64"%016"PRIx64"%016"PRIx64"\n", + result[3], result[2], result[1], result[0]); + +## Introduction + +Hash functions are widely used, so it is desirable to increase their speed and +security. This package provides two 'strong' (well-distributed and +unpredictable) hash functions: a faster version of SipHash, and an even faster +algorithm we call HighwayHash. + +SipHash is a fast but 'cryptographically strong' pseudo-random function by +Aumasson and Bernstein [https://www.131002.net/siphash/siphash.pdf]. + +HighwayHash is a new way of mixing inputs which may inspire new +cryptographically strong hashes. 
Large inputs are processed at a rate of 0.24 +cycles per byte, and latency remains low even for small inputs. HighwayHash is +faster than SipHash for all input sizes, with 5 times higher throughput at 1 +KiB. We discuss design choices and provide statistical analysis and preliminary +cryptanalysis in https://arxiv.org/abs/1612.06257. + +## Applications + +Unlike prior strong hashes, these functions are fast enough to be recommended +as safer replacements for weak hashes in many applications. The additional CPU +cost appears affordable, based on profiling data indicating C++ hash functions +account for less than 0.25% of CPU usage. + +Hash-based selection of random subsets is useful for A/B experiments and similar +applications. Such random generators are idempotent (repeatable and +deterministic), which is helpful for parallel algorithms and testing. To avoid +bias, it is important that the hash function be unpredictable and +indistinguishable from a uniform random generator. We have verified the bit +distribution and avalanche properties of SipHash and HighwayHash. + +64-bit hashes are also useful for authenticating short-lived messages such as +network/RPC packets. This requires that the hash function withstand +differential, length extension and other attacks. We have published a formal +security analysis for HighwayHash. New cryptanalysis tools may still need to be +developed for further analysis. + +Strong hashes are also important parts of methods for protecting hash tables +against unacceptable worst-case behavior and denial of service attacks +(see "hash flooding" below). + +128 and 256-bit hashes can be useful for verifying data integrity (checksums). + +## SipHash + +Our SipHash implementation is a fast and portable drop-in replacement for +the reference C code. Outputs are identical for the given test cases (messages +between 0 and 63 bytes). + +Interestingly, it is about twice as fast as a SIMD implementation using SSE4.1 +(https://goo.gl/80GBSD). 
This is presumably due to the lack of SIMD bit rotate +instructions prior to AVX-512. + +SipHash13 is a faster but weaker variant with one mixing round per update and +three during finalization. + +We also provide a data-parallel 'tree hash' variant that enables efficient SIMD +while retaining safety guarantees. This is about twice as fast as SipHash, but +does not return the same results. + +## HighwayHash + +We have devised a new way of mixing inputs with SIMD multiply and permute +instructions. The multiplications are 32x32 -> 64 bits and therefore infeasible +to reverse. Permuting equalizes the distribution of the resulting bytes. + +The internal state is quite large (1024 bits) but fits within SIMD registers. +Due to limitations of the AVX2 instruction set, the registers are partitioned +into two 512-bit halves that remain independent until the reduce phase. The +algorithm outputs 64 bit digests or up to 256 bits at no extra cost. + +In addition to high throughput, the algorithm is designed for low finalization +cost. The result is more than twice as fast as SipTreeHash. + +We also provide an SSE4.1 version (80% as fast for large inputs and 95% as fast +for short inputs), an implementation for VSX on POWER and a portable version +(10% as fast). A third-party ARM implementation is referenced below. + +Statistical analyses and preliminary cryptanalysis are given in +https://arxiv.org/abs/1612.06257. + +## Versioning and stability + +Now that 21 months have elapsed since their initial release, we have declared +all (64/128/256 bit) variants of HighwayHash frozen, i.e. unchanging forever. + +SipHash and HighwayHash are 'fingerprint functions' whose input -> hash +mapping will not change. This is important for applications that write hashes to +persistent storage. 
+ +## Speed measurements + +To measure the CPU cost of a hash function, we can either create an artificial +'microbenchmark' (easier to control, but probably not representative of the +actual runtime), or insert instrumentation directly into an application (risks +influencing the results through observer overhead). We provide novel variants of +both approaches that mitigate their respective disadvantages. + +profiler.h uses software write-combining to stream program traces to memory +with minimal overhead. These can be analyzed offline, or when memory is full, +to learn how much time was spent in each (possibly nested) zone. + +nanobenchmark.h enables cycle-accurate measurements of very short functions. +It uses CPU fences and robust statistics to minimize variability, and also +avoids unrealistic branch prediction effects. + +We compile the 64-bit C++ implementations with a patched GCC 4.9 and run on a +single idle core of a Xeon E5-2690 v3 clocked at 2.6 GHz. CPU cost is measured +as cycles per byte for various input sizes: + +Algorithm | 8 | 31 | 32 | 63 | 64 | 1024 +---------------- | ----- | ---- | ---- | ---- | ---- | ---- +HighwayHashAVX2 | 7.34 | 1.81 | 1.71 | 1.04 | 0.95 | 0.24 +HighwayHashSSE41 | 8.00 | 2.11 | 1.75 | 1.13 | 0.96 | 0.30 +SipTreeHash | 16.51 | 4.57 | 4.09 | 2.22 | 2.29 | 0.57 +SipTreeHash13 | 12.33 | 3.47 | 3.06 | 1.68 | 1.63 | 0.33 +SipHash | 8.13 | 2.58 | 2.73 | 1.87 | 1.93 | 1.26 +SipHash13 | 6.96 | 2.09 | 2.12 | 1.32 | 1.33 | 0.68 + +SipTreeHash is slower than SipHash for small inputs because it processes blocks +of 32 bytes. AVX2 and SSE4.1 HighwayHash are faster than SipHash for all input +sizes due to their highly optimized handling of partial vectors. + +Note that previous measurements included the initialization of their input, +which dramatically increased timings especially for small inputs. + +## CPU requirements + +SipTreeHash(13) requires an AVX2-capable CPU (e.g. Haswell). 
HighwayHash +includes a dispatcher that chooses the implementation (AVX2, SSE4.1, VSX or +portable) at runtime, as well as a directly callable function template that can +only run on the CPU for which it was built. SipHash(13) and +ScalarSipTreeHash(13) have no particular CPU requirements. + +### AVX2 vs SSE4 + +When both AVX2 and SSE4 are available, the decision whether to use AVX2 is +non-obvious. AVX2 vectors are twice as wide, but require a higher power license +(integer multiplications count as 'heavy' instructions) and can thus reduce the +clock frequency of the core or entire socket(!) on Haswell systems. This +partially explains the observed 1.25x (not 2x) speedup over SSE4. Moreover, it +is inadvisable to only sporadically use AVX2 instructions because there is also +a ~56K cycle warmup period during which AVX2 operations are slower, and Haswell +can even stall during this period. Thus, we recommend avoiding AVX2 for +infrequent hashing if the rest of the application is also not using AVX2. For +any input larger than 1 MiB, it is probably worthwhile to enable AVX2. + +### SIMD implementations + +Our x86 implementations use custom vector classes with overloaded operators +(e.g. `const V4x64U a = b + c`) for type-safety and improved readability vs. +compiler intrinsics (e.g. `const __m256i a = _mm256_add_epi64(b, c)`). +The VSX implementation uses built-in vector types alongside Altivec intrinsics. +A high-performance third-party ARM implementation is mentioned below. + +### Dispatch + +Our instruction_sets dispatcher avoids running newer instructions on older CPUs +that do not support them. However, intrinsics, and therefore also any vector +classes that use them, require (on GCC < 4.9 or Clang < 3.9) a compiler flag +that also allows the compiler to generate code for that CPU. This means the +intrinsics must be placed in separate translation units that are compiled with +the required flags. 
It is important that these source files and their headers +not define any inline functions, because that might break the one definition +rule and cause crashes. + +To minimize dispatch overhead when hashes are computed often (e.g. in a loop), +we can inline the hash function into its caller using templates. The dispatch +overhead will only be paid once (e.g. before the loop). The template mechanism +also avoids duplicating code in each CPU-specific implementation. + +## Defending against hash flooding + +To mitigate hash flooding attacks, we need to take both the hash function and +the data structure into account. + +We wish to defend (web) services that utilize hash sets/maps against +denial-of-service attacks. Such data structures assign attacker-controlled +input messages `m` to a hash table bin `b` by computing the hash `H(s, m)` +using a hash function `H` seeded by `s`, and mapping it to a bin with some +narrowing function `b = R(h)`, discussed below. + +Attackers may attempt to trigger 'flooding' (excessive work in insertions or +lookups) by finding multiple `m` that map to the same bin. If the attacker has +local access, they can do far worse, so we assume the attacker can only issue +remote requests. If the attacker is able to send large numbers of requests, +they can already deny service, so we need only ensure the attacker's cost is +sufficiently large compared to the service's provisioning. + +If the hash function is 'weak', attackers can easily generate 'hash collisions' +(inputs mapping to the same hash values) that are independent of the seed. In +other words, certain input messages will cause collisions regardless of the seed +value. The author of SipHash has published C++ programs to generate such +'universal (key-independent) multicollisions' for CityHash and Murmur. Similar +'differential' attacks are likely possible for any hash function consisting only +of reversible operations (e.g. addition/multiplication/rotation) with a constant +operand. 
`n` requests with such inputs cause `n^2` work for an unprotected hash +table, which is unacceptable. + +By contrast, 'strong' hashes such as SipHash or HighwayHash require infeasible +attacker effort to find a hash collision (an expected 2^32 guesses of `m` per +the birthday paradox) or recover the seed (2^63 requests). These security claims +assume the seed is secret. It is reasonable to suppose `s` is initially unknown +to attackers, e.g. generated on startup or even per-connection. A timing attack +by Wool/Bar-Yosef recovers 13-bit seeds by testing all 8K possibilities using +millions of requests, which takes several days (even assuming unrealistic 150 us +round-trip times). It appears infeasible to recover 64-bit seeds in this way. + +However, attackers are only looking for multiple `m` mapping to the same bin +rather than identical hash values. We assume they know or are able to discover +the hash table size `p`. It is common to choose `p = 2^i` to enable an efficient +`R(h) := h & (p - 1)`, which simply retains the lower hash bits. It may be +easier for attackers to compute partial collisions where only the lower `i` bits +match. This can be prevented by choosing a prime `p` so that `R(h) := h % p` +incorporates all hash bits. The costly modulo operation can be avoided by +multiplying with the inverse (https://goo.gl/l7ASm8). An interesting alternative +suggested by Kyoung Jae Seo chooses a random subset of the `h` bits. Such an `R` +function can be computed in just 3 cycles using PEXT from the BMI2 instruction +set. This is expected to defend against SAT-solver attacks on the hash bits at a +slightly lower cost than the multiplicative inverse method, and still allows +power-of-two table sizes. + +Summary thus far: given a strong hash function and secret seed, it appears +infeasible for attackers to generate hash collisions because `s` and/or `R` are +unknown. However, they can still observe the timings of data structure +operations for various `m`. 
With typical table sizes of 2^10 to 2^17 entries,
+attackers can detect some 'bin collisions' (inputs mapping to the same bin).
+Although this will be costly for the attacker, they can then send many instances
+of such inputs, so we need to limit the resulting work for our data structure.
+
+Hash tables with separate chaining typically store bin entries in a linked list,
+so worst-case inputs lead to unacceptable linear-time lookup cost. We instead
+seek optimal asymptotic worst-case complexity for each operation (insertion,
+deletion and lookups), which is a constant factor times the logarithm of the
+data structure size. This naturally leads to a tree-like data structure for each
+bin. The Java 8 HashMap only replaces its linked list with trees when needed.
+This leads to additional cost and complexity for deciding whether a bin is a
+list or tree.
+
+Our first proposal (suggested by GitHub user funny-falcon) avoids this overhead
+by always storing one tree per bin. It may also be worthwhile to store the first
+entry directly in the bin, which avoids allocating any tree nodes in the common
+case where bins are sparsely populated. What kind of tree should be used?
+
+Given that SipHash and HighwayHash provide high-quality randomness, a simple
+non-balancing binary search tree could perform reasonably well, depending on
+the expected attack surface. [Wikipedia says](https://en.wikipedia.org/wiki/Binary_search_tree#Definition)
+> After a long intermixed sequence of random insertion and deletion, the
+> expected height of the tree approaches square root of the number of keys, √n,
+> which grows much faster than log n.
+
+While `O(√n)` is much larger than `O(log n)`, it is still much smaller than `O(n)`.
+And it will certainly complicate the timing attack, since the time of
+operations on a collided bin will grow more slowly.
+
+If stronger safety guarantees are needed, then a balanced tree should be used. 
+Scapegoat and splay trees only offer amortized complexity guarantees, whereas +treaps require an entropy source and have higher constant factors in practice. +Self-balancing structures such as 2-3 or red-black trees require additional +bookkeeping information. We can hope to reduce rebalancing cost by realizing +that the output bits of strong `H` functions are uniformly distributed. When +using them as keys instead of the original message `m`, recent relaxed balancing +schemes such as left-leaning red-black or weak AVL trees may require fewer tree +rotations to maintain their invariants. Note that `H` already determines the +bin, so we should only use the remaining bits. 64-bit hashes are likely +sufficient for this purpose, and HighwayHash generates up to 256 bits. It seems +unlikely that attackers can craft inputs resulting in worst cases for both the +bin index and tree key without being able to generate hash collisions, which +would contradict the security claims of strong hashes. Even if they succeed, the +relaxed tree balancing still guarantees an upper bound on height and therefore +the worst-case operation cost. For the AVL variant, the constant factors are +slightly lower than for red-black trees. + +The second proposed approach uses augmented/de-amortized cuckoo hash tables +(https://goo.gl/PFwwkx). These guarantee worst-case `log n` bounds for all +operations, but only if the hash function is 'indistinguishable from random' +(uniformly distributed regardless of the input distribution), which is claimed +for SipHash and HighwayHash but certainly not for weak hashes. + +Both alternatives retain good average case performance and defend against +flooding by limiting the amount of extra work an attacker can cause. The first +approach guarantees an upper bound of `log n` additional work even if the hash +function is compromised. + +In summary, a strong hash function is not, by itself, sufficient to protect a +chained hash table from flooding attacks. 
However, strong hash functions are +important parts of two schemes for preventing denial of service. Using weak hash +functions can slightly accelerate the best-case and average-case performance of +a service, but at the risk of greatly reduced attack costs and worst-case +performance. + +## Third-party implementations / bindings + +Thanks to Damian Gryski and Frank Wessels for making us aware of these +third-party implementations or bindings. Please feel free to get in touch or +raise an issue and we'll add yours as well. + +By | Language | URL +--- | --- | --- +Damian Gryski | Go and x64 assembly | https://github.com/dgryski/go-highway/ +Simon Abdullah | NPM package | https://www.npmjs.com/package/highwayhash-nodejs +Lovell Fuller | node.js bindings | https://github.com/lovell/highwayhash +Andreas Sonnleitner | [WebAssembly](https://github.com/asonnleitner/highwayhash-wasm) and NPM package | https://www.npmjs.com/package/highwayhash-wasm +Nick Babcock | Rust port | https://github.com/nickbabcock/highway-rs +Caleb Zulawski | Rust portable SIMD | https://github.com/calebzulawski/autobahn-hash +Vinzent Steinberg | Rust bindings | https://github.com/vks/highwayhash-rs +Frank Wessels & Andreas Auernhammer | Go and ARM assembly | https://github.com/minio/highwayhash +Phil Demetriou | Python 3 bindings | https://github.com/kpdemetriou/highwayhash-cffi +Jonathan Beard | C++20 constexpr | https://gist.github.com/jonathan-beard/632017faa1d9d1936eb5948ac9186657 +James Cook | Ruby bindings | https://github.com/jamescook/highwayhash + +## Modules + +### Hashes + +* c_bindings.h declares C-callable versions of SipHash/HighwayHash. +* sip_hash.cc is the compatible implementation of SipHash, and also provides + the final reduction for sip_tree_hash. +* sip_tree_hash.cc is the faster but incompatible SIMD j-lanes tree hash. +* scalar_sip_tree_hash.cc is a non-SIMD version. +* state_helpers.h simplifies the implementation of the SipHash variants. 
+* highwayhash.h is our new, fast hash function. +* hh_{avx2,sse41,vsx,portable}.h are its various implementations. +* highwayhash_target.h chooses the best available implementation at runtime. + +### Infrastructure + +* arch_specific.h offers byte swapping and CPUID detection. +* compiler_specific.h defines some compiler-dependent language extensions. +* data_parallel.h provides a C++11 ThreadPool and PerThread (similar to + OpenMP). +* instruction_sets.h and targets.h enable efficient CPU-specific dispatching. +* nanobenchmark.h measures elapsed times with < 1 cycle variability. +* os_specific.h sets thread affinity and priority for benchmarking. +* profiler.h is a low-overhead, deterministic hierarchical profiler. +* tsc_timer.h obtains high-resolution timestamps without CPU reordering. +* vector256.h and vector128.h contain wrapper classes for AVX2 and SSE4.1. + +By Jan Wassenberg and Jyrki Alakuijala +, updated 2023-03-29 + +This is not an official Google product. diff --git a/highwayhash/c/highwayhash.c b/highwayhash/c/highwayhash.c new file mode 100644 index 000000000..bf4863ecd --- /dev/null +++ b/highwayhash/c/highwayhash.c @@ -0,0 +1,261 @@ +#include "c/highwayhash.h" + +#include +#include +#include + +/* +This code is compatible with C90 with the additional requirement of +supporting uint64_t. 
+*/ + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Internal implementation */ +/*////////////////////////////////////////////////////////////////////////////*/ + +void HighwayHashReset(const uint64_t key[4], HighwayHashState* state) { + state->mul0[0] = 0xdbe6d5d5fe4cce2full; + state->mul0[1] = 0xa4093822299f31d0ull; + state->mul0[2] = 0x13198a2e03707344ull; + state->mul0[3] = 0x243f6a8885a308d3ull; + state->mul1[0] = 0x3bd39e10cb0ef593ull; + state->mul1[1] = 0xc0acf169b5f18a8cull; + state->mul1[2] = 0xbe5466cf34e90c6cull; + state->mul1[3] = 0x452821e638d01377ull; + state->v0[0] = state->mul0[0] ^ key[0]; + state->v0[1] = state->mul0[1] ^ key[1]; + state->v0[2] = state->mul0[2] ^ key[2]; + state->v0[3] = state->mul0[3] ^ key[3]; + state->v1[0] = state->mul1[0] ^ ((key[0] >> 32) | (key[0] << 32)); + state->v1[1] = state->mul1[1] ^ ((key[1] >> 32) | (key[1] << 32)); + state->v1[2] = state->mul1[2] ^ ((key[2] >> 32) | (key[2] << 32)); + state->v1[3] = state->mul1[3] ^ ((key[3] >> 32) | (key[3] << 32)); +} + +static void ZipperMergeAndAdd(const uint64_t v1, const uint64_t v0, + uint64_t* add1, uint64_t* add0) { + *add0 += (((v0 & 0xff000000ull) | (v1 & 0xff00000000ull)) >> 24) | + (((v0 & 0xff0000000000ull) | (v1 & 0xff000000000000ull)) >> 16) | + (v0 & 0xff0000ull) | ((v0 & 0xff00ull) << 32) | + ((v1 & 0xff00000000000000ull) >> 8) | (v0 << 56); + *add1 += (((v1 & 0xff000000ull) | (v0 & 0xff00000000ull)) >> 24) | + (v1 & 0xff0000ull) | ((v1 & 0xff0000000000ull) >> 16) | + ((v1 & 0xff00ull) << 24) | ((v0 & 0xff000000000000ull) >> 8) | + ((v1 & 0xffull) << 48) | (v0 & 0xff00000000000000ull); +} + +static void Update(const uint64_t lanes[4], HighwayHashState* state) { + int i; + for (i = 0; i < 4; ++i) { + state->v1[i] += state->mul0[i] + lanes[i]; + state->mul0[i] ^= (state->v1[i] & 0xffffffff) * (state->v0[i] >> 32); + state->v0[i] += state->mul1[i]; + state->mul1[i] ^= (state->v0[i] & 0xffffffff) * (state->v1[i] >> 32); + } + 
ZipperMergeAndAdd(state->v1[1], state->v1[0], &state->v0[1], &state->v0[0]); + ZipperMergeAndAdd(state->v1[3], state->v1[2], &state->v0[3], &state->v0[2]); + ZipperMergeAndAdd(state->v0[1], state->v0[0], &state->v1[1], &state->v1[0]); + ZipperMergeAndAdd(state->v0[3], state->v0[2], &state->v1[3], &state->v1[2]); +} + +static uint64_t Read64(const uint8_t* src) { + return (uint64_t)src[0] | ((uint64_t)src[1] << 8) | + ((uint64_t)src[2] << 16) | ((uint64_t)src[3] << 24) | + ((uint64_t)src[4] << 32) | ((uint64_t)src[5] << 40) | + ((uint64_t)src[6] << 48) | ((uint64_t)src[7] << 56); +} + +void HighwayHashUpdatePacket(const uint8_t* packet, HighwayHashState* state) { + uint64_t lanes[4]; + lanes[0] = Read64(packet + 0); + lanes[1] = Read64(packet + 8); + lanes[2] = Read64(packet + 16); + lanes[3] = Read64(packet + 24); + Update(lanes, state); +} + +static void Rotate32By(uint64_t count, uint64_t lanes[4]) { + int i; + for (i = 0; i < 4; ++i) { + uint32_t half0 = lanes[i] & 0xffffffff; + uint32_t half1 = (lanes[i] >> 32); + lanes[i] = (half0 << count) | (half0 >> (32 - count)); + lanes[i] |= (uint64_t)((half1 << count) | (half1 >> (32 - count))) << 32; + } +} + +void HighwayHashUpdateRemainder(const uint8_t* bytes, const size_t size_mod32, + HighwayHashState* state) { + int i; + const size_t size_mod4 = size_mod32 & 3; + const uint8_t* remainder = bytes + (size_mod32 & ~3); + uint8_t packet[32] = {0}; + for (i = 0; i < 4; ++i) { + state->v0[i] += ((uint64_t)size_mod32 << 32) + size_mod32; + } + Rotate32By(size_mod32, state->v1); + for (i = 0; i < remainder - bytes; i++) { + packet[i] = bytes[i]; + } + if (size_mod32 & 16) { + for (i = 0; i < 4; i++) { + packet[28 + i] = remainder[i + size_mod4 - 4]; + } + } else { + if (size_mod4) { + packet[16 + 0] = remainder[0]; + packet[16 + 1] = remainder[size_mod4 >> 1]; + packet[16 + 2] = remainder[size_mod4 - 1]; + } + } + HighwayHashUpdatePacket(packet, state); +} + +static void Permute(const uint64_t v[4], uint64_t* permuted) { 
+ permuted[0] = (v[2] >> 32) | (v[2] << 32); + permuted[1] = (v[3] >> 32) | (v[3] << 32); + permuted[2] = (v[0] >> 32) | (v[0] << 32); + permuted[3] = (v[1] >> 32) | (v[1] << 32); +} + +void PermuteAndUpdate(HighwayHashState* state) { + uint64_t permuted[4]; + Permute(state->v0, permuted); + Update(permuted, state); +} + +static void ModularReduction(uint64_t a3_unmasked, uint64_t a2, uint64_t a1, + uint64_t a0, uint64_t* m1, uint64_t* m0) { + uint64_t a3 = a3_unmasked & 0x3FFFFFFFFFFFFFFFull; + *m1 = a1 ^ ((a3 << 1) | (a2 >> 63)) ^ ((a3 << 2) | (a2 >> 62)); + *m0 = a0 ^ (a2 << 1) ^ (a2 << 2); +} + +static uint64_t HighwayHashFinalize64(HighwayHashState* state) { + int i; + for (i = 0; i < 4; i++) { + PermuteAndUpdate(state); + } + return state->v0[0] + state->v1[0] + state->mul0[0] + state->mul1[0]; +} + +static void HighwayHashFinalize128(HighwayHashState* state, uint64_t hash[2]) { + int i; + for (i = 0; i < 6; i++) { + PermuteAndUpdate(state); + } + hash[0] = state->v0[0] + state->mul0[0] + state->v1[2] + state->mul1[2]; + hash[1] = state->v0[1] + state->mul0[1] + state->v1[3] + state->mul1[3]; +} + +static void HighwayHashFinalize256(HighwayHashState* state, uint64_t hash[4]) { + int i; + /* We anticipate that 256-bit hashing will be mostly used with long messages + because storing and using the 256-bit hash (in contrast to 128-bit) + carries a larger additional constant cost by itself. Doing extra rounds + here hardly increases the per-byte cost of long messages. 
*/ + for (i = 0; i < 10; i++) { + PermuteAndUpdate(state); + } + ModularReduction(state->v1[1] + state->mul1[1], state->v1[0] + state->mul1[0], + state->v0[1] + state->mul0[1], state->v0[0] + state->mul0[0], + &hash[1], &hash[0]); + ModularReduction(state->v1[3] + state->mul1[3], state->v1[2] + state->mul1[2], + state->v0[3] + state->mul0[3], state->v0[2] + state->mul0[2], + &hash[3], &hash[2]); +} + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Non-cat API: single call on full data */ +/*////////////////////////////////////////////////////////////////////////////*/ + +static void ProcessAll(const uint8_t* data, size_t size, const uint64_t key[4], + HighwayHashState* state) { + size_t i; + HighwayHashReset(key, state); + for (i = 0; i + 32 <= size; i += 32) { + HighwayHashUpdatePacket(data + i, state); + } + if ((size & 31) != 0) HighwayHashUpdateRemainder(data + i, size & 31, state); +} + +uint64_t HighwayHash64(const uint8_t* data, size_t size, + const uint64_t key[4]) { + HighwayHashState state; + ProcessAll(data, size, key, &state); + return HighwayHashFinalize64(&state); +} + +void HighwayHash128(const uint8_t* data, size_t size, + const uint64_t key[4], uint64_t hash[2]) { + HighwayHashState state; + ProcessAll(data, size, key, &state); + HighwayHashFinalize128(&state, hash); +} + +void HighwayHash256(const uint8_t* data, size_t size, + const uint64_t key[4], uint64_t hash[4]) { + HighwayHashState state; + ProcessAll(data, size, key, &state); + HighwayHashFinalize256(&state, hash); +} + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Cat API: allows appending with multiple calls */ +/*////////////////////////////////////////////////////////////////////////////*/ + +void HighwayHashCatStart(const uint64_t key[4], HighwayHashCat* state) { + HighwayHashReset(key, &state->state); + state->num = 0; +} + +void HighwayHashCatAppend(const uint8_t* bytes, size_t num, + HighwayHashCat* state) 
{ + size_t i; + if (state->num != 0) { + size_t num_add = num > (32u - state->num) ? (32u - state->num) : num; + for (i = 0; i < num_add; i++) { + state->packet[state->num + i] = bytes[i]; + } + state->num += num_add; + num -= num_add; + bytes += num_add; + if (state->num == 32) { + HighwayHashUpdatePacket(state->packet, &state->state); + state->num = 0; + } + } + while (num >= 32) { + HighwayHashUpdatePacket(bytes, &state->state); + num -= 32; + bytes += 32; + } + for (i = 0; i < num; i++) { + state->packet[state->num] = bytes[i]; + state->num++; + } +} + +uint64_t HighwayHashCatFinish64(const HighwayHashCat* state) { + HighwayHashState copy = state->state; + if (state->num) { + HighwayHashUpdateRemainder(state->packet, state->num, ©); + } + return HighwayHashFinalize64(©); +} + +void HighwayHashCatFinish128(const HighwayHashCat* state, uint64_t hash[2]) { + HighwayHashState copy = state->state; + if (state->num) { + HighwayHashUpdateRemainder(state->packet, state->num, ©); + } + HighwayHashFinalize128(©, hash); +} + +void HighwayHashCatFinish256(const HighwayHashCat* state, uint64_t hash[4]) { + HighwayHashState copy = state->state; + if (state->num) { + HighwayHashUpdateRemainder(state->packet, state->num, ©); + } + HighwayHashFinalize256(©, hash); +} diff --git a/highwayhash/c/highwayhash.h b/highwayhash/c/highwayhash.h new file mode 100644 index 000000000..10c877fdc --- /dev/null +++ b/highwayhash/c/highwayhash.h @@ -0,0 +1,100 @@ +#ifndef C_HIGHWAYHASH_H_ +#define C_HIGHWAYHASH_H_ + +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Low-level API, use for implementing streams etc... 
*/ +/*////////////////////////////////////////////////////////////////////////////*/ + +typedef struct { + uint64_t v0[4]; + uint64_t v1[4]; + uint64_t mul0[4]; + uint64_t mul1[4]; +} HighwayHashState; + +/* Initializes state with given key */ +static void HighwayHashReset(const uint64_t key[4], HighwayHashState* state); +/* Takes a packet of 32 bytes */ +void HighwayHashUpdatePacket(const uint8_t* packet, HighwayHashState* state); +/* Adds the final 1..31 bytes, do not use if 0 remain */ +void HighwayHashUpdateRemainder(const uint8_t* bytes, const size_t size_mod32, + HighwayHashState* state); +/* Compute final hash value. Makes state invalid. */ +static uint64_t HighwayHashFinalize64(HighwayHashState* state); +static void HighwayHashFinalize128(HighwayHashState* state, uint64_t hash[2]); +static void HighwayHashFinalize256(HighwayHashState* state, uint64_t hash[4]); + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Non-cat API: single call on full data */ +/*////////////////////////////////////////////////////////////////////////////*/ + +uint64_t HighwayHash64(const uint8_t* data, size_t size, const uint64_t key[4]); + +void HighwayHash128(const uint8_t* data, size_t size, + const uint64_t key[4], uint64_t hash[2]); + +void HighwayHash256(const uint8_t* data, size_t size, + const uint64_t key[4], uint64_t hash[4]); + +/*////////////////////////////////////////////////////////////////////////////*/ +/* Cat API: allows appending with multiple calls */ +/*////////////////////////////////////////////////////////////////////////////*/ + +typedef struct { + HighwayHashState state; + uint8_t packet[32]; + int num; +} HighwayHashCat; + +/* Allocates new state for a new streaming hash computation */ +void HighwayHashCatStart(const uint64_t key[4], HighwayHashCat* state); + +void HighwayHashCatAppend(const uint8_t* bytes, size_t num, + HighwayHashCat* state); + +/* Computes final hash value */ +uint64_t HighwayHashCatFinish64(const 
HighwayHashCat* state); +void HighwayHashCatFinish128(const HighwayHashCat* state, uint64_t hash[2]); +void HighwayHashCatFinish256(const HighwayHashCat* state, uint64_t hash[4]); + +/* +Usage examples: + +#include +#include + +void Example64() { + uint64_t key[4] = {1, 2, 3, 4}; + const char* text = "Hello world!"; + size_t size = strlen(text); + uint64_t hash = HighwayHash64((const uint8_t*)text, size, key); + printf("%016"PRIx64"\n", hash); +} + +void Example64Cat() { + uint64_t key[4] = {1, 2, 3, 4}; + HighwayHashCat state; + uint64_t hash; + + HighwayHashCatStart(key, &state); + + HighwayHashCatAppend((const uint8_t*)"Hello", 5, &state); + HighwayHashCatAppend((const uint8_t*)" world!", 7, &state); + + hash = HighwayHashCatFinish64(&state); + printf("%016"PRIx64"\n", hash); +} +*/ + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif // C_HIGHWAYHASH_H_ diff --git a/highwayhash/c/highwayhash_test.c b/highwayhash/c/highwayhash_test.c new file mode 100644 index 000000000..9f9ee3367 --- /dev/null +++ b/highwayhash/c/highwayhash_test.c @@ -0,0 +1,70 @@ +#include "c/highwayhash.h" + +#include +#include +#include + +#define kMaxSize 64 + +static const uint64_t kTestKey1[4] = { + 0x0706050403020100ull, 0x0F0E0D0C0B0A0908ull, + 0x1716151413121110ull, 0x1F1E1D1C1B1A1918ull +}; + +static const uint64_t kTestKey2[4] = { + 1ull, 2ull, 3ull, 4ull +}; + +const uint64_t kExpected64[kMaxSize + 1] = { + 0x907A56DE22C26E53ull, 0x7EAB43AAC7CDDD78ull, 0xB8D0569AB0B53D62ull, + 0x5C6BEFAB8A463D80ull, 0xF205A46893007EDAull, 0x2B8A1668E4A94541ull, + 0xBD4CCC325BEFCA6Full, 0x4D02AE1738F59482ull, 0xE1205108E55F3171ull, + 0x32D2644EC77A1584ull, 0xF6E10ACDB103A90Bull, 0xC3BBF4615B415C15ull, + 0x243CC2040063FA9Cull, 0xA89A58CE65E641FFull, 0x24B031A348455A23ull, + 0x40793F86A449F33Bull, 0xCFAB3489F97EB832ull, 0x19FE67D2C8C5C0E2ull, + 0x04DD90A69C565CC2ull, 0x75D9518E2371C504ull, 0x38AD9B1141D3DD16ull, + 0x0264432CCD8A70E0ull, 0xA9DB5A6288683390ull, 
0xD7B05492003F028Cull, + 0x205F615AEA59E51Eull, 0xEEE0C89621052884ull, 0x1BFC1A93A7284F4Full, + 0x512175B5B70DA91Dull, 0xF71F8976A0A2C639ull, 0xAE093FEF1F84E3E7ull, + 0x22CA92B01161860Full, 0x9FC7007CCF035A68ull, 0xA0C964D9ECD580FCull, + 0x2C90F73CA03181FCull, 0x185CF84E5691EB9Eull, 0x4FC1F5EF2752AA9Bull, + 0xF5B7391A5E0A33EBull, 0xB9B84B83B4E96C9Cull, 0x5E42FE712A5CD9B4ull, + 0xA150F2F90C3F97DCull, 0x7FA522D75E2D637Dull, 0x181AD0CC0DFFD32Bull, + 0x3889ED981E854028ull, 0xFB4297E8C586EE2Dull, 0x6D064A45BB28059Cull, + 0x90563609B3EC860Cull, 0x7AA4FCE94097C666ull, 0x1326BAC06B911E08ull, + 0xB926168D2B154F34ull, 0x9919848945B1948Dull, 0xA2A98FC534825EBEull, + 0xE9809095213EF0B6ull, 0x582E5483707BC0E9ull, 0x086E9414A88A6AF5ull, + 0xEE86B98D20F6743Dull, 0xF89B7FF609B1C0A7ull, 0x4C7D9CC19E22C3E8ull, + 0x9A97005024562A6Full, 0x5DD41CF423E6EBEFull, 0xDF13609C0468E227ull, + 0x6E0DA4F64188155Aull, 0xB755BA4B50D7D4A1ull, 0x887A3484647479BDull, + 0xAB8EEBE9BF2139A0ull, 0x75542C5D4CD2A6FFull}; + +void TestHash64(uint64_t expected, const uint8_t* data, size_t size, + const uint64_t* key) { + uint64_t hash = HighwayHash64(data, size, key); + if (expected != hash) { + printf("Test failed: expected %016"PRIx64", got %016"PRIx64", size: %d\n", + expected, hash, (int) size); + exit(1); + } +} + +int main() { + uint8_t data[kMaxSize + 1] = {0}; + int i; + for (i = 0; i <= kMaxSize; i++) { + data[i] = i; + TestHash64(kExpected64[i], data, i, kTestKey1); + } + + for (i = 0; i < 33; i++) { + data[i] = 128 + i; + } + TestHash64(0x53c516cce478cad7ull, data, 33, kTestKey2); + + /* 128-bit and 256-bit tests to be added when they are declared frozen in the + C++ version */ + + printf("Test success\n"); + return 0; +} diff --git a/highwayhash/google3/third_party/highwayhash/WORKSPACE b/highwayhash/google3/third_party/highwayhash/WORKSPACE new file mode 100644 index 000000000..cca464c25 --- /dev/null +++ b/highwayhash/google3/third_party/highwayhash/WORKSPACE @@ -0,0 +1 @@ +workspace(name = 
"highwayhash") diff --git a/highwayhash/highwayhash.3 b/highwayhash/highwayhash.3 new file mode 100644 index 000000000..54f3d1d93 --- /dev/null +++ b/highwayhash/highwayhash.3 @@ -0,0 +1,107 @@ +.TH highwayhash 3 "April 25, 2017" + +.SH NAME +highwayhash \- fast strong 64-bit hash functions + +.SH SYNOPSIS + +.B #include /* C */ + + uint64_t SipHashC(const uint64_t* key, const char* bytes, const uint64_t size); + + uint64_t SipHash13C(const uint64_t* key, const char* bytes, const uint64_t size); + + uint64_t HighwayHash64(const HHKey key, const char* bytes, const uint64_t size); + +.B #include /* C++ */ + + using namespace highwayhash; + + void HighwayHashT(State* HH_RESTRICT state, + const char* HH_RESTRICT bytes, const size_t size, + Result* HH_RESTRICT hash); + +.B #include /* C++ */ + + using namespace highwayhash; + + HH_U64 SipHash(const SipHashState::Key& key, const char* bytes,const HH_U64 size); + +Link with +.I +-lhighwayhash + +.SH DESCRIPTION + +Hash functions are widely used, so it is desirable to increase their speed and +security. This package provides two 'strong' (well-distributed and +unpredictable) hash functions: a faster version of SipHash, and an even faster +algorithm we call HighwayHash. + +SipHash is a fast but 'cryptographically strong' pseudo-random function by +Aumasson and Bernstein [https://www.131002.net/siphash/siphash.pdf]. + +HighwayHash is a new way of mixing inputs which may inspire new +cryptographically strong hashes. Large inputs are processed at a rate of 0.24 +cycles per byte, and latency remains low even for small inputs. HighwayHash is +faster than SipHash for all input sizes, with 5 times higher throughput at 1 +KiB. We discuss design choices and provide statistical analysis and preliminary +cryptanalysis in https://arxiv.org/abs/1612.06257. + +.I +Note, SipHash wants an uint64_t[2] key while HighwayHash uint64_t[4] . 
+ +.SH EXAMPLES + +64-bit SipHash for any CPU: + + #include "highwayhash/sip_hash.h" + using namespace highwayhash; + HH_ALIGNAS(16) const HH_U64 key2[2] = {1234, 5678}; + char in[8] = {1}; + return SipHash(key2, in, 8); + +64, 128 or 256 bit HighwayHash for the CPU determined by compiler flags: + + #include "highwayhash/highwayhash.h" + using namespace highwayhash; + HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4}; + char in[8] = {1}; + HHResult64 result; // or HHResult128 or HHResult256 + HHStateT state(key); + HighwayHashT(&state, in, 8, &result); + +64, 128 or 256 bit HighwayHash for the CPU on which we're currently running: + + #include "highwayhash/highwayhash_target.h" + #include "highwayhash/instruction_sets.h" + using namespace highwayhash; + HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4}; + char in[8] = {1}; + HHResult64 result; // or HHResult128 or HHResult256 + InstructionSets::Run(key, in, 8, &result); + +C-callable 64-bit HighwayHash for the CPU on which we're currently running: + + #include "highwayhash/c_bindings.h" + const uint64_t key[4] = {1, 2, 3, 4}; + char in[8] = {1}; + return HighwayHash64(key, in, 8); + +.SH SEE ALSO + +/usr/include/highwayhash/c_bindings.h (C) + +/usr/include/highwayhash/highwayhash.h (C++) + +.SH BUGS + +https://github.com/google/highwayhash/issues + +.SH AUTHOR + +Upstream authors are Jan Wassenberg and Jyrki Alakuijala , updated 2017-02-07 + +This manpage was created by Adam Borowski , +and completed by Zhou Mo according to upstream readme +and header files. \ No newline at end of file diff --git a/highwayhash/highwayhash/arch_specific.cc b/highwayhash/highwayhash/arch_specific.cc new file mode 100644 index 000000000..2a05860ce --- /dev/null +++ b/highwayhash/highwayhash/arch_specific.cc @@ -0,0 +1,193 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "highwayhash/arch_specific.h" + +#include + +#if HH_ARCH_X64 && !HH_MSC_VERSION +#include +#endif + +#if HH_ARCH_PPC +#if __GLIBC__ +#include // __ppc_get_timebase_freq +#elif __FreeBSD__ +// clang-format off +#include +#include /* must come after sys/types.h */ +// clang-format on +#endif +#endif + +#include // memcpy +#include + +namespace highwayhash { + +const char* TargetName(const TargetBits target_bit) { + switch (target_bit) { + case HH_TARGET_Portable: + return "Portable"; + case HH_TARGET_SSE41: + return "SSE41"; + case HH_TARGET_AVX2: + return "AVX2"; + case HH_TARGET_VSX: + return "VSX"; + case HH_TARGET_NEON: + return "NEON"; + default: + return nullptr; // zero, multiple, or unknown bits + } +} + +#if HH_ARCH_X64 + +namespace { + +std::string BrandString() { + char brand_string[49]; + uint32_t abcd[4]; + + // Check if brand string is supported (it is on all reasonable Intel/AMD) + Cpuid(0x80000000U, 0, abcd); + if (abcd[0] < 0x80000004U) { + return std::string(); + } + + for (int i = 0; i < 3; ++i) { + Cpuid(0x80000002U + i, 0, abcd); + memcpy(brand_string + i * 16, &abcd, sizeof(abcd)); + } + brand_string[48] = 0; + return brand_string; +} + +} // namespace + +void Cpuid(const uint32_t level, const uint32_t count, + uint32_t* HH_RESTRICT abcd) { +#if HH_MSC_VERSION + int regs[4]; + __cpuidex(regs, level, count); + for (int i = 0; i < 4; ++i) { + abcd[i] = regs[i]; + } +#else + uint32_t a, b, c, d; + __cpuid_count(level, count, a, b, c, d); + abcd[0] = a; + abcd[1] = b; + abcd[2] = c; + abcd[3] = d; +#endif +} + +uint32_t 
ApicId() { + uint32_t abcd[4]; + Cpuid(1, 0, abcd); + return abcd[1] >> 24; // ebx +} + +#endif // HH_ARCH_X64 + +namespace { + +double DetectNominalClockRate() { +#if HH_ARCH_X64 + const std::string& brand_string = BrandString(); + // Brand strings include the maximum configured frequency. These prefixes are + // defined by Intel CPUID documentation. + const char* prefixes[3] = {"MHz", "GHz", "THz"}; + const double multipliers[3] = {1E6, 1E9, 1E12}; + for (size_t i = 0; i < 3; ++i) { + const size_t pos_prefix = brand_string.find(prefixes[i]); + if (pos_prefix != std::string::npos) { + const size_t pos_space = brand_string.rfind(' ', pos_prefix - 1); + if (pos_space != std::string::npos) { + const std::string digits = + brand_string.substr(pos_space + 1, pos_prefix - pos_space - 1); + return std::stod(digits) * multipliers[i]; + } + } + } +#elif HH_ARCH_PPC + double freq = -1; +#if __linux__ + char line[200]; + char* s; + char* value; + + FILE* f = fopen("/proc/cpuinfo", "r"); + if (f != nullptr) { + while (fgets(line, sizeof(line), f) != nullptr) { + // NOTE: the ':' is the only character we can rely on + if (!(value = strchr(line, ':'))) continue; + // terminate the valuename + *value++ = '\0'; + // skip any leading spaces + while (*value == ' ') value++; + if ((s = strchr(value, '\n'))) *s = '\0'; + + if (!strncasecmp(line, "clock", strlen("clock")) && + sscanf(value, "%lf", &freq) == 1) { + freq *= 1E6; + break; + } + } + fclose(f); + return freq; + } +#elif __FreeBSD__ + size_t length = sizeof(freq); + sysctlbyname("dev.cpu.0.freq", &freq, &length, NULL, 0); + freq *= 1E6; + return freq; +#endif +#endif + + return 0.0; +} + +} // namespace + +double NominalClockRate() { + // Thread-safe caching - this is called several times. 
+ static const double cycles_per_second = DetectNominalClockRate(); + return cycles_per_second; +} + +double InvariantTicksPerSecond() { +#if HH_ARCH_PPC +#if __GLIBC__ + static const double cycles_per_second = __ppc_get_timebase_freq(); +#elif __FreeBSD__ + double cycles_per_second = 0; + size_t length = sizeof(cycles_per_second); + sysctlbyname("kern.timecounter.tc.timebase.frequency", &cycles_per_second, + &length, NULL, 0); +#elif __OpenBSD__ + /* There is currently no method of retrieving this via userland. + * This value is correct for Power8 and Power9. + */ + static const double cycles_per_second = 512000000; +#endif + return cycles_per_second; +#else + return NominalClockRate(); +#endif +} + +} // namespace highwayhash diff --git a/highwayhash/highwayhash/arch_specific.h b/highwayhash/highwayhash/arch_specific.h new file mode 100644 index 000000000..0b8c38417 --- /dev/null +++ b/highwayhash/highwayhash/arch_specific.h @@ -0,0 +1,179 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_ARCH_SPECIFIC_H_ +#define HIGHWAYHASH_ARCH_SPECIFIC_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. 
+// +// Background: older GCC/Clang require flags such as -mavx2 before AVX2 SIMD +// intrinsics can be used. These intrinsics are only used within blocks that +// first verify CPU capabilities. However, the flag also allows the compiler to +// generate AVX2 code in other places. This can violate the One Definition Rule, +// which requires multiple instances of a function with external linkage +// (e.g. extern inline in a header) to be "equivalent". To prevent the resulting +// crashes on non-AVX2 CPUs, any header (transitively) included from a +// translation unit compiled with different flags is "restricted". This means +// all function definitions must have internal linkage (e.g. static inline), or +// reside in namespace HH_TARGET_NAME, which expands to a name unique to the +// current compiler flags. +// +// Most C system headers are safe to include, but C++ headers should generally +// be avoided because they often do not specify static linkage and cannot +// reliably be wrapped in a namespace. + +#include "highwayhash/compiler_specific.h" + +#include + +#if HH_MSC_VERSION +#include // _byteswap_* +#endif + +namespace highwayhash { + +#if defined(__x86_64__) || defined(_M_X64) +#define HH_ARCH_X64 1 +#else +#define HH_ARCH_X64 0 +#endif + +#if defined(__aarch64__) || defined(__arm64__) +#define HH_ARCH_AARCH64 1 +#else +#define HH_ARCH_AARCH64 0 +#endif + +#ifdef __arm__ +#define HH_ARCH_ARM 1 +#else +#define HH_ARCH_ARM 0 +#endif + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define HH_ARCH_NEON 1 +#else +#define HH_ARCH_NEON 0 +#endif + +#if defined(__powerpc64__) || defined(_M_PPC) +#define HH_ARCH_PPC 1 +#else +#define HH_ARCH_PPC 0 +#endif + +// Target := instruction set extension(s) such as SSE41. A translation unit can +// only provide a single target-specific implementation because they require +// different compiler flags. 
+ +// Either the build system specifies the target by defining HH_TARGET_NAME +// (which is necessary for Portable on X64, and SSE41 on MSVC), or we'll choose +// the most efficient one that can be compiled given the current flags: +#ifndef HH_TARGET_NAME + +// To avoid excessive code size and dispatch overhead, we only support a few +// groups of extensions, e.g. FMA+BMI2+AVX+AVX2 =: "AVX2". These names must +// match the HH_TARGET_* suffixes below. +#ifdef __AVX2__ +#define HH_TARGET_NAME AVX2 +// MSVC does not set SSE4_1, but it does set AVX; checking for the latter means +// we at least get SSE4 on machines supporting AVX but not AVX2. +// https://stackoverflow.com/questions/18563978/detect-the-availability-of-sse-sse2-instruction-set-in-visual-studio +#elif defined(__SSE4_1__) || (HH_MSC_VERSION != 0 && defined(__AVX__)) +#define HH_TARGET_NAME SSE41 +#elif defined(__VSX__) +#define HH_TARGET_NAME VSX +#elif HH_ARCH_NEON +#define HH_TARGET_NAME NEON +#else +#define HH_TARGET_NAME Portable +#endif + +#endif // HH_TARGET_NAME + +#define HH_CONCAT(first, second) first##second +// Required due to macro expansion rules. +#define HH_EXPAND_CONCAT(first, second) HH_CONCAT(first, second) +// Appends HH_TARGET_NAME to "identifier_prefix". +#define HH_ADD_TARGET_SUFFIX(identifier_prefix) \ + HH_EXPAND_CONCAT(identifier_prefix, HH_TARGET_NAME) + +// HH_TARGET expands to an integer constant. Typical usage: HHStateT. +// This ensures your code will work correctly when compiler flags are changed, +// and benefit from subsequently added targets/specializations. +#define HH_TARGET HH_ADD_TARGET_SUFFIX(HH_TARGET_) + +// Deprecated former name of HH_TARGET; please use HH_TARGET instead. +#define HH_TARGET_PREFERRED HH_TARGET + +// Associate targets with integer literals so the preprocessor can compare them +// with HH_TARGET. Do not instantiate templates with these values - use +// HH_TARGET instead. Must be unique powers of two, see TargetBits. 
Always +// defined even if unavailable on this HH_ARCH to allow calling TargetName. +// The suffixes must match the HH_TARGET_NAME identifiers. +#define HH_TARGET_Portable 1 +#define HH_TARGET_SSE41 2 +#define HH_TARGET_AVX2 4 +#define HH_TARGET_VSX 8 +#define HH_TARGET_NEON 16 + +// Bit array for one or more HH_TARGET_*. Used to indicate which target(s) are +// supported or were called by InstructionSets::RunAll. +using TargetBits = unsigned; + +namespace HH_TARGET_NAME { + +// Calls func(bit_value) for every nonzero bit in "bits". +template +void ForeachTarget(TargetBits bits, const Func& func) { + while (bits != 0) { + const TargetBits lowest = bits & (~bits + 1); + func(lowest); + bits &= ~lowest; + } +} + +} // namespace HH_TARGET_NAME + +// Returns a brief human-readable string literal identifying one of the above +// bits, or nullptr if zero, multiple, or unknown bits are set. +const char* TargetName(const TargetBits target_bit); + +// Returns the nominal (without Turbo Boost) CPU clock rate [Hertz]. Useful for +// (roughly) characterizing the CPU speed. +double NominalClockRate(); + +// Returns tsc_timer frequency, useful for converting ticks to seconds. This is +// unaffected by CPU throttling ("invariant"). Thread-safe. Returns timebase +// frequency on PPC and NominalClockRate on all other platforms. +double InvariantTicksPerSecond(); + +#if HH_ARCH_X64 + +// Calls CPUID instruction with eax=level and ecx=count and returns the result +// in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd). +void Cpuid(const uint32_t level, const uint32_t count, + uint32_t* HH_RESTRICT abcd); + +// Returns the APIC ID of the CPU on which we're currently running. 
+uint32_t ApicId(); + +#endif // HH_ARCH_X64 + +} // namespace highwayhash + +#endif // HIGHWAYHASH_ARCH_SPECIFIC_H_ diff --git a/highwayhash/highwayhash/benchmark.cc b/highwayhash/highwayhash/benchmark.cc new file mode 100644 index 000000000..7cc304ffc --- /dev/null +++ b/highwayhash/highwayhash/benchmark.cc @@ -0,0 +1,331 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Measures hash function throughput for various input sizes. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/instruction_sets.h" +#include "highwayhash/nanobenchmark.h" +#include "highwayhash/robust_statistics.h" + +// Which functions to enable (includes check for compiler support) +#define BENCHMARK_SIP 0 +#define BENCHMARK_SIP_TREE 0 +#define BENCHMARK_HIGHWAY 1 +#define BENCHMARK_HIGHWAY_CAT 1 +#define BENCHMARK_FARM 0 +#define BENCHMARK_INTERNAL 0 + +#include "highwayhash/highwayhash_test_target.h" +#if BENCHMARK_SIP +#include "highwayhash/sip_hash.h" +#endif +#if BENCHMARK_SIP_TREE +#include "highwayhash/scalar_sip_tree_hash.h" +#include "highwayhash/sip_tree_hash.h" +#endif +#if BENCHMARK_FARM +#include "third_party/farmhash/src/farmhash.h" +#endif + +#if BENCHMARK_INTERNAL +// Placeholder for include +#endif + +namespace highwayhash { +namespace { + +// Stores time measurements from benchmarks, with support for printing them +// as LaTeX figures or tables. +class Measurements { + public: + void Add(const char* caption, const size_t bytes, const double cycles) { + const float cpb = static_cast(cycles / bytes); + results_.emplace_back(caption, static_cast(bytes), cpb); + } + + // Prints results as a LaTeX table (only for in_sizes matching the + // desired values). + void PrintTable(const std::vector& in_sizes) { + std::vector unique = in_sizes; + std::sort(unique.begin(), unique.end()); + unique.erase(std::unique(unique.begin(), unique.end()), unique.end()); + + printf("\\begin{tabular}{"); + for (size_t i = 0; i < unique.size() + 1; ++i) { + printf("%s", i == 0 ? 
"r" : "|r"); + } + printf("}\n\\toprule\nAlgorithm"); + for (const size_t in_size : unique) { + printf(" & %zu", in_size); + } + printf("\\\\\n\\midrule\n"); + + const SpeedsForCaption cpb_for_caption = SortByCaptionFilterBySize(unique); + for (const auto& item : cpb_for_caption) { + printf("%22s", item.first.c_str()); + for (const float cpb : item.second) { + printf(" & %5.2f", cpb); + } + printf("\\\\\n"); + } + } + + // Prints results suitable for pgfplots. + void PrintPlots() { + const SpeedsForCaption cpb_for_caption = SortByCaption(); + assert(!cpb_for_caption.empty()); + const size_t num_sizes = cpb_for_caption.begin()->second.size(); + + printf("Size "); + // Flatten per-caption vectors into one iterator. + std::vector::const_iterator> iterators; + for (const auto& item : cpb_for_caption) { + printf("%21s ", item.first.c_str()); + assert(item.second.size() == num_sizes); + iterators.push_back(item.second.begin()); + } + printf("\n"); + + const std::vector& sizes = UniqueSizes(); + assert(num_sizes == sizes.size()); + for (int i = 0; i < static_cast(num_sizes); ++i) { + printf("%d ", sizes[i]); + for (auto& it : iterators) { + printf("%5.2f ", 1.0f / *it); // bytes per cycle + ++it; + } + printf("\n"); + } + } + + private: + struct Result { + Result(const char* caption, const int in_size, const float cpb) + : caption(caption), in_size(in_size), cpb(cpb) {} + + // Algorithm name. + std::string caption; + // Size of the input data [bytes]. + int in_size; + // Measured throughput [cycles per byte]. + float cpb; + }; + + // Returns set of all input sizes for the first column of a size/speed plot. 
+ std::vector UniqueSizes() { + std::vector sizes; + sizes.reserve(results_.size()); + for (const Result& result : results_) { + sizes.push_back(result.in_size); + } + std::sort(sizes.begin(), sizes.end()); + sizes.erase(std::unique(sizes.begin(), sizes.end()), sizes.end()); + return sizes; + } + + using SpeedsForCaption = std::map>; + + SpeedsForCaption SortByCaption() const { + SpeedsForCaption cpb_for_caption; + for (const Result& result : results_) { + cpb_for_caption[result.caption].push_back(result.cpb); + } + return cpb_for_caption; + } + + // Only includes measurement results matching one of the given sizes. + SpeedsForCaption SortByCaptionFilterBySize( + const std::vector& in_sizes) const { + SpeedsForCaption cpb_for_caption; + for (const Result& result : results_) { + for (const size_t in_size : in_sizes) { + if (result.in_size == static_cast(in_size)) { + cpb_for_caption[result.caption].push_back(result.cpb); + } + } + } + return cpb_for_caption; + } + + std::vector results_; +}; + +void AddMeasurements(DurationsForInputs* input_map, const char* caption, + Measurements* measurements) { + for (size_t i = 0; i < input_map->num_items; ++i) { + const DurationsForInputs::Item& item = input_map->items[i]; + std::vector durations(item.durations, + item.durations + item.num_durations); + const float median_ticks = Median(&durations); + const float variability = MedianAbsoluteDeviation(durations, median_ticks); + const double median_cpu_cycles = + (median_ticks / InvariantTicksPerSecond()) * NominalClockRate(); + printf("%s %4zu: median=%6.1f ticks; median L1 norm =%4.1f ticks\n", + caption, item.input, median_ticks, variability); + measurements->Add(caption, item.input, median_cpu_cycles); + } + input_map->num_items = 0; +} + +#if BENCHMARK_SIP || BENCHMARK_FARM || BENCHMARK_INTERNAL || \ + (BENCHMARK_SIP_TREE && defined(__AVX2__)) + +void MeasureAndAdd(DurationsForInputs* input_map, const char* caption, + const Func func, Measurements* measurements) { + 
MeasureDurations(func, input_map); + AddMeasurements(input_map, caption, measurements); +} + +#endif + +// InstructionSets::RunAll callback. +void AddMeasurementsWithPrefix(const char* prefix, const char* target_name, + DurationsForInputs* input_map, void* context) { + std::string caption(prefix); + caption += target_name; + AddMeasurements(input_map, caption.c_str(), + static_cast(context)); +} + +#if BENCHMARK_SIP + +uint64_t RunSip(const void*, const size_t size) { + HH_ALIGNAS(16) const HH_U64 key2[2] = {0, 1}; + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return SipHash(key2, in, size); +} + +uint64_t RunSip13(const void*, const size_t size) { + HH_ALIGNAS(16) const HH_U64 key2[2] = {0, 1}; + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return SipHash13(key2, in, size); +} + +#endif + +#if BENCHMARK_SIP_TREE + +uint64_t RunSipTree(const void*, const size_t size) { + HH_ALIGNAS(32) const HH_U64 key4[4] = {0, 1, 2, 3}; + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return SipTreeHash(key4, in, size); +} + +uint64_t RunSipTree13(const void*, const size_t size) { + HH_ALIGNAS(32) const HH_U64 key4[4] = {0, 1, 2, 3}; + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return SipTreeHash13(key4, in, size); +} + +#endif + +#if BENCHMARK_FARM + +uint64_t RunFarm(const void*, const size_t size) { + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return farmhash::Fingerprint64(reinterpret_cast(in), size); +} + +#endif + +#if BENCHMARK_INTERNAL +uint64_t RunInternal(const void*, const size_t size) { + char in[kMaxBenchmarkInputSize]; + memcpy(in, &size, sizeof(size)); + return in[rand() % size]; +} +#endif + +void AddMeasurements(const std::vector& in_sizes, + Measurements* measurements) { + DurationsForInputs input_map(in_sizes.data(), in_sizes.size(), 40); +#if BENCHMARK_SIP + MeasureAndAdd(&input_map, "SipHash", &RunSip, measurements); + 
MeasureAndAdd(&input_map, "SipHash13", &RunSip13, measurements); +#endif + +#if BENCHMARK_SIP_TREE && defined(__AVX2__) + MeasureAndAdd(&input_map, "SipTreeHash", &RunSipTree, measurements); + MeasureAndAdd(&input_map, "SipTreeHash13", &RunSipTree13, measurements); +#endif + +#if BENCHMARK_FARM + MeasureAndAdd(&input_map, "Farm", &RunFarm, measurements); +#endif + +#if BENCHMARK_INTERNAL + MeasureAndAdd(&input_map, "Internal", &RunInternal, measurements); +#endif + +#if BENCHMARK_HIGHWAY + InstructionSets::RunAll( + &input_map, &AddMeasurementsWithPrefix, measurements); +#endif + +#if BENCHMARK_HIGHWAY_CAT + InstructionSets::RunAll( + &input_map, &AddMeasurementsWithPrefix, measurements); +#endif +} + +void PrintTable() { + const std::vector in_sizes = { + 7, 8, 31, 32, 63, 64, kMaxBenchmarkInputSize}; + Measurements measurements; + AddMeasurements(in_sizes, &measurements); + measurements.PrintTable(in_sizes); +} + +void PrintPlots() { + std::vector in_sizes; + for (int num_vectors = 0; num_vectors < 12; ++num_vectors) { + for (int remainder : {0, 9, 18, 27}) { + in_sizes.push_back(num_vectors * 32 + remainder); + assert(in_sizes.back() <= kMaxBenchmarkInputSize); + } + } + + Measurements measurements; + AddMeasurements(in_sizes, &measurements); + measurements.PrintPlots(); +} + +} // namespace +} // namespace highwayhash + +int main(int argc, char* argv[]) { + // No argument or t => table + if (argc < 2 || argv[1][0] == 't') { + highwayhash::PrintTable(); + } else if (argv[1][0] == 'p') { + highwayhash::PrintPlots(); + } + return 0; +} diff --git a/highwayhash/highwayhash/c_bindings.cc b/highwayhash/highwayhash/c_bindings.cc new file mode 100644 index 000000000..7e0488fb4 --- /dev/null +++ b/highwayhash/highwayhash/c_bindings.cc @@ -0,0 +1,35 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "highwayhash/c_bindings.h" + +#include "highwayhash/highwayhash_target.h" +#include "highwayhash/instruction_sets.h" + +using highwayhash::InstructionSets; +using highwayhash::HighwayHash; + +extern "C" { + +// Ideally this would reside in highwayhash_target.cc, but that file is +// compiled multiple times and we must only define this function once. +uint64_t HighwayHash64(const HHKey key, const char* bytes, + const uint64_t size) { + HHResult64 result; + InstructionSets::Run(*reinterpret_cast(key), bytes, + size, &result); + return result; +} + +} // extern "C" diff --git a/highwayhash/highwayhash/c_bindings.h b/highwayhash/highwayhash/c_bindings.h new file mode 100644 index 000000000..903aabc0f --- /dev/null +++ b/highwayhash/highwayhash/c_bindings.h @@ -0,0 +1,57 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_C_BINDINGS_H_ +#define HIGHWAYHASH_C_BINDINGS_H_ + +// C-callable function prototypes, documented in the other header files. 
+ +#include + +#include "hh_types.h" + +#ifdef __cplusplus +extern "C" { + +// Bring the symbols out of the namespace. +using highwayhash::HHKey; +using highwayhash::HHPacket; +using highwayhash::HHResult128; +using highwayhash::HHResult256; +using highwayhash::HHResult64; +#endif + +uint64_t SipHashC(const uint64_t* key, const char* bytes, const uint64_t size); +uint64_t SipHash13C(const uint64_t* key, const char* bytes, + const uint64_t size); + +// Uses the best implementation of HighwayHash for the current CPU and +// calculates 64-bit hash of given data. +uint64_t HighwayHash64(const HHKey key, const char* bytes, const uint64_t size); + +// Defined by highwayhash_target.cc, which requires a _Target* suffix. +uint64_t HighwayHash64_TargetPortable(const HHKey key, const char* bytes, + const uint64_t size); +uint64_t HighwayHash64_TargetSSE41(const HHKey key, const char* bytes, + const uint64_t size); +uint64_t HighwayHash64_TargetAVX2(const HHKey key, const char* bytes, + const uint64_t size); +uint64_t HighwayHash64_TargetVSX(const HHKey key, const char* bytes, + const uint64_t size); + +#ifdef __cplusplus +} +#endif + +#endif // HIGHWAYHASH_C_BINDINGS_H_ diff --git a/highwayhash/highwayhash/compiler_specific.h b/highwayhash/highwayhash/compiler_specific.h new file mode 100644 index 000000000..4789f9a61 --- /dev/null +++ b/highwayhash/highwayhash/compiler_specific.h @@ -0,0 +1,90 @@ +// Copyright 2015 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_COMPILER_SPECIFIC_H_ +#define HIGHWAYHASH_COMPILER_SPECIFIC_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +// Compiler + +// #if is shorter and safer than #ifdef. *_VERSION are zero if not detected, +// otherwise 100 * major + minor version. Note that other packages check for +// #ifdef COMPILER_MSVC, so we cannot use that same name. + +#ifdef _MSC_VER +#define HH_MSC_VERSION _MSC_VER +#else +#define HH_MSC_VERSION 0 +#endif + +#ifdef __GNUC__ +#define HH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +#define HH_GCC_VERSION 0 +#endif + +#ifdef __clang__ +#define HH_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +#define HH_CLANG_VERSION 0 +#endif + +//----------------------------------------------------------------------------- + +#if HH_GCC_VERSION && HH_GCC_VERSION < 408 +#define HH_ALIGNAS(multiple) __attribute__((aligned(multiple))) +#else +#define HH_ALIGNAS(multiple) alignas(multiple) // C++11 +#endif + +#if HH_MSC_VERSION +#define HH_RESTRICT __restrict +#elif HH_GCC_VERSION +#define HH_RESTRICT __restrict__ +#else +#define HH_RESTRICT +#endif + +#if HH_MSC_VERSION +#define HH_INLINE __forceinline +#define HH_NOINLINE __declspec(noinline) +#else +#define HH_INLINE inline +#define HH_NOINLINE __attribute__((noinline)) +#endif + +#if HH_MSC_VERSION +// Unsupported, __assume is not the same. 
+#define HH_LIKELY(expr) expr
+#define HH_UNLIKELY(expr) expr
+#else
+#define HH_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define HH_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#endif
+
+#if HH_MSC_VERSION
+#include <intrin.h>
+#pragma intrinsic(_ReadWriteBarrier)
+#define HH_COMPILER_FENCE _ReadWriteBarrier()
+#elif HH_GCC_VERSION
+#define HH_COMPILER_FENCE asm volatile("" : : : "memory")
+#else
+#define HH_COMPILER_FENCE
+#endif
+
+#endif  // HIGHWAYHASH_COMPILER_SPECIFIC_H_
diff --git a/highwayhash/highwayhash/data_parallel.h b/highwayhash/highwayhash/data_parallel.h
new file mode 100644
index 000000000..d72afc953
--- /dev/null
+++ b/highwayhash/highwayhash/data_parallel.h
@@ -0,0 +1,341 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAYHASH_DATA_PARALLEL_H_
+#define HIGHWAYHASH_DATA_PARALLEL_H_
+
+// Portable C++11 alternative to OpenMP for data-parallel computations:
+// provides low-overhead ThreadPool, plus PerThread with support for reduction.
+
+#include <stddef.h>
+#include <algorithm>  // find_if
+#include <atomic>
+#include <condition_variable>  //NOLINT
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <mutex>  //NOLINT
+#include <thread>  //NOLINT
+#include <utility>
+#include <vector>
+
+#define DATA_PARALLEL_CHECK(condition) \
+  while (!(condition)) { \
+    printf("data_parallel check failed at line %d\n", __LINE__); \
+    abort(); \
+  }
+
+namespace highwayhash {
+
+// Highly scalable thread pool, especially suitable for data-parallel
+// computations in the fork-join model, where clients need to know when all
+// tasks have completed.
+//
+// Thread pools usually store small numbers of heterogeneous tasks in a queue.
+// When tasks are identical or differ only by an integer input parameter, it is
+// much faster to store just one function of an integer parameter and call it
+// for each value.
+//
+// This thread pool can efficiently load-balance millions of tasks using an
+// atomic counter, thus avoiding per-task syscalls. With 48 hyperthreads and
+// 1M tasks that add to an atomic counter, overall runtime is 10-20x higher
+// when using std::async, and up to 200x for a queue-based ThreadPool.
+//
+// Usage:
+// ThreadPool pool;
+// pool.Run(0, 1000000, [](const int i) { Func1(i); });
+// // When Run returns, all of its tasks have finished.
+//
+// pool.RunTasks({Func2, Func3, Func4});
+// // The destructor waits until all worker threads have exited cleanly.
+class ThreadPool {
+ public:
+  // Starts the given number of worker threads and blocks until they are ready.
+  // "num_threads" defaults to one per hyperthread.
+  explicit ThreadPool(
+      const int num_threads = std::thread::hardware_concurrency())
+      : num_threads_(num_threads) {
+    DATA_PARALLEL_CHECK(num_threads_ > 0);
+    threads_.reserve(num_threads_);
+    for (int i = 0; i < num_threads_; ++i) {
+      threads_.emplace_back(ThreadFunc, this);
+    }
+
+    padding_[0] = 0;  // avoid unused member warning.
+
+    WorkersReadyBarrier();
+  }
+
+  ThreadPool(const ThreadPool&) = delete;
+  ThreadPool& operator=(const ThreadPool&) = delete;
+
+  // Waits for all threads to exit.
+  ~ThreadPool() {
+    StartWorkers(kWorkerExit);
+
+    for (std::thread& thread : threads_) {
+      thread.join();
+    }
+  }
+
+  // Runs func(i) on worker thread(s) for every i in [begin, end).
+  // Not thread-safe - no two calls to Run and RunTasks may overlap.
+  // Subsequent calls will reuse the same threads.
+  //
+  // Precondition: 0 <= begin <= end.
+  template <class Func>
+  void Run(const int begin, const int end, const Func& func) {
+    DATA_PARALLEL_CHECK(0 <= begin && begin <= end);
+    if (begin == end) {
+      return;
+    }
+    const WorkerCommand worker_command = (WorkerCommand(end) << 32) + begin;
+    // Ensure the inputs do not result in a reserved command.
+    DATA_PARALLEL_CHECK(worker_command != kWorkerWait);
+    DATA_PARALLEL_CHECK(worker_command != kWorkerExit);
+
+    // If Func is large (many captures), this will allocate memory, but it is
+    // still slower to use a std::ref wrapper.
+    task_ = func;
+    num_reserved_.store(0);
+
+    StartWorkers(worker_command);
+    WorkersReadyBarrier();
+  }
+
+  // Runs each task (closure, typically a lambda function) on worker thread(s).
+  // Not thread-safe - no two calls to Run and RunTasks may overlap.
+  // Subsequent calls will reuse the same threads.
+  //
+  // This is a more conventional interface for heterogeneous tasks that may be
+  // independent/unrelated.
+  void RunTasks(const std::vector<std::function<void(void)>>& tasks) {
+    Run(0, static_cast<int>(tasks.size()),
+        [&tasks](const int i) { tasks[i](); });
+  }
+
+  // Statically (and deterministically) splits [begin, end) into ranges and
+  // calls "func" for each of them. Useful when "func" involves some overhead
+  // (e.g. for PerThread::Get or random seeding) that should be amortized over
+  // a range of values. "func" is void(int chunk, uint32_t begin, uint32_t end).
+  template <class Func>
+  void RunRanges(const uint32_t begin, const uint32_t end, const Func& func) {
+    const uint32_t length = end - begin;
+
+    // Use constant rather than num_threads_ for machine-independent splitting.
+    const uint32_t chunk = std::max(1U, (length + 127) / 128);
+    std::vector<std::pair<uint32_t, uint32_t>> ranges;  // begin/end
+    ranges.reserve(length / chunk + 1);
+    for (uint32_t i = 0; i < length; i += chunk) {
+      ranges.emplace_back(begin + i, begin + std::min(i + chunk, length));
+    }
+
+    Run(0, static_cast<int>(ranges.size()), [&ranges, func](const int i) {
+      func(i, ranges[i].first, ranges[i].second);
+    });
+  }
+
+ private:
+  // After construction and between calls to Run, workers are "ready", i.e.
+  // waiting on worker_start_cv_. They are "started" by sending a "command"
+  // and notifying all worker_start_cv_ waiters. (That is why all workers
+  // must be ready/waiting - otherwise, the notification will not reach all of
+  // them and the main thread waits in vain for them to report readiness.)
+  using WorkerCommand = uint64_t;
+
+  // Special values; all others encode the begin/end parameters.
+  static constexpr WorkerCommand kWorkerWait = 0;
+  static constexpr WorkerCommand kWorkerExit = ~0ULL;
+
+  void WorkersReadyBarrier() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    workers_ready_cv_.wait(lock,
+                           [this]() { return workers_ready_ == num_threads_; });
+    workers_ready_ = 0;
+  }
+
+  // Precondition: all workers are ready.
+  void StartWorkers(const WorkerCommand worker_command) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    worker_start_command_ = worker_command;
+    // Workers will need this lock, so release it before they wake up.
+    lock.unlock();
+    worker_start_cv_.notify_all();
+  }
+
+  // Attempts to reserve and perform some work from the global range of tasks,
+  // which is encoded within "command". Returns after all tasks are reserved.
+  static void RunRange(ThreadPool* self, const WorkerCommand command) {
+    const int begin = command & 0xFFFFFFFF;
+    const int end = command >> 32;
+    const int num_tasks = end - begin;
+
+    // OpenMP introduced several "schedule" strategies:
+    // "single" (static assignment of exactly one chunk per thread): slower.
+    // "dynamic" (allocates k tasks at a time): competitive for well-chosen k.
+    // "guided" (allocates k tasks, decreases k): computing k = remaining/n
+    //   is faster than halving k each iteration. We prefer this strategy
+    //   because it avoids user-specified parameters.
+
+    for (;;) {
+      const int num_reserved = self->num_reserved_.load();
+      const int num_remaining = num_tasks - num_reserved;
+      const int my_size = std::max(num_remaining / (self->num_threads_ * 2), 1);
+      const int my_begin = begin + self->num_reserved_.fetch_add(my_size);
+      const int my_end = std::min(my_begin + my_size, begin + num_tasks);
+      // Another thread already reserved the last task.
+      if (my_begin >= my_end) {
+        break;
+      }
+      for (int i = my_begin; i < my_end; ++i) {
+        self->task_(i);
+      }
+    }
+  }
+
+  static void ThreadFunc(ThreadPool* self) {
+    // Until kWorkerExit command received:
+    for (;;) {
+      std::unique_lock<std::mutex> lock(self->mutex_);
+      // Notify main thread that this thread is ready.
+      if (++self->workers_ready_ == self->num_threads_) {
+        self->workers_ready_cv_.notify_one();
+      }
+    RESUME_WAIT:
+      // Wait for a command.
+      self->worker_start_cv_.wait(lock);
+      const WorkerCommand command = self->worker_start_command_;
+      switch (command) {
+        case kWorkerWait:    // spurious wakeup:
+          goto RESUME_WAIT;  // lock still held, avoid incrementing ready.
+        case kWorkerExit:
+          return;  // exits thread
+      }
+
+      lock.unlock();
+      RunRange(self, command);
+    }
+  }
+
+  const int num_threads_;
+
+  // Unmodified after ctor, but cannot be const because we call thread::join().
+  std::vector<std::thread> threads_;
+
+  std::mutex mutex_;  // guards both cv and their variables.
+  std::condition_variable workers_ready_cv_;
+  int workers_ready_ = 0;
+  std::condition_variable worker_start_cv_;
+  WorkerCommand worker_start_command_;
+
+  // Written by main thread, read by workers (after mutex lock/unlock).
+  std::function<void(int)> task_;
+
+  // Updated by workers; alignment/padding avoids false sharing.
+  alignas(64) std::atomic<int> num_reserved_{0};
+  int padding_[15];
+};
+
+// Thread-local storage with support for reduction (combining into one result).
+// The "T" type must be unique to the call site because the list of threads'
+// copies is a static member. (With knowledge of the underlying threads, we
+// could eliminate this list and T allocations, but that is difficult to
+// arrange and we prefer this to be usable independently of ThreadPool.)
+//
+// Usage:
+// for (int i = 0; i < N; ++i) {
+// // in each thread:
+// T& my_copy = PerThread::Get();
+// my_copy.Modify();
+//
+// // single-threaded:
+// T& combined = PerThread::Reduce();
+// Use(combined);
+// PerThread::Destroy();
+// }
+//
+// T is duck-typed and implements the following interface:
+//
+// // Returns true if T is default-initialized or Destroy was called without
+// // any subsequent re-initialization.
+// bool IsNull() const;
+//
+// // Releases any resources. Postcondition: IsNull() == true.
+// void Destroy();
+//
+// // Merges in data from "victim". Precondition: !IsNull() && !victim.IsNull().
+// void Assimilate(const T& victim);
+template <class T>
+class PerThread {
+ public:
+  // Returns reference to this thread's T instance (dynamically allocated,
+  // so its address is unique). Callers are responsible for any initialization
+  // beyond the default ctor.
+  static T& Get() {
+    static thread_local T* t;
+    if (t == nullptr) {
+      t = new T;
+      static std::mutex mutex;
+      std::lock_guard<std::mutex> lock(mutex);
+      Threads().push_back(t);
+    }
+    return *t;
+  }
+
+  // Returns vector of all per-thread T. Used inside Reduce() or by clients
+  // that require direct access to T instead of Assimilating them.
+  // Function wrapper avoids separate static member variable definition.
+  static std::vector<T*>& Threads() {
+    static std::vector<T*> threads;
+    return threads;
+  }
+
+  // Returns the first non-null T after assimilating all other threads' T
+  // into it. Precondition: at least one non-null T exists (caller must have
+  // called Get() and initialized the result).
+  static T& Reduce() {
+    std::vector<T*>& threads = Threads();
+
+    // Find first non-null T
+    const auto it = std::find_if(threads.begin(), threads.end(),
+                                 [](const T* t) { return !t->IsNull(); });
+    if (it == threads.end()) {
+      abort();
+    }
+    T* const first = *it;
+
+    for (const T* t : threads) {
+      if (t != first && !t->IsNull()) {
+        first->Assimilate(*t);
+      }
+    }
+    return *first;
+  }
+
+  // Calls each thread's T::Destroy to release resources and/or prepare for
+  // reuse by the same threads/ThreadPool. Note that all T remain allocated
+  // (we need thread-independent pointers for iterating over each thread's T,
+  // and deleting them would leave dangling pointers in each thread, which is
+  // unacceptable because the same thread may call Get() again later.)
+  static void Destroy() {
+    for (T* t : Threads()) {
+      t->Destroy();
+    }
+  }
+};
+
+}  // namespace highwayhash
+
+#endif  // HIGHWAYHASH_DATA_PARALLEL_H_
diff --git a/highwayhash/highwayhash/data_parallel_benchmark.cc b/highwayhash/highwayhash/data_parallel_benchmark.cc
new file mode 100644
index 000000000..b8817c5c1
--- /dev/null
+++ b/highwayhash/highwayhash/data_parallel_benchmark.cc
@@ -0,0 +1,157 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include //NOLINT +#include + +#include "testing/base/public/gunit.h" +#include "third_party/absl/container/btree_set.h" +#include "third_party/absl/time/clock.h" +#include "third_party/absl/time/time.h" +#include "highwayhash/arch_specific.h" +#include "highwayhash/data_parallel.h" +#include "thread/threadpool.h" + +namespace highwayhash { +namespace { + +constexpr int kBenchmarkTasks = 1000000; + +// Returns elapsed time [nanoseconds] for std::async. +double BenchmarkAsync(uint64_t* total) { + const absl::Time t0 = absl::Now(); + std::atomic sum1{0}; + std::atomic sum2{0}; + + std::vector> futures; + futures.reserve(kBenchmarkTasks); + for (int i = 0; i < kBenchmarkTasks; ++i) { + futures.push_back(std::async( + [&sum1, &sum2](const int i) { + sum1.fetch_add(i); + sum2.fetch_add(1); + }, + i)); + } + + for (auto& future : futures) { + future.get(); + } + + const absl::Time t1 = absl::Now(); + *total = sum1.load() + sum2.load(); + return absl::ToDoubleNanoseconds(t1 - t0); +} + +// Returns elapsed time [nanoseconds] for (atomic) ThreadPool. +double BenchmarkPoolA(uint64_t* total) { + const absl::Time t0 = absl::Now(); + std::atomic sum1{0}; + std::atomic sum2{0}; + + ThreadPool pool; + pool.Run(0, kBenchmarkTasks, [&sum1, &sum2](const int i) { + sum1.fetch_add(i); + sum2.fetch_add(1); + }); + + const absl::Time t1 = absl::Now(); + *total = sum1.load() + sum2.load(); + return absl::ToDoubleNanoseconds(t1 - t0); +} + +// Returns elapsed time [nanoseconds] for ::ThreadPool. 
+double BenchmarkPoolG(uint64_t* total) { + const absl::Time t0 = absl::Now(); + std::atomic sum1{0}; + std::atomic sum2{0}; + + { + ::ThreadPool pool(std::thread::hardware_concurrency()); + pool.StartWorkers(); + for (int i = 0; i < kBenchmarkTasks; ++i) { + pool.Schedule([&sum1, &sum2, i]() { + sum1.fetch_add(i); + sum2.fetch_add(1); + }); + } + } + + const absl::Time t1 = absl::Now(); + *total = sum1.load() + sum2.load(); + return absl::ToDoubleNanoseconds(t1 - t0); +} + +// Compares ThreadPool speed to std::async and ::ThreadPool. +TEST(DataParallelTest, Benchmarks) { + uint64_t sum1, sum2, sum3; + const double async_ns = BenchmarkAsync(&sum1); + const double poolA_ns = BenchmarkPoolA(&sum2); + const double poolG_ns = BenchmarkPoolG(&sum3); + + printf("Async %11.0f ns\nPoolA %11.0f ns\nPoolG %11.0f ns\n", async_ns, + poolA_ns, poolG_ns); + // baseline 20x, 10x with asan or msan, 5x with tsan + EXPECT_GT(async_ns, poolA_ns * 4); + // baseline 200x, 180x with asan, 70x with msan, 50x with tsan. + EXPECT_GT(poolG_ns, poolA_ns * 20); + + // Should reach same result. + EXPECT_EQ(sum1, sum2); + EXPECT_EQ(sum2, sum3); +} + +#if HH_ARCH_X64 +// Ensures multiple hardware threads are used (decided by the OS scheduler). +TEST(DataParallelTest, TestApicIds) { + for (int num_threads = 1; num_threads <= std::thread::hardware_concurrency(); + ++num_threads) { + ThreadPool pool(num_threads); + + std::mutex mutex; + absl::btree_set ids; + double total = 0.0; + pool.Run(0, 2 * num_threads, [&mutex, &ids, &total](const int i) { + // Useless computations to keep the processor busy so that threads + // can't just reuse the same processor. 
+ double sum = 0.0; + for (int rep = 0; rep < 900 * (i + 30); ++rep) { + sum += pow(rep, 0.5); + } + + mutex.lock(); + ids.insert(ApicId()); + total += sum; + mutex.unlock(); + }); + + // No core ID / APIC ID available + if (num_threads > 1 && ids.size() == 1) { + EXPECT_EQ(0, *ids.begin()); + } else { + // (The Linux scheduler doesn't use all available HTs, but the + // computations should at least keep most cores busy.) + EXPECT_GT(ids.size() + 2, num_threads / 4); + } + + // (Ensure the busy-work is not elided.) + EXPECT_GT(total, 1E4); + } +} +#endif // HH_ARCH_X64 + +} // namespace +} // namespace highwayhash diff --git a/highwayhash/highwayhash/data_parallel_test.cc b/highwayhash/highwayhash/data_parallel_test.cc new file mode 100644 index 000000000..2728b7d3a --- /dev/null +++ b/highwayhash/highwayhash/data_parallel_test.cc @@ -0,0 +1,175 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "testing/base/public/gunit.h" +#include "highwayhash/data_parallel.h" + +namespace highwayhash { +namespace { + +int PopulationCount(uint64_t bits) { + int num_set = 0; + while (bits != 0) { + num_set += bits & 1; + bits >>= 1; + } + return num_set; +} + +std::atomic func_counts{0}; + +void Func2() { + usleep(200000); + func_counts.fetch_add(4); +} + +void Func3() { + usleep(300000); + func_counts.fetch_add(16); +} + +void Func4() { + usleep(400000); + func_counts.fetch_add(256); +} + +// Exercises the RunTasks feature (running arbitrary tasks/closures) +TEST(DataParallelTest, TestRunTasks) { + ThreadPool pool(4); + pool.RunTasks({Func2, Func3, Func4}); + EXPECT_EQ(276, func_counts.load()); +} + +// Ensures task parameter is in bounds, every parameter is reached, +// pool can be reused (multiple consecutive Run calls), pool can be destroyed +// (joining with its threads). +TEST(DataParallelTest, TestPool) { + for (int num_threads = 1; num_threads <= 18; ++num_threads) { + ThreadPool pool(num_threads); + for (int num_tasks = 0; num_tasks < 32; ++num_tasks) { + std::vector mementos(num_tasks, 0); + for (int begin = 0; begin < 32; ++begin) { + std::fill(mementos.begin(), mementos.end(), 0); + pool.Run(begin, begin + num_tasks, + [begin, num_tasks, &mementos](const int i) { + // Parameter is in the given range + EXPECT_GE(i, begin); + EXPECT_LT(i, begin + num_tasks); + + // Store mementos to be sure we visited each i. 
+ mementos.at(i - begin) = 1000 + i; + }); + for (int i = begin; i < begin + num_tasks; ++i) { + EXPECT_EQ(1000 + i, mementos.at(i - begin)); + } + } + } + } +} + +TEST(DataParallelTest, TestRunRanges) { + for (int num_threads = 1; num_threads <= 18; ++num_threads) { + ThreadPool pool(num_threads); + for (int num_tasks = 0; num_tasks < 32; ++num_tasks) { + std::vector mementos(num_tasks, 0); + for (int begin = 0; begin < 32; ++begin) { + std::fill(mementos.begin(), mementos.end(), 0); + pool.RunRanges(begin, begin + num_tasks, + [begin, num_tasks, &mementos](const int chunk, + const uint32_t my_begin, + const uint32_t my_end) { + for (uint32_t i = my_begin; i < my_end; ++i) { + // Parameter is in the given range + EXPECT_GE(i, begin); + EXPECT_LT(i, begin + num_tasks); + + // Store mementos to be sure we visited each i. + mementos.at(i - begin) = 1000 + i; + } + }); + for (int i = begin; i < begin + num_tasks; ++i) { + EXPECT_EQ(1000 + i, mementos.at(i - begin)); + } + } + } + } +} + +// Ensures each of N threads processes exactly 1 of N tasks, i.e. the +// work distribution is perfectly fair for small counts. +TEST(DataParallelTest, TestSmallAssignments) { + for (int num_threads = 1; num_threads <= 64; ++num_threads) { + ThreadPool pool(num_threads); + + std::atomic counter{0}; + // (Avoid mutex because it may perturb the worker thread scheduling) + std::atomic id_bits{0}; + + pool.Run(0, num_threads, [&counter, num_threads, &id_bits](const int i) { + const int id = counter.fetch_add(1); + EXPECT_LT(id, num_threads); + uint64_t bits = id_bits.load(std::memory_order_relaxed); + while (!id_bits.compare_exchange_weak(bits, bits | (1ULL << id))) { + } + }); + + const int num_participants = PopulationCount(id_bits.load()); + EXPECT_EQ(num_threads, num_participants); + } +} + +// Test payload for PerThread. 
+struct CheckUniqueIDs { + bool IsNull() const { return false; } + void Destroy() { id_bits = 0; } + void Assimilate(const CheckUniqueIDs& victim) { + // Cannot overlap because each PerThread has unique bits. + EXPECT_EQ(0, id_bits & victim.id_bits); + id_bits |= victim.id_bits; + } + + uint64_t id_bits = 0; +}; + +// Ensures each thread has a PerThread instance, that they are successfully +// combined/reduced into a single result, and that reuse is possible after +// Destroy(). +TEST(DataParallelTest, TestPerThread) { + // We use a uint64_t bit array for convenience => no more than 64 threads. + const int max_threads = std::min(64U, std::thread::hardware_concurrency()); + for (int num_threads = 1; num_threads <= max_threads; ++num_threads) { + ThreadPool pool(num_threads); + + std::atomic counter{0}; + pool.Run(0, num_threads, [&counter, num_threads](const int i) { + const int id = counter.fetch_add(1); + EXPECT_LT(id, num_threads); + PerThread::Get().id_bits |= 1ULL << id; + }); + + // Verify each thread's bit is set. + const uint64_t all_bits = PerThread::Reduce().id_bits; + // Avoid shifting by 64 (undefined). + const uint64_t expected = + num_threads == 64 ? ~0ULL : (1ULL << num_threads) - 1; + EXPECT_EQ(expected, all_bits); + PerThread::Destroy(); + } +} + +} // namespace +} // namespace highwayhash diff --git a/highwayhash/highwayhash/endianess.h b/highwayhash/highwayhash/endianess.h new file mode 100644 index 000000000..776a02fa2 --- /dev/null +++ b/highwayhash/highwayhash/endianess.h @@ -0,0 +1,108 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAYHASH_ENDIANESS_H_
+#define HIGHWAYHASH_ENDIANESS_H_
+
+// WARNING: this is a "restricted" header because it is included from
+// translation units compiled with different flags. This header and its
+// dependencies must not define any function unless it is static inline and/or
+// within namespace HH_TARGET_NAME. See arch_specific.h for details.
+
+#include <stdint.h>
+
+#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
+
+  /* Someone has already included <endian.h> or equivalent. */
+
+#elif defined(__LITTLE_ENDIAN__)
+
+# define HH_IS_LITTLE_ENDIAN 1
+# define HH_IS_BIG_ENDIAN 0
+# ifdef __BIG_ENDIAN__
+#  error "Platform is both little and big endian?"
+# endif
+
+#elif defined(__BIG_ENDIAN__)
+
+# define HH_IS_LITTLE_ENDIAN 0
+# define HH_IS_BIG_ENDIAN 1
+
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+      defined(__ORDER_LITTLE_ENDIAN__)
+
+# define HH_IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+# define HH_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+
+#elif defined(__linux__) || defined(__CYGWIN__) || defined( __GNUC__ ) || \
+      defined( __GNU_LIBRARY__ )
+
+# include <endian.h>
+
+#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || \
+      defined(__DragonFly__)
+
+# include <sys/endian.h>
+
+#elif defined(_WIN32)
+
+#define HH_IS_LITTLE_ENDIAN 1
+#define HH_IS_BIG_ENDIAN 0
+
+#else
+
+# error "Unsupported platform. Cannot determine byte order."
+ +#endif + + +#ifndef HH_IS_LITTLE_ENDIAN +# define HH_IS_LITTLE_ENDIAN (BYTE_ORDER == LITTLE_ENDIAN) +# define HH_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) +#endif + + +namespace highwayhash { + +#if HH_IS_LITTLE_ENDIAN + +static inline uint32_t le32_from_host(uint32_t x) { return x; } +static inline uint32_t host_from_le32(uint32_t x) { return x; } +static inline uint64_t le64_from_host(uint64_t x) { return x; } +static inline uint64_t host_from_le64(uint64_t x) { return x; } + +#elif !HH_IS_BIG_ENDIAN + +# error "Unsupported byte order." + +#elif defined(_WIN16) || defined(_WIN32) || defined(_WIN64) + +#include +static inline uint32_t host_from_le32(uint32_t x) { return _byteswap_ulong(x); } +static inline uint32_t le32_from_host(uint32_t x) { return _byteswap_ulong(x); } +static inline uint64_t host_from_le64(uint64_t x) { return _byteswap_uint64(x);} +static inline uint64_t le64_from_host(uint64_t x) { return _byteswap_uint64(x);} + +#else + +static inline uint32_t host_from_le32(uint32_t x) {return __builtin_bswap32(x);} +static inline uint32_t le32_from_host(uint32_t x) {return __builtin_bswap32(x);} +static inline uint64_t host_from_le64(uint64_t x) {return __builtin_bswap64(x);} +static inline uint64_t le64_from_host(uint64_t x) {return __builtin_bswap64(x);} + +#endif + +} // namespace highwayhash + +#endif // HIGHWAYHASH_ENDIANESS_H_ diff --git a/highwayhash/highwayhash/example.cc b/highwayhash/highwayhash/example.cc new file mode 100644 index 000000000..e3939dd4a --- /dev/null +++ b/highwayhash/highwayhash/example.cc @@ -0,0 +1,40 @@ +// Minimal usage example: prints a hash. Tested on x86, ppc, arm. + +#include +#include +#include + +#include "highwayhash/highwayhash.h" + +using namespace highwayhash; + +int main(int argc, char* argv[]) { + // We read from the args on purpose, to ensure a compile time constant will + // not be used, for verifying assembly on the supported platforms. 
+ if (argc != 2) { + std::cout << "Please provide 1 argument with a text to hash" << std::endl; + return 1; + } + + // Please use a different key to ensure your hashes aren't identical. + HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4}; + + // Aligning inputs to 32 bytes may help but is not required. + const char* in = argv[1]; + const size_t size = strlen(in); + + // Type determines the hash size; can also be HHResult128 or HHResult256. + HHResult64 result; + + // HH_TARGET_PREFERRED expands to the best specialization available for the + // CPU detected via compiler flags (e.g. AVX2 #ifdef __AVX2__). + HHStateT state(key); + HighwayHashT(&state, in, size, &result); + std::cout << "Hash : " << result << std::endl; + + HighwayHashCatT cat(key); + cat.Append(in, size); + cat.Finalize(&result); + std::cout << "HashCat: " << result << std::endl; + return 0; +} diff --git a/highwayhash/highwayhash/hh_avx2.cc b/highwayhash/highwayhash/hh_avx2.cc new file mode 100644 index 000000000..7e3ddff0d --- /dev/null +++ b/highwayhash/highwayhash/hh_avx2.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. 
+ +#define HH_TARGET_NAME AVX2 +#include "highwayhash/highwayhash_target.cc" diff --git a/highwayhash/highwayhash/hh_avx2.h b/highwayhash/highwayhash/hh_avx2.h new file mode 100644 index 000000000..db44f533c --- /dev/null +++ b/highwayhash/highwayhash/hh_avx2.h @@ -0,0 +1,381 @@ +// Copyright 2015-2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_AVX2_H_ +#define HIGHWAYHASH_HH_AVX2_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_buffer.h" +#include "highwayhash/hh_types.h" +#include "highwayhash/load3.h" +#include "highwayhash/vector128.h" +#include "highwayhash/vector256.h" + +// For auto-dependency generation, we need to include all headers but not their +// contents (otherwise compilation fails because -mavx2 is not specified). +#ifndef HH_DISABLE_TARGET_SPECIFIC + +namespace highwayhash { +// See vector128.h for why this namespace is necessary; matching it here makes +// it easier use the vector128 symbols, but requires textual inclusion. 
+namespace HH_TARGET_NAME { + +class HHStateAVX2 { + public: + explicit HH_INLINE HHStateAVX2(const HHKey key_lanes) { Reset(key_lanes); } + + HH_INLINE void Reset(const HHKey key_lanes) { + // "Nothing up my sleeve" numbers, concatenated hex digits of Pi from + // http://www.numberworld.org/digits/Pi/, retrieved Feb 22, 2016. + // + // We use this python code to generate the fourth number to have + // more even mixture of bits: + /* +def x(a,b,c): + retval = 0 + for i in range(64): + count = ((a >> i) & 1) + ((b >> i) & 1) + ((c >> i) & 1) + if (count <= 1): + retval |= 1 << i + return retval + */ + const V4x64U init0(0x243f6a8885a308d3ull, 0x13198a2e03707344ull, + 0xa4093822299f31d0ull, 0xdbe6d5d5fe4cce2full); + const V4x64U init1(0x452821e638d01377ull, 0xbe5466cf34e90c6cull, + 0xc0acf169b5f18a8cull, 0x3bd39e10cb0ef593ull); + const V4x64U key = LoadUnaligned(key_lanes); + v0 = key ^ init0; + v1 = Rotate64By32(key) ^ init1; + mul0 = init0; + mul1 = init1; + } + + HH_INLINE void Update(const HHPacket& packet_bytes) { + const uint64_t* HH_RESTRICT packet = + reinterpret_cast(packet_bytes); + Update(LoadUnaligned(packet)); + } + + HH_INLINE void UpdateRemainder(const char* bytes, const size_t size_mod32) { + // 'Length padding' differentiates zero-valued inputs that have the same + // size/32. mod32 is sufficient because each Update behaves as if a + // counter were injected, because the state is large and mixed thoroughly. + const V8x32U size256( + _mm256_broadcastd_epi32(_mm_cvtsi64_si128(size_mod32))); + // Equivalent to storing size_mod32 in packet. + v0 += V4x64U(size256); + // Boosts the avalanche effect of mod32. + v1 = Rotate32By(v1, size256); + + const char* remainder = bytes + (size_mod32 & ~3); + const size_t size_mod4 = size_mod32 & 3; + + const V4x32U size(_mm256_castsi256_si128(size256)); + + // (Branching is faster than a single _mm256_maskload_epi32.) 
+ if (HH_UNLIKELY(size_mod32 & 16)) { // 16..31 bytes left + const V4x32U packetL = + LoadUnaligned(reinterpret_cast(bytes)); + + const V4x32U int_mask = IntMask<16>()(size); + const V4x32U int_lanes = MaskedLoadInt(bytes + 16, int_mask); + const uint32_t last4 = + Load3()(Load3::AllowReadBeforeAndReturn(), remainder, size_mod4); + + // The upper four bytes of packetH are zero, so insert there. + const V4x32U packetH(_mm_insert_epi32(int_lanes, last4, 3)); + Update(packetH, packetL); + } else { // size_mod32 < 16 + const V4x32U int_mask = IntMask<0>()(size); + const V4x32U packetL = MaskedLoadInt(bytes, int_mask); + const uint64_t last3 = + Load3()(Load3::AllowUnordered(), remainder, size_mod4); + + // Rather than insert into packetL[3], it is faster to initialize + // the otherwise empty packetH. + const V4x32U packetH(_mm_cvtsi64_si128(last3)); + Update(packetH, packetL); + } + } + + HH_INLINE void Finalize(HHResult64* HH_RESTRICT result) { + // Mix together all lanes. It is slightly better to permute v0 than v1; + // it will be added to v1. + Update(Permute(v0)); + Update(Permute(v0)); + Update(Permute(v0)); + Update(Permute(v0)); + + const V2x64U sum0(_mm256_castsi256_si128(v0 + mul0)); + const V2x64U sum1(_mm256_castsi256_si128(v1 + mul1)); + const V2x64U hash = sum0 + sum1; + // Each lane is sufficiently mixed, so just truncate to 64 bits. 
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(result), hash); + } + + HH_INLINE void Finalize(HHResult128* HH_RESTRICT result) { + for (int n = 0; n < 6; n++) { + Update(Permute(v0)); + } + + const V2x64U sum0(_mm256_castsi256_si128(v0 + mul0)); + const V2x64U sum1(_mm256_extracti128_si256(v1 + mul1, 1)); + const V2x64U hash = sum0 + sum1; + _mm_storeu_si128(reinterpret_cast<__m128i*>(result), hash); + } + + HH_INLINE void Finalize(HHResult256* HH_RESTRICT result) { + for (int n = 0; n < 10; n++) { + Update(Permute(v0)); + } + + const V4x64U sum0 = v0 + mul0; + const V4x64U sum1 = v1 + mul1; + const V4x64U hash = ModularReduction(sum1, sum0); + StoreUnaligned(hash, &(*result)[0]); + } + + // "buffer" must be 32-byte aligned. + static HH_INLINE void ZeroInitialize(char* HH_RESTRICT buffer) { + const __m256i zero = _mm256_setzero_si256(); + _mm256_store_si256(reinterpret_cast<__m256i*>(buffer), zero); + } + + // "buffer" must be 32-byte aligned. + static HH_INLINE void CopyPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer) { + const V4x32U size(size_mod32); + const uint32_t* const HH_RESTRICT from_u32 = + reinterpret_cast(from); + uint32_t* const HH_RESTRICT buffer_u32 = + reinterpret_cast(buffer); + if (HH_UNLIKELY(size_mod32 & 16)) { // Copying 16..31 bytes + const V4x32U inL = LoadUnaligned(from_u32); + Store(inL, buffer_u32); + const V4x32U inH = Load0To16<16, Load3::AllowReadBefore>( + from + 16, size_mod32 - 16, size); + Store(inH, buffer_u32 + V4x32U::N); + } else { // Copying 0..15 bytes + const V4x32U inL = Load0To16<>(from, size_mod32, size); + Store(inL, buffer_u32); + // No need to change upper 16 bytes of buffer. + } + } + + // "buffer" must be 32-byte aligned. 
+ static HH_INLINE void AppendPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer, + const size_t buffer_valid) { + const V4x32U size(size_mod32); + uint32_t* const HH_RESTRICT buffer_u32 = + reinterpret_cast(buffer); + // buffer_valid + size <= 32 => appending 0..16 bytes inside upper 16 bytes. + if (HH_UNLIKELY(buffer_valid & 16)) { + const V4x32U suffix = Load0To16<>(from, size_mod32, size); + const V4x32U bufferH = Load(buffer_u32 + V4x32U::N); + const V4x32U outH = Concatenate(bufferH, buffer_valid - 16, suffix); + Store(outH, buffer_u32 + V4x32U::N); + } else { // Appending 0..32 bytes starting at offset 0..15. + const V4x32U bufferL = Load(buffer_u32); + const V4x32U suffixL = Load0To16<>(from, size_mod32, size); + const V4x32U outL = Concatenate(bufferL, buffer_valid, suffixL); + Store(outL, buffer_u32); + const size_t offsetH = sizeof(V4x32U) - buffer_valid; + // Do we have enough input to start filling the upper 16 buffer bytes? + if (size_mod32 > offsetH) { + const size_t sizeH = size_mod32 - offsetH; + const V4x32U outH = Load0To16<>(from + offsetH, sizeH, V4x32U(sizeH)); + Store(outH, buffer_u32 + V4x32U::N); + } + } + } + + // "buffer" must be 32-byte aligned. + HH_INLINE void AppendAndUpdate(const char* HH_RESTRICT from, + const size_t size_mod32, + const char* HH_RESTRICT buffer, + const size_t buffer_valid) { + const V4x32U size(size_mod32); + const uint32_t* const HH_RESTRICT buffer_u32 = + reinterpret_cast(buffer); + // buffer_valid + size <= 32 => appending 0..16 bytes inside upper 16 bytes. + if (HH_UNLIKELY(buffer_valid & 16)) { + const V4x32U suffix = Load0To16<>(from, size_mod32, size); + const V4x32U packetL = Load(buffer_u32); + const V4x32U bufferH = Load(buffer_u32 + V4x32U::N); + const V4x32U packetH = Concatenate(bufferH, buffer_valid - 16, suffix); + Update(packetH, packetL); + } else { // Appending 0..32 bytes starting at offset 0..15. 
+ const V4x32U bufferL = Load(buffer_u32); + const V4x32U suffixL = Load0To16<>(from, size_mod32, size); + const V4x32U packetL = Concatenate(bufferL, buffer_valid, suffixL); + const size_t offsetH = sizeof(V4x32U) - buffer_valid; + V4x32U packetH = packetL - packetL; + // Do we have enough input to start filling the upper 16 packet bytes? + if (size_mod32 > offsetH) { + const size_t sizeH = size_mod32 - offsetH; + packetH = Load0To16<>(from + offsetH, sizeH, V4x32U(sizeH)); + } + + Update(packetH, packetL); + } + } + + private: + static HH_INLINE V4x32U MaskedLoadInt(const char* from, + const V4x32U& int_mask) { + // No faults will be raised when reading n=0..3 ints from "from" provided + // int_mask[n] = 0. + const int* HH_RESTRICT int_from = reinterpret_cast(from); + return V4x32U(_mm_maskload_epi32(int_from, int_mask)); + } + + // Loads <= 16 bytes without accessing any byte outside [from, from + size). + // from[i] is loaded into lane i; from[i >= size] is undefined. + template + static HH_INLINE V4x32U Load0To16(const char* from, const size_t size_mod32, + const V4x32U& size) { + const char* remainder = from + (size_mod32 & ~3); + const uint64_t last3 = Load3()(Load3Policy(), remainder, size_mod32 & 3); + const V4x32U int_mask = IntMask()(size); + const V4x32U int_lanes = MaskedLoadInt(from, int_mask); + return Insert4AboveMask(last3, int_mask, int_lanes); + } + + static HH_INLINE V4x64U Rotate64By32(const V4x64U& v) { + return V4x64U(_mm256_shuffle_epi32(v, _MM_SHUFFLE(2, 3, 0, 1))); + } + + // Rotates 32-bit lanes by "count" bits. + static HH_INLINE V4x64U Rotate32By(const V4x64U& v, const V8x32U& count) { + // Use variable shifts because sll_epi32 has 4 cycle latency (presumably + // to broadcast the shift count). 
+ const V4x64U shifted_left(_mm256_sllv_epi32(v, count)); + const V4x64U shifted_right(_mm256_srlv_epi32(v, V8x32U(32) - count)); + return shifted_left | shifted_right; + } + + static HH_INLINE V4x64U Permute(const V4x64U& v) { + // For complete mixing, we need to swap the upper and lower 128-bit halves; + // we also swap all 32-bit halves. This is faster than extracti128 plus + // inserti128 followed by Rotate64By32. + const V4x64U indices(0x0000000200000003ull, 0x0000000000000001ull, + 0x0000000600000007ull, 0x0000000400000005ull); + return V4x64U(_mm256_permutevar8x32_epi32(v, indices)); + } + + static HH_INLINE V4x64U MulLow32(const V4x64U& a, const V4x64U& b) { + return V4x64U(_mm256_mul_epu32(a, b)); + } + + static HH_INLINE V4x64U ZipperMerge(const V4x64U& v) { + // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + // varying degrees. In descending order of goodness, bytes + // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + // As expected, the upper and lower bytes are much worse. + // For each 64-bit lane, our objectives are: + // 1) maximizing and equalizing total goodness across the four lanes. + // 2) mixing with bytes from the neighboring lane (AVX-2 makes it difficult + // to cross the 128-bit wall, but PermuteAndUpdate takes care of that); + // 3) placing the worst bytes in the upper 32 bits because those will not + // be used in the next 32x32 multiplication. + const uint64_t hi = 0x070806090D0A040Bull; + const uint64_t lo = 0x000F010E05020C03ull; + return V4x64U(_mm256_shuffle_epi8(v, V4x64U(hi, lo, hi, lo))); + } + + // Updates four hash lanes in parallel by injecting four 64-bit packets. 
+ HH_INLINE void Update(const V4x64U& packet) { + v1 += packet; + v1 += mul0; + mul0 ^= MulLow32(v1, v0 >> 32); + HH_COMPILER_FENCE; + v0 += mul1; + mul1 ^= MulLow32(v0, v1 >> 32); + HH_COMPILER_FENCE; + v0 += ZipperMerge(v1); + v1 += ZipperMerge(v0); + } + + HH_INLINE void Update(const V4x32U& packetH, const V4x32U& packetL) { + const __m256i packetL256 = _mm256_castsi128_si256(packetL); + Update(V4x64U(_mm256_inserti128_si256(packetL256, packetH, 1))); + } + + // XORs a << 1 and a << 2 into *out after clearing the upper two bits of a. + // Also does the same for the upper 128 bit lane "b". Bit shifts are only + // possible on independent 64-bit lanes. We therefore insert the upper bits + // of a[0] that were lost into a[1]. Thanks to D. Lemire for helpful comments! + static HH_INLINE void XorByShift128Left12(const V4x64U& ba, + V4x64U* HH_RESTRICT out) { + const V4x64U zero = ba ^ ba; + const V4x64U top_bits2 = ba >> (64 - 2); + const V4x64U ones = ba == ba; // FF .. FF + const V4x64U shifted1_unmasked = ba + ba; // (avoids needing port0) + HH_COMPILER_FENCE; + + // Only the lower halves of top_bits1's 128 bit lanes will be used, so we + // can compute it before clearing the upper two bits of ba. + const V4x64U top_bits1 = ba >> (64 - 1); + const V4x64U upper_8bytes(_mm256_slli_si256(ones, 8)); // F 0 F 0 + const V4x64U shifted2 = shifted1_unmasked + shifted1_unmasked; + HH_COMPILER_FENCE; + + const V4x64U upper_bit_of_128 = upper_8bytes << 63; // 80..00 80..00 + const V4x64U new_low_bits2(_mm256_unpacklo_epi64(zero, top_bits2)); + *out ^= shifted2; + HH_COMPILER_FENCE; + + // The result must be as if the upper two bits of the input had been clear, + // otherwise we're no longer computing a reduction. 
+ const V4x64U shifted1 = AndNot(upper_bit_of_128, shifted1_unmasked); + *out ^= new_low_bits2; + HH_COMPILER_FENCE; + + const V4x64U new_low_bits1(_mm256_unpacklo_epi64(zero, top_bits1)); + *out ^= shifted1; + + *out ^= new_low_bits1; + } + + // Modular reduction by the irreducible polynomial (x^128 + x^2 + x). + // Input: two 256-bit numbers a3210 and b3210, interleaved in 2 vectors. + // The upper and lower 128-bit halves are processed independently. + static HH_INLINE V4x64U ModularReduction(const V4x64U& b32a32, + const V4x64U& b10a10) { + // See Lemire, https://arxiv.org/pdf/1503.03465v8.pdf. + V4x64U out = b10a10; + XorByShift128Left12(b32a32, &out); + return out; + } + + V4x64U v0; + V4x64U v1; + V4x64U mul0; + V4x64U mul1; +}; + +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HH_AVX2_H_ diff --git a/highwayhash/highwayhash/hh_buffer.h b/highwayhash/highwayhash/hh_buffer.h new file mode 100644 index 000000000..7b1dad0d1 --- /dev/null +++ b/highwayhash/highwayhash/hh_buffer.h @@ -0,0 +1,116 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_BUFFER_H_ +#define HIGHWAYHASH_HH_BUFFER_H_ + +// Helper functions used by hh_avx2 and hh_sse41. + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. 
This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#if HH_TARGET == HH_TARGET_NEON +#include "highwayhash/vector_neon.h" +#else +#include "highwayhash/vector128.h" +#endif + +// For auto-dependency generation, we need to include all headers but not their +// contents (otherwise compilation fails because -msse4.1 is not specified). +#ifndef HH_DISABLE_TARGET_SPECIFIC + +namespace highwayhash { +// To prevent ODR violations when including this from multiple translation +// units (TU) that are compiled with different flags, the contents must reside +// in a namespace whose name is unique to the TU. NOTE: this behavior is +// incompatible with precompiled modules and requires textual inclusion instead. +namespace HH_TARGET_NAME { + +template +struct IntMask {}; // primary template + +template <> +struct IntMask<0> { + // Returns 32-bit lanes : ~0U if that lane can be loaded given "size" bytes. + // Typical case: size = 0..16, nothing deducted. + HH_INLINE V4x32U operator()(const V4x32U& size) const { + // Lane n is valid if size >= (n + 1) * 4; subtract one because we only have + // greater-than comparisons and don't want a negated mask. +#if HH_TARGET == HH_TARGET_NEON + return V4x32U(vcgtq_u32(size, V4x32U(15, 11, 7, 3))); +#else + return V4x32U(_mm_cmpgt_epi32(size, V4x32U(15, 11, 7, 3))); +#endif + } +}; + +template <> +struct IntMask<16> { + // "size" is 16..31; this is for loading the upper half of a packet, so + // effectively deduct 16 from size by changing the comparands. + HH_INLINE V4x32U operator()(const V4x32U& size) const { +#if HH_TARGET == HH_TARGET_NEON + return V4x32U(vcgtq_u32(size, V4x32U(31, 27, 23, 19))); +#else + return V4x32U(_mm_cmpgt_epi32(size, V4x32U(31, 27, 23, 19))); +#endif + } +}; + +// Inserts "bytes4" into "prev" at the lowest i such that mask[i] = 0. +// Assumes prev[j] == 0 if mask[j] = 0. 
+HH_INLINE V4x32U Insert4AboveMask(const uint32_t bytes4, const V4x32U& mask, + const V4x32U& prev) { + // There is no 128-bit shift by a variable count. Using shuffle_epi8 with a + // control mask requires a table lookup. We know the shift count is a + // multiple of 4 bytes, so we can broadcastd_epi32 and clear all lanes except + // those where mask != 0. This works because any upper output lanes need not + // be zero. + return prev | AndNot(mask, V4x32U(bytes4)); +} + +#if HH_TARGET == HH_TARGET_AVX2 +// Shifts "suffix" left by "prefix_len" = 0..15 bytes, clears upper bytes of +// "prefix", and returns the merged/concatenated bytes. +HH_INLINE V4x32U Concatenate(const V4x32U& prefix, const size_t prefix_len, + const V4x32U& suffix) { + static const uint64_t table[V16x8U::N][V2x64U::N] = { + {0x0706050403020100ull, 0x0F0E0D0C0B0A0908ull}, + {0x06050403020100FFull, 0x0E0D0C0B0A090807ull}, + {0x050403020100FFFFull, 0x0D0C0B0A09080706ull}, + {0x0403020100FFFFFFull, 0x0C0B0A0908070605ull}, + {0x03020100FFFFFFFFull, 0x0B0A090807060504ull}, + {0x020100FFFFFFFFFFull, 0x0A09080706050403ull}, + {0x0100FFFFFFFFFFFFull, 0x0908070605040302ull}, + {0x00FFFFFFFFFFFFFFull, 0x0807060504030201ull}, + {0xFFFFFFFFFFFFFFFFull, 0x0706050403020100ull}, + {0xFFFFFFFFFFFFFFFFull, 0x06050403020100FFull}, + {0xFFFFFFFFFFFFFFFFull, 0x050403020100FFFFull}, + {0xFFFFFFFFFFFFFFFFull, 0x0403020100FFFFFFull}, + {0xFFFFFFFFFFFFFFFFull, 0x03020100FFFFFFFFull}, + {0xFFFFFFFFFFFFFFFFull, 0x020100FFFFFFFFFFull}, + {0xFFFFFFFFFFFFFFFFull, 0x0100FFFFFFFFFFFFull}, + {0xFFFFFFFFFFFFFFFFull, 0x00FFFFFFFFFFFFFFull}}; + const V2x64U control = Load(&table[prefix_len][0]); + const V2x64U shifted_suffix(_mm_shuffle_epi8(suffix, control)); + return V4x32U(_mm_blendv_epi8(shifted_suffix, prefix, control)); +} +#endif +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HH_BUFFER_H_ diff --git a/highwayhash/highwayhash/hh_neon.cc 
b/highwayhash/highwayhash/hh_neon.cc new file mode 100644 index 000000000..981c094db --- /dev/null +++ b/highwayhash/highwayhash/hh_neon.cc @@ -0,0 +1,22 @@ +// Copyright 2017-2019 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME NEON +// GCC 4.5.4 only defines the former; 5.4 defines both. +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#include "highwayhash/highwayhash_target.cc" +#endif diff --git a/highwayhash/highwayhash/hh_neon.h b/highwayhash/highwayhash/hh_neon.h new file mode 100644 index 000000000..286ad7ec0 --- /dev/null +++ b/highwayhash/highwayhash/hh_neon.h @@ -0,0 +1,336 @@ +// Copyright 2015-2019 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef HIGHWAYHASH_HH_NEON_H_
+#define HIGHWAYHASH_HH_NEON_H_
+
+// WARNING: this is a "restricted" header because it is included from
+// translation units compiled with different flags. This header and its
+// dependencies must not define any function unless it is static inline and/or
+// within namespace HH_TARGET_NAME. See arch_specific.h for details.
+
+#include "highwayhash/arch_specific.h"
+#include "highwayhash/compiler_specific.h"
+#include "highwayhash/hh_buffer.h"
+#include "highwayhash/hh_types.h"
+#include "highwayhash/load3.h"
+#include "highwayhash/vector_neon.h"
+
+// For auto-dependency generation, we need to include all headers but not their
+// contents.
+#ifndef HH_DISABLE_TARGET_SPECIFIC
+
+namespace highwayhash {
+
+// See vector_neon.h for why this namespace is necessary; matching it here makes
+// it easier to use the vector_neon symbols, but requires textual inclusion.
+namespace HH_TARGET_NAME {
+
+// J-lanes tree hashing: see https://doi.org/10.4236/jis.2014.53010
+// Uses the same method that SSE4.1 uses, only with NEON used instead.
+class HHStateNEON {
+ public:
+  explicit HH_INLINE HHStateNEON(const HHKey key) { Reset(key); }
+
+  HH_INLINE void Reset(const HHKey key) {
+    // "Nothing up my sleeve numbers"; see HHStateAVX2.
+ const V2x64U init0L(0xa4093822299f31d0ull, 0xdbe6d5d5fe4cce2full); + const V2x64U init0H(0x243f6a8885a308d3ull, 0x13198a2e03707344ull); + const V2x64U init1L(0xc0acf169b5f18a8cull, 0x3bd39e10cb0ef593ull); + const V2x64U init1H(0x452821e638d01377ull, 0xbe5466cf34e90c6cull); + const V2x64U keyL = LoadUnaligned(key + 0); + const V2x64U keyH = LoadUnaligned(key + 2); + v0L = keyL ^ init0L; + v0H = keyH ^ init0H; + v1L = Rotate64By32(keyL) ^ init1L; + v1H = Rotate64By32(keyH) ^ init1H; + mul0L = init0L; + mul0H = init0H; + mul1L = init1L; + mul1H = init1H; + } + + HH_INLINE void Update(const HHPacket& packet_bytes) { + const uint64_t* HH_RESTRICT packet = + reinterpret_cast(packet_bytes); + const V2x64U packetL = LoadUnaligned(packet + 0); + const V2x64U packetH = LoadUnaligned(packet + 2); + Update(packetH, packetL); + } + + HH_INLINE void UpdateRemainder(const char* bytes, const size_t size_mod32) { + // 'Length padding' differentiates zero-valued inputs that have the same + // size/32. mod32 is sufficient because each Update behaves as if a + // counter were injected, because the state is large and mixed thoroughly. + + // We can't use vshl/vsra because it needs a constant expression. + // In order to do this right now, we would need a switch statement. + const int32x4_t vsize_mod32(vdupq_n_s32(static_cast(size_mod32))); + // -32 - size_mod32 + const int32x4_t shift_right_amt = + vdupq_n_s32(static_cast(size_mod32) + (~32 + 1)); + // Equivalent to storing size_mod32 in packet. + v0L += V2x64U(vreinterpretq_u64_s32(vsize_mod32)); + v0H += V2x64U(vreinterpretq_u64_s32(vsize_mod32)); + + // Boosts the avalanche effect of mod32. 
+ v1L = V2x64U(vreinterpretq_u64_u32( + vorrq_u32(vshlq_u32(vreinterpretq_u32_u64(v1L), vsize_mod32), + vshlq_u32(vreinterpretq_u32_u64(v1L), shift_right_amt)))); + v1H = V2x64U(vreinterpretq_u64_u32( + vorrq_u32(vshlq_u32(vreinterpretq_u32_u64(v1H), vsize_mod32), + vshlq_u32(vreinterpretq_u32_u64(v1H), shift_right_amt)))); + + const size_t size_mod4 = size_mod32 & 3; + const char* HH_RESTRICT remainder = bytes + (size_mod32 & ~3); + + if (HH_UNLIKELY(size_mod32 & 16)) { // 16..31 bytes left + const V2x64U packetL = + LoadUnaligned(reinterpret_cast(bytes)); + + V2x64U packetH = LoadMultipleOfFour(bytes + 16, size_mod32); + + const uint32_t last4 = + Load3()(Load3::AllowReadBeforeAndReturn(), remainder, size_mod4); + + // The upper four bytes of packetH are zero, so insert there. + packetH = V2x64U(vreinterpretq_u64_u32( + vsetq_lane_u32(last4, vreinterpretq_u32_u64(packetH), 3))); + Update(packetH, packetL); + } else { // size_mod32 < 16 + const V2x64U packetL = LoadMultipleOfFour(bytes, size_mod32); + + const uint64_t last4 = + Load3()(Load3::AllowUnordered(), remainder, size_mod4); + + // Rather than insert into packetL[3], it is faster to initialize + // the otherwise empty packetH. + HH_ALIGNAS(16) uint64_t tmp[2] = {last4, 0}; + const V2x64U packetH(vld1q_u64(tmp)); + Update(packetH, packetL); + } + } + + HH_INLINE void Finalize(HHResult64* HH_RESTRICT result) { + // Mix together all lanes. 
+ for (int n = 0; n < 4; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0 = v0L + mul0L; + const V2x64U sum1 = v1L + mul1L; + const V2x64U hash = sum0 + sum1; + vst1q_low_u64(reinterpret_cast(result), hash); + } + + HH_INLINE void Finalize(HHResult128* HH_RESTRICT result) { + for (int n = 0; n < 6; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0 = v0L + mul0L; + const V2x64U sum1 = v1H + mul1H; + const V2x64U hash = sum0 + sum1; + StoreUnaligned(hash, &(*result)[0]); + } + + HH_INLINE void Finalize(HHResult256* HH_RESTRICT result) { + for (int n = 0; n < 10; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0L = v0L + mul0L; + const V2x64U sum1L = v1L + mul1L; + const V2x64U sum0H = v0H + mul0H; + const V2x64U sum1H = v1H + mul1H; + const V2x64U hashL = ModularReduction(sum1L, sum0L); + const V2x64U hashH = ModularReduction(sum1H, sum0H); + StoreUnaligned(hashL, &(*result)[0]); + StoreUnaligned(hashH, &(*result)[2]); + } + + static HH_INLINE void ZeroInitialize(char* HH_RESTRICT buffer_bytes) { + for (size_t i = 0; i < sizeof(HHPacket); ++i) { + buffer_bytes[i] = 0; + } + } + + static HH_INLINE void CopyPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[i] = from[i]; + } + } + + static HH_INLINE void AppendPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer, + const size_t buffer_valid) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[buffer_valid + i] = from[i]; + } + } + + HH_INLINE void AppendAndUpdate(const char* HH_RESTRICT from, + const size_t size_mod32, + const char* HH_RESTRICT buffer, + const size_t buffer_valid) { + HH_ALIGNAS(32) HHPacket tmp; + for (size_t i = 0; i < buffer_valid; ++i) { + tmp[i] = buffer[i]; + } + for (size_t i = 0; i < size_mod32; ++i) { + tmp[buffer_valid + i] = from[i]; + } + Update(tmp); + } + + private: + // Swap 32-bit halves of each lane (caller swaps 128-bit halves) + 
static HH_INLINE V2x64U Rotate64By32(const V2x64U& v) { + return V2x64U(vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_u64(v)))); + } + + static HH_INLINE V2x64U ZipperMerge(const V2x64U& v) { + // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + // varying degrees. In descending order of goodness, bytes + // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + // As expected, the upper and lower bytes are much worse. + // For each 64-bit lane, our objectives are: + // 1) maximizing and equalizing total goodness across each lane's bytes; + // 2) mixing with bytes from the neighboring lane; + // 3) placing the worst bytes in the upper 32 bits because those will not + // be used in the next 32x32 multiplication. + + // The positions of each byte in the new vector. + const uint8_t shuffle_positions[] = {3, 12, 2, 5, 14, 1, 15, 0, + 11, 4, 10, 13, 9, 6, 8, 7}; + const uint8x16_t tbl = vld1q_u8(shuffle_positions); + + // Note: vqtbl1q_u8 is polyfilled for ARMv7a in vector_neon.h. + return V2x64U( + vreinterpretq_u64_u8(vqtbl1q_u8(vreinterpretq_u8_u64(v), tbl))); + } + + HH_INLINE void Update(const V2x64U& packetH, const V2x64U& packetL) { + v1L += packetL; + v1H += packetH; + v1L += mul0L; + v1H += mul0H; + // mul0L ^= (v1L & 0xFFFFFFFF) * (v0L >> 32); + mul0L ^= V2x64U(vmull_u32(vmovn_u64(v1L), vshrn_n_u64(v0L, 32))); + // mul0H ^= (v1H & 0xFFFFFFFF) * (v0H >> 32); + mul0H ^= V2x64U(vmull_u32(vmovn_u64(v1H), vshrn_n_u64(v0H, 32))); + v0L += mul1L; + v0H += mul1H; + // mul1L ^= (v0L & 0xFFFFFFFF) * (v1L >> 32); + mul1L ^= V2x64U(vmull_u32(vmovn_u64(v0L), vshrn_n_u64(v1L, 32))); + // mul1H ^= (v0H & 0xFFFFFFFF) * (v1H >> 32); + mul1H ^= V2x64U(vmull_u32(vmovn_u64(v0H), vshrn_n_u64(v1H, 32))); + v0L += ZipperMerge(v1L); + v0H += ZipperMerge(v1H); + v1L += ZipperMerge(v0L); + v1H += ZipperMerge(v0H); + } + + HH_INLINE void PermuteAndUpdate() { + // It is slightly better to permute v0 than v1; it will be added to v1. 
+ Update(Rotate64By32(v0L), Rotate64By32(v0H)); + } + + // Returns zero-initialized vector with the lower "size" = 0, 4, 8 or 12 + // bytes loaded from "bytes". Serves as a replacement for AVX2 maskload_epi32. + static HH_INLINE V2x64U LoadMultipleOfFour(const char* bytes, + const size_t size) { + const uint32_t* words = reinterpret_cast(bytes); + // Mask of 1-bits where the final 4 bytes should be inserted (replacement + // for variable shift/insert using broadcast+blend). + alignas(16) const uint64_t mask_pattern[2] = {0xFFFFFFFFULL, 0}; + V2x64U mask4(vld1q_u64(mask_pattern)); // 'insert' into lane 0 + V2x64U ret(vdupq_n_u64(0)); + if (size & 8) { + ret = V2x64U(vld1q_low_u64(reinterpret_cast(words))); + // mask4 = 0 ~0 0 0 ('insert' into lane 2) + mask4 = V2x64U(vshlq_n_u128(mask4, 8)); + words += 2; + } + // Final 4 (possibly after the 8 above); 'insert' into lane 0 or 2 of ret. + if (size & 4) { + // = 0 word2 0 word2; mask4 will select which lane to keep. + const V2x64U broadcast( + vreinterpretq_u64_u32(vdupq_n_u32(LoadUnaligned(words)))); + // (slightly faster than blendv_epi8) + ret |= V2x64U(broadcast & mask4); + } + return ret; + } + + // XORs x << 1 and x << 2 into *out after clearing the upper two bits of x. + // Bit shifts are only possible on independent 64-bit lanes. We therefore + // insert the upper bits of x[0] that were lost into x[1]. + // Thanks to D. Lemire for helpful comments! + static HH_INLINE void XorByShift128Left12(const V2x64U& x, + V2x64U* HH_RESTRICT out) { + const V4x32U zero(vdupq_n_u32(0)); + const V2x64U sign_bit128( + vreinterpretq_u64_u32(vsetq_lane_u32(0x80000000u, zero, 3))); + const V2x64U top_bits2 = x >> (64 - 2); + HH_COMPILER_FENCE; + const V2x64U shifted1_unmasked = x + x; // (avoids needing port0) + + // Only the lower half of top_bits1 will be used, so we + // can compute it before clearing the upper two bits of x. 
+ const V2x64U top_bits1 = x >> (64 - 1); + const V2x64U shifted2 = shifted1_unmasked + shifted1_unmasked; + HH_COMPILER_FENCE; + + const V2x64U new_low_bits2(vshlq_n_u128(top_bits2, 8)); + *out ^= shifted2; + // The result must be as if the upper two bits of the input had been clear, + // otherwise we're no longer computing a reduction. + const V2x64U shifted1 = AndNot(sign_bit128, shifted1_unmasked); + HH_COMPILER_FENCE; + + const V2x64U new_low_bits1(vshlq_n_u128(top_bits1, 8)); + *out ^= new_low_bits2; + *out ^= shifted1; + *out ^= new_low_bits1; + } + + // Modular reduction by the irreducible polynomial (x^128 + x^2 + x). + // Input: a 256-bit number a3210. + static HH_INLINE V2x64U ModularReduction(const V2x64U& a32_unmasked, + const V2x64U& a10) { + // See Lemire, https://arxiv.org/pdf/1503.03465v8.pdf. + V2x64U out = a10; + XorByShift128Left12(a32_unmasked, &out); + return out; + } + + V2x64U v0L; + V2x64U v0H; + V2x64U v1L; + V2x64U v1H; + V2x64U mul0L; + V2x64U mul0H; + V2x64U mul1L; + V2x64U mul1H; +}; + +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HH_NEON_H_ diff --git a/highwayhash/highwayhash/hh_portable.cc b/highwayhash/highwayhash/hh_portable.cc new file mode 100644 index 000000000..3e0de9ed9 --- /dev/null +++ b/highwayhash/highwayhash/hh_portable.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME Portable +#include "highwayhash/highwayhash_target.cc" diff --git a/highwayhash/highwayhash/hh_portable.h b/highwayhash/highwayhash/hh_portable.h new file mode 100644 index 000000000..ab6e2faf2 --- /dev/null +++ b/highwayhash/highwayhash/hh_portable.h @@ -0,0 +1,302 @@ +// Copyright 2015-2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_PORTABLE_H_ +#define HIGHWAYHASH_HH_PORTABLE_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/endianess.h" +#include "highwayhash/hh_types.h" +#include "highwayhash/load3.h" + +namespace highwayhash { +// See vector128.h for why this namespace is necessary; we match it here for +// consistency. As a result, this header requires textual inclusion. 
+namespace HH_TARGET_NAME { + +class HHStatePortable { + public: + static const int kNumLanes = 4; + using Lanes = uint64_t[kNumLanes]; + + explicit HH_INLINE HHStatePortable(const HHKey keys) { Reset(keys); } + + HH_INLINE void Reset(const HHKey keys) { + static const Lanes init0 = {0xdbe6d5d5fe4cce2full, 0xa4093822299f31d0ull, + 0x13198a2e03707344ull, 0x243f6a8885a308d3ull}; + static const Lanes init1 = {0x3bd39e10cb0ef593ull, 0xc0acf169b5f18a8cull, + 0xbe5466cf34e90c6cull, 0x452821e638d01377ull}; + Lanes rotated_keys; + Rotate64By32(keys, &rotated_keys); + Copy(init0, &mul0); + Copy(init1, &mul1); + Xor(init0, keys, &v0); + Xor(init1, rotated_keys, &v1); + } + + HH_INLINE void Update(const HHPacket& packet) { + Lanes packet_lanes; + CopyPartial(&packet[0], sizeof(HHPacket), + reinterpret_cast(&packet_lanes)); + for (int lane = 0; lane < kNumLanes; ++lane) { + packet_lanes[lane] = host_from_le64(packet_lanes[lane]); + } + Update(packet_lanes); + } + + HH_INLINE void UpdateRemainder(const char* bytes, const size_t size_mod32) { + // 'Length padding' differentiates zero-valued inputs that have the same + // size/32. mod32 is sufficient because each Update behaves as if a + // counter were injected, because the state is large and mixed thoroughly. + const uint64_t mod32_pair = + (static_cast(size_mod32) << 32) + size_mod32; + for (int lane = 0; lane < kNumLanes; ++lane) { + v0[lane] += mod32_pair; + } + Rotate32By(reinterpret_cast(&v1), size_mod32); + + const size_t size_mod4 = size_mod32 & 3; + const char* remainder = bytes + (size_mod32 & ~3); + + HH_ALIGNAS(32) HHPacket packet = {0}; + CopyPartial(bytes, remainder - bytes, &packet[0]); + + if (size_mod32 & 16) { // 16..31 bytes left + // Read the last 0..3 bytes and previous 1..4 into the upper bits. + // Insert into the upper four bytes of packet, which are zero. 
+ uint32_t last4 = + Load3()(Load3::AllowReadBeforeAndReturn(), remainder, size_mod4); + last4 = host_from_le32(last4); + + CopyPartial(reinterpret_cast(&last4), 4, &packet[28]); + } else { // size_mod32 < 16 + uint64_t last4 = Load3()(Load3::AllowUnordered(), remainder, size_mod4); + last4 = host_from_le64(last4); + + // Rather than insert at packet + 28, it is faster to initialize + // the otherwise empty packet + 16 with up to 64 bits of padding. + CopyPartial(reinterpret_cast(&last4), sizeof(last4), + &packet[16]); + } + Update(packet); + } + + HH_INLINE void Finalize(HHResult64* HH_RESTRICT result) { + for (int n = 0; n < 4; n++) { + PermuteAndUpdate(); + } + + *result = v0[0] + v1[0] + mul0[0] + mul1[0]; + } + + HH_INLINE void Finalize(HHResult128* HH_RESTRICT result) { + for (int n = 0; n < 6; n++) { + PermuteAndUpdate(); + } + + (*result)[0] = v0[0] + mul0[0] + v1[2] + mul1[2]; + (*result)[1] = v0[1] + mul0[1] + v1[3] + mul1[3]; + } + + HH_INLINE void Finalize(HHResult256* HH_RESTRICT result) { + for (int n = 0; n < 10; n++) { + PermuteAndUpdate(); + } + + ModularReduction(v1[1] + mul1[1], v1[0] + mul1[0], v0[1] + mul0[1], + v0[0] + mul0[0], &(*result)[1], &(*result)[0]); + ModularReduction(v1[3] + mul1[3], v1[2] + mul1[2], v0[3] + mul0[3], + v0[2] + mul0[2], &(*result)[3], &(*result)[2]); + } + + static HH_INLINE void ZeroInitialize(char* HH_RESTRICT buffer) { + for (size_t i = 0; i < sizeof(HHPacket); ++i) { + buffer[i] = 0; + } + } + + static HH_INLINE void CopyPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[i] = from[i]; + } + } + + static HH_INLINE void AppendPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer, + const size_t buffer_valid) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[buffer_valid + i] = from[i]; + } + } + + HH_INLINE void AppendAndUpdate(const char* HH_RESTRICT from, + const size_t 
size_mod32, + const char* HH_RESTRICT buffer, + const size_t buffer_valid) { + HH_ALIGNAS(32) HHPacket tmp; + for (size_t i = 0; i < buffer_valid; ++i) { + tmp[i] = buffer[i]; + } + for (size_t i = 0; i < size_mod32; ++i) { + tmp[buffer_valid + i] = from[i]; + } + Update(tmp); + } + + private: + static HH_INLINE void Copy(const Lanes& source, Lanes* HH_RESTRICT dest) { + for (int lane = 0; lane < kNumLanes; ++lane) { + (*dest)[lane] = source[lane]; + } + } + + static HH_INLINE void Add(const Lanes& source, Lanes* HH_RESTRICT dest) { + for (int lane = 0; lane < kNumLanes; ++lane) { + (*dest)[lane] += source[lane]; + } + } + + template + static HH_INLINE void Xor(const Lanes& op1, const LanesOrPointer& op2, + Lanes* HH_RESTRICT dest) { + for (int lane = 0; lane < kNumLanes; ++lane) { + (*dest)[lane] = op1[lane] ^ op2[lane]; + } + } + +// Clears all bits except one byte at the given offset. +#define MASK(v, bytes) ((v) & (0xFFull << ((bytes)*8))) + + // 16-byte permutation; shifting is about 10% faster than byte loads. + // Adds zipper-merge result to add*. + static HH_INLINE void ZipperMergeAndAdd(const uint64_t v1, const uint64_t v0, + uint64_t* HH_RESTRICT add1, + uint64_t* HH_RESTRICT add0) { + *add0 += ((MASK(v0, 3) + MASK(v1, 4)) >> 24) + + ((MASK(v0, 5) + MASK(v1, 6)) >> 16) + MASK(v0, 2) + + (MASK(v0, 1) << 32) + (MASK(v1, 7) >> 8) + (v0 << 56); + + *add1 += ((MASK(v1, 3) + MASK(v0, 4)) >> 24) + MASK(v1, 2) + + (MASK(v1, 5) >> 16) + (MASK(v1, 1) << 24) + (MASK(v0, 6) >> 8) + + (MASK(v1, 0) << 48) + MASK(v0, 7); + } + +#undef MASK + + // For inputs that are already in native byte order (e.g. 
PermuteAndAdd) + HH_INLINE void Update(const Lanes& packet_lanes) { + Add(packet_lanes, &v1); + Add(mul0, &v1); + + // (Loop is faster than unrolling) + for (int lane = 0; lane < kNumLanes; ++lane) { + const uint32_t v1_32 = static_cast(v1[lane]); + mul0[lane] ^= v1_32 * (v0[lane] >> 32); + v0[lane] += mul1[lane]; + const uint32_t v0_32 = static_cast(v0[lane]); + mul1[lane] ^= v0_32 * (v1[lane] >> 32); + } + + ZipperMergeAndAdd(v1[1], v1[0], &v0[1], &v0[0]); + ZipperMergeAndAdd(v1[3], v1[2], &v0[3], &v0[2]); + + ZipperMergeAndAdd(v0[1], v0[0], &v1[1], &v1[0]); + ZipperMergeAndAdd(v0[3], v0[2], &v1[3], &v1[2]); + } + + static HH_INLINE uint64_t Rotate64By32(const uint64_t x) { + return (x >> 32) | (x << 32); + } + + template + static HH_INLINE void Rotate64By32(const LanesOrPointer& v, + Lanes* HH_RESTRICT rotated) { + for (int i = 0; i < kNumLanes; ++i) { + (*rotated)[i] = Rotate64By32(v[i]); + } + } + + static HH_INLINE void Rotate32By(uint32_t* halves, const uint64_t count) { + for (int i = 0; i < 2 * kNumLanes; ++i) { + const uint32_t x = halves[i]; + halves[i] = (x << count) | (x >> (32 - count)); + } + } + + static HH_INLINE void Permute(const Lanes& v, Lanes* HH_RESTRICT permuted) { + (*permuted)[0] = Rotate64By32(v[2]); + (*permuted)[1] = Rotate64By32(v[3]); + (*permuted)[2] = Rotate64By32(v[0]); + (*permuted)[3] = Rotate64By32(v[1]); + } + + HH_INLINE void PermuteAndUpdate() { + Lanes permuted; + Permute(v0, &permuted); + Update(permuted); + } + + // Computes a << kBits for 128-bit a = (a1, a0). + // Bit shifts are only possible on independent 64-bit lanes. We therefore + // insert the upper bits of a0 that were lost into a1. This is slightly + // shorter than Lemire's (a << 1) | (((a >> 8) << 1) << 8) approach. 
+ template + static HH_INLINE void Shift128Left(uint64_t* HH_RESTRICT a1, + uint64_t* HH_RESTRICT a0) { + const uint64_t shifted1 = (*a1) << kBits; + const uint64_t top_bits = (*a0) >> (64 - kBits); + *a0 <<= kBits; + *a1 = shifted1 | top_bits; + } + + // Modular reduction by the irreducible polynomial (x^128 + x^2 + x). + // Input: a 256-bit number a3210. + static HH_INLINE void ModularReduction(const uint64_t a3_unmasked, + const uint64_t a2, const uint64_t a1, + const uint64_t a0, + uint64_t* HH_RESTRICT m1, + uint64_t* HH_RESTRICT m0) { + // The upper two bits must be clear, otherwise a3 << 2 would lose bits, + // in which case we're no longer computing a reduction. + const uint64_t a3 = a3_unmasked & 0x3FFFFFFFFFFFFFFFull; + // See Lemire, https://arxiv.org/pdf/1503.03465v8.pdf. + uint64_t a3_shl1 = a3; + uint64_t a2_shl1 = a2; + uint64_t a3_shl2 = a3; + uint64_t a2_shl2 = a2; + Shift128Left<1>(&a3_shl1, &a2_shl1); + Shift128Left<2>(&a3_shl2, &a2_shl2); + *m1 = a1 ^ a3_shl1 ^ a3_shl2; + *m0 = a0 ^ a2_shl1 ^ a2_shl2; + } + + Lanes v0; + Lanes v1; + Lanes mul0; + Lanes mul1; +}; + +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HIGHWAYHASH_HH_PORTABLE_H_ diff --git a/highwayhash/highwayhash/hh_sse41.cc b/highwayhash/highwayhash/hh_sse41.cc new file mode 100644 index 000000000..9d6a0b968 --- /dev/null +++ b/highwayhash/highwayhash/hh_sse41.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME SSE41 +#include "highwayhash/highwayhash_target.cc" diff --git a/highwayhash/highwayhash/hh_sse41.h b/highwayhash/highwayhash/hh_sse41.h new file mode 100644 index 000000000..333db1d1b --- /dev/null +++ b/highwayhash/highwayhash/hh_sse41.h @@ -0,0 +1,336 @@ +// Copyright 2015-2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_SSE41_H_ +#define HIGHWAYHASH_HH_SSE41_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_buffer.h" +#include "highwayhash/hh_types.h" +#include "highwayhash/load3.h" +#include "highwayhash/vector128.h" + +// For auto-dependency generation, we need to include all headers but not their +// contents (otherwise compilation fails because -msse4.1 is not specified). 
+#ifndef HH_DISABLE_TARGET_SPECIFIC + +namespace highwayhash { +// See vector128.h for why this namespace is necessary; matching it here makes +// it easier use the vector128 symbols, but requires textual inclusion. +namespace HH_TARGET_NAME { + +template +HH_INLINE T LoadUnaligned(const void* from) { + T ret; + memcpy(&ret, from, sizeof(ret)); + return ret; +} + +// J-lanes tree hashing: see https://doi.org/10.4236/jis.2014.53010 +// Uses pairs of SSE4.1 instructions to emulate the AVX-2 algorithm. +class HHStateSSE41 { + public: + explicit HH_INLINE HHStateSSE41(const HHKey key) { Reset(key); } + + HH_INLINE void Reset(const HHKey key) { + // "Nothing up my sleeve numbers"; see HHStateTAVX2. + const V2x64U init0L(0xa4093822299f31d0ull, 0xdbe6d5d5fe4cce2full); + const V2x64U init0H(0x243f6a8885a308d3ull, 0x13198a2e03707344ull); + const V2x64U init1L(0xc0acf169b5f18a8cull, 0x3bd39e10cb0ef593ull); + const V2x64U init1H(0x452821e638d01377ull, 0xbe5466cf34e90c6cull); + const V2x64U keyL = LoadUnaligned(key + 0); + const V2x64U keyH = LoadUnaligned(key + 2); + v0L = keyL ^ init0L; + v0H = keyH ^ init0H; + v1L = Rotate64By32(keyL) ^ init1L; + v1H = Rotate64By32(keyH) ^ init1H; + mul0L = init0L; + mul0H = init0H; + mul1L = init1L; + mul1H = init1H; + } + + HH_INLINE void Update(const HHPacket& packet_bytes) { + const uint64_t* HH_RESTRICT packet = + reinterpret_cast(packet_bytes); + const V2x64U packetL = LoadUnaligned(packet + 0); + const V2x64U packetH = LoadUnaligned(packet + 2); + Update(packetH, packetL); + } + + HH_INLINE void UpdateRemainder(const char* bytes, const size_t size_mod32) { + // 'Length padding' differentiates zero-valued inputs that have the same + // size/32. mod32 is sufficient because each Update behaves as if a + // counter were injected, because the state is large and mixed thoroughly. + const V4x32U vsize_mod32(static_cast(size_mod32)); + // Equivalent to storing size_mod32 in packet. 
+ v0L += V2x64U(vsize_mod32); + v0H += V2x64U(vsize_mod32); + // Boosts the avalanche effect of mod32. + Rotate32By(&v1H, &v1L, size_mod32); + + const size_t size_mod4 = size_mod32 & 3; + const char* HH_RESTRICT remainder = bytes + (size_mod32 & ~3); + + if (HH_UNLIKELY(size_mod32 & 16)) { // 16..31 bytes left + const V2x64U packetL = + LoadUnaligned(reinterpret_cast(bytes)); + + V2x64U packetH = LoadMultipleOfFour(bytes + 16, size_mod32); + + const uint32_t last4 = + Load3()(Load3::AllowReadBeforeAndReturn(), remainder, size_mod4); + + // The upper four bytes of packetH are zero, so insert there. + packetH = V2x64U(_mm_insert_epi32(packetH, last4, 3)); + Update(packetH, packetL); + } else { // size_mod32 < 16 + const V2x64U packetL = LoadMultipleOfFour(bytes, size_mod32); + + const uint64_t last4 = + Load3()(Load3::AllowUnordered(), remainder, size_mod4); + + // Rather than insert into packetL[3], it is faster to initialize + // the otherwise empty packetH. + const V2x64U packetH(_mm_cvtsi64_si128(last4)); + Update(packetH, packetL); + } + } + + HH_INLINE void Finalize(HHResult64* HH_RESTRICT result) { + // Mix together all lanes. 
+ for (int n = 0; n < 4; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0 = v0L + mul0L; + const V2x64U sum1 = v1L + mul1L; + const V2x64U hash = sum0 + sum1; + _mm_storel_epi64(reinterpret_cast<__m128i*>(result), hash); + } + + HH_INLINE void Finalize(HHResult128* HH_RESTRICT result) { + for (int n = 0; n < 6; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0 = v0L + mul0L; + const V2x64U sum1 = v1H + mul1H; + const V2x64U hash = sum0 + sum1; + StoreUnaligned(hash, &(*result)[0]); + } + + HH_INLINE void Finalize(HHResult256* HH_RESTRICT result) { + for (int n = 0; n < 10; n++) { + PermuteAndUpdate(); + } + + const V2x64U sum0L = v0L + mul0L; + const V2x64U sum1L = v1L + mul1L; + const V2x64U sum0H = v0H + mul0H; + const V2x64U sum1H = v1H + mul1H; + const V2x64U hashL = ModularReduction(sum1L, sum0L); + const V2x64U hashH = ModularReduction(sum1H, sum0H); + StoreUnaligned(hashL, &(*result)[0]); + StoreUnaligned(hashH, &(*result)[2]); + } + + static HH_INLINE void ZeroInitialize(char* HH_RESTRICT buffer_bytes) { + __m128i* buffer = reinterpret_cast<__m128i*>(buffer_bytes); + const __m128i zero = _mm_setzero_si128(); + _mm_store_si128(buffer + 0, zero); + _mm_store_si128(buffer + 1, zero); + } + + static HH_INLINE void CopyPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[i] = from[i]; + } + } + + static HH_INLINE void AppendPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer, + const size_t buffer_valid) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[buffer_valid + i] = from[i]; + } + } + + HH_INLINE void AppendAndUpdate(const char* HH_RESTRICT from, + const size_t size_mod32, + const char* HH_RESTRICT buffer, + const size_t buffer_valid) { + HH_ALIGNAS(32) HHPacket tmp; + for (size_t i = 0; i < buffer_valid; ++i) { + tmp[i] = buffer[i]; + } + for (size_t i = 0; i < size_mod32; ++i) { + tmp[buffer_valid + i] = 
from[i]; + } + Update(tmp); + } + + private: + // Swap 32-bit halves of each lane (caller swaps 128-bit halves) + static HH_INLINE V2x64U Rotate64By32(const V2x64U& v) { + return V2x64U(_mm_shuffle_epi32(v, _MM_SHUFFLE(2, 3, 0, 1))); + } + + // Rotates 32-bit lanes by "count" bits. + static HH_INLINE void Rotate32By(V2x64U* HH_RESTRICT vH, + V2x64U* HH_RESTRICT vL, + const uint64_t count) { + // WARNING: the shift count is 64 bits, so we can't reuse vsize_mod32, + // which is broadcast into 32-bit lanes. + const __m128i count_left = _mm_cvtsi64_si128(count); + const __m128i count_right = _mm_cvtsi64_si128(32 - count); + const V2x64U shifted_leftL(_mm_sll_epi32(*vL, count_left)); + const V2x64U shifted_leftH(_mm_sll_epi32(*vH, count_left)); + const V2x64U shifted_rightL(_mm_srl_epi32(*vL, count_right)); + const V2x64U shifted_rightH(_mm_srl_epi32(*vH, count_right)); + *vL = shifted_leftL | shifted_rightL; + *vH = shifted_leftH | shifted_rightH; + } + + static HH_INLINE V2x64U ZipperMerge(const V2x64U& v) { + // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + // varying degrees. In descending order of goodness, bytes + // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + // As expected, the upper and lower bytes are much worse. + // For each 64-bit lane, our objectives are: + // 1) maximizing and equalizing total goodness across each lane's bytes; + // 2) mixing with bytes from the neighboring lane; + // 3) placing the worst bytes in the upper 32 bits because those will not + // be used in the next 32x32 multiplication. 
+ const uint64_t hi = 0x070806090D0A040Bull; + const uint64_t lo = 0x000F010E05020C03ull; + return V2x64U(_mm_shuffle_epi8(v, V2x64U(hi, lo))); + } + + HH_INLINE void Update(const V2x64U& packetH, const V2x64U& packetL) { + v1L += packetL; + v1H += packetH; + v1L += mul0L; + v1H += mul0H; + mul0L ^= V2x64U(_mm_mul_epu32(v1L, Rotate64By32(v0L))); + mul0H ^= V2x64U(_mm_mul_epu32(v1H, v0H >> 32)); + v0L += mul1L; + v0H += mul1H; + mul1L ^= V2x64U(_mm_mul_epu32(v0L, Rotate64By32(v1L))); + mul1H ^= V2x64U(_mm_mul_epu32(v0H, v1H >> 32)); + v0L += ZipperMerge(v1L); + v0H += ZipperMerge(v1H); + v1L += ZipperMerge(v0L); + v1H += ZipperMerge(v0H); + } + + HH_INLINE void PermuteAndUpdate() { + // It is slightly better to permute v0 than v1; it will be added to v1. + // AVX-2 Permute also swaps 128-bit halves, so swap input operands. + Update(Rotate64By32(v0L), Rotate64By32(v0H)); + } + + // Returns zero-initialized vector with the lower "size" = 0, 4, 8 or 12 + // bytes loaded from "bytes". Serves as a replacement for AVX2 maskload_epi32. + static HH_INLINE V2x64U LoadMultipleOfFour(const char* bytes, + const size_t size) { + const uint32_t* words = reinterpret_cast(bytes); + // Mask of 1-bits where the final 4 bytes should be inserted (replacement + // for variable shift/insert using broadcast+blend). + V2x64U mask4(_mm_cvtsi64_si128(0xFFFFFFFFULL)); // 'insert' into lane 0 + V2x64U ret(0); + if (size & 8) { + ret = V2x64U(_mm_loadl_epi64(reinterpret_cast(words))); + // mask4 = 0 ~0 0 0 ('insert' into lane 2) + mask4 = V2x64U(_mm_slli_si128(mask4, 8)); + words += 2; + } + // Final 4 (possibly after the 8 above); 'insert' into lane 0 or 2 of ret. + if (size & 4) { + const __m128i word2 = _mm_cvtsi32_si128(LoadUnaligned(words)); + // = 0 word2 0 word2; mask4 will select which lane to keep. 
+ const V2x64U broadcast(_mm_shuffle_epi32(word2, 0x00)); + // (slightly faster than blendv_epi8) + ret |= V2x64U(broadcast & mask4); + } + return ret; + } + + // XORs x << 1 and x << 2 into *out after clearing the upper two bits of x. + // Bit shifts are only possible on independent 64-bit lanes. We therefore + // insert the upper bits of x[0] that were lost into x[1]. + // Thanks to D. Lemire for helpful comments! + static HH_INLINE void XorByShift128Left12(const V2x64U& x, + V2x64U* HH_RESTRICT out) { + const V2x64U zero(_mm_setzero_si128()); + const V2x64U sign_bit128(_mm_insert_epi32(zero, 0x80000000u, 3)); + const V2x64U top_bits2 = x >> (64 - 2); + HH_COMPILER_FENCE; + const V2x64U shifted1_unmasked = x + x; // (avoids needing port0) + + // Only the lower half of top_bits1 will be used, so we + // can compute it before clearing the upper two bits of x. + const V2x64U top_bits1 = x >> (64 - 1); + const V2x64U shifted2 = shifted1_unmasked + shifted1_unmasked; + HH_COMPILER_FENCE; + + const V2x64U new_low_bits2(_mm_slli_si128(top_bits2, 8)); + *out ^= shifted2; + // The result must be as if the upper two bits of the input had been clear, + // otherwise we're no longer computing a reduction. + const V2x64U shifted1 = AndNot(sign_bit128, shifted1_unmasked); + HH_COMPILER_FENCE; + + const V2x64U new_low_bits1(_mm_slli_si128(top_bits1, 8)); + *out ^= new_low_bits2; + *out ^= shifted1; + *out ^= new_low_bits1; + } + + // Modular reduction by the irreducible polynomial (x^128 + x^2 + x). + // Input: a 256-bit number a3210. + static HH_INLINE V2x64U ModularReduction(const V2x64U& a32_unmasked, + const V2x64U& a10) { + // See Lemire, https://arxiv.org/pdf/1503.03465v8.pdf. 
+ V2x64U out = a10; + XorByShift128Left12(a32_unmasked, &out); + return out; + } + + V2x64U v0L; + V2x64U v0H; + V2x64U v1L; + V2x64U v1H; + V2x64U mul0L; + V2x64U mul0H; + V2x64U mul1L; + V2x64U mul1H; +}; + +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HH_SSE41_H_ diff --git a/highwayhash/highwayhash/hh_types.h b/highwayhash/highwayhash/hh_types.h new file mode 100644 index 000000000..f350d70f6 --- /dev/null +++ b/highwayhash/highwayhash/hh_types.h @@ -0,0 +1,50 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_TYPES_H_ +#define HIGHWAYHASH_HH_TYPES_H_ + +// WARNING: included from c_bindings => must be C-compatible. +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include // size_t +#include + +#ifdef __cplusplus +namespace highwayhash { +#endif + +// 256-bit secret key that should remain unknown to attackers. +// We recommend initializing it to a random value. +typedef uint64_t HHKey[4]; + +// How much input is hashed by one call to HHStateT::Update. +typedef char HHPacket[32]; + +// Hash 'return' types. 
+typedef uint64_t HHResult64; // returned directly +typedef uint64_t HHResult128[2]; +typedef uint64_t HHResult256[4]; + +// Called if a test fails, indicating which target and size. +typedef void (*HHNotify)(const char*, size_t); + +#ifdef __cplusplus +} // namespace highwayhash +#endif + +#endif // HIGHWAYHASH_HH_TYPES_H_ diff --git a/highwayhash/highwayhash/hh_vsx.cc b/highwayhash/highwayhash/hh_vsx.cc new file mode 100644 index 000000000..6479a7a80 --- /dev/null +++ b/highwayhash/highwayhash/hh_vsx.cc @@ -0,0 +1,22 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME VSX + +#ifdef __VSX__ +#include "highwayhash/highwayhash_target.cc" +#endif diff --git a/highwayhash/highwayhash/hh_vsx.h b/highwayhash/highwayhash/hh_vsx.h new file mode 100644 index 000000000..e503abe1f --- /dev/null +++ b/highwayhash/highwayhash/hh_vsx.h @@ -0,0 +1,335 @@ +// Copyright 2015-2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HH_VSX_H_ +#define HIGHWAYHASH_HH_VSX_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_types.h" +#include "highwayhash/load3.h" + +// For auto-dependency generation, we need to include all headers but not their +// contents +#ifndef HH_DISABLE_TARGET_SPECIFIC + +#include +#undef vector +#undef pixel +#undef bool + +namespace highwayhash { + +typedef __vector unsigned long long PPC_VEC_U64; // NOLINT +typedef __vector unsigned int PPC_VEC_U32; +typedef __vector unsigned char PPC_VEC_U8; + +// See vector128.h for why this namespace is necessary; +namespace HH_TARGET_NAME { + +// Helper Functions + +// gcc doesn't support vec_mule() and vec_mulo() for vector long. +// Use the generic version, which is defined here only for gcc. + +#ifndef __clang__ +static HH_INLINE PPC_VEC_U64 vec_mule(PPC_VEC_U32 a, PPC_VEC_U32 b) { // NOLINT + PPC_VEC_U64 result; // NOLINT +#ifdef __LITTLE_ENDIAN__ + asm("vmulouw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b)); +#else + asm("vmuleuw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b)); +#endif + return result; +} +#endif + +// LoadUnaligned uses vec_vsx_ld(offset, address) format, +// Offset here is number of bytes and is 0 for this implementation. 
+static HH_INLINE PPC_VEC_U64 +LoadUnaligned(const uint64_t* const HH_RESTRICT from) { + const PPC_VEC_U64* const HH_RESTRICT p = + reinterpret_cast(from); + return vec_vsx_ld(0, p); +} + +static HH_INLINE void StoreUnaligned(const PPC_VEC_U64& hash, + uint64_t* const HH_RESTRICT to) { + PPC_VEC_U64* HH_RESTRICT p = reinterpret_cast(to); + vec_vsx_st(hash, 0, p); +} + +static HH_INLINE PPC_VEC_U64 MultiplyVectors(const PPC_VEC_U64& vec1, + const PPC_VEC_U64& vec2) { + return vec_mule(reinterpret_cast(vec1), + reinterpret_cast(vec2)); +} + +// J-lanes tree hashing: see https://doi.org/10.4236/jis.2014.53010 +class HHStateVSX { + public: + explicit HH_INLINE HHStateVSX(const HHKey key) { Reset(key); } + + HH_INLINE void Reset(const HHKey key) { + // "Nothing up my sleeve numbers"; + const PPC_VEC_U64 init0L = {0xdbe6d5d5fe4cce2full, 0xa4093822299f31d0ull}; + const PPC_VEC_U64 init0H = {0x13198a2e03707344ull, 0x243f6a8885a308d3ull}; + const PPC_VEC_U64 init1L = {0x3bd39e10cb0ef593ull, 0xc0acf169b5f18a8cull}; + const PPC_VEC_U64 init1H = {0xbe5466cf34e90c6cull, 0x452821e638d01377ull}; + const PPC_VEC_U64 keyL = LoadUnaligned(key); + const PPC_VEC_U64 keyH = LoadUnaligned(key + 2); + v0L = keyL ^ init0L; + v0H = keyH ^ init0H; + v1L = Rotate64By32(keyL) ^ init1L; + v1H = Rotate64By32(keyH) ^ init1H; + mul0L = init0L; + mul0H = init0H; + mul1L = init1L; + mul1H = init1H; + } + + HH_INLINE void Update(const HHPacket& packet_bytes) { + const uint64_t* HH_RESTRICT packet = + reinterpret_cast(packet_bytes); + const PPC_VEC_U64 packetL = LoadUnaligned(packet); + const PPC_VEC_U64 packetH = LoadUnaligned(packet + 2); + Update(packetH, packetL); + } + + HH_INLINE void UpdateRemainder(const char* bytes, const size_t size_mod32) { + // 'Length padding' differentiates zero-valued inputs that have the same + // size/32. mod32 is sufficient because each Update behaves as if a + // counter were injected, because the state is large and mixed thoroughly. 
+ uint32_t size_rounded = static_cast(size_mod32); + PPC_VEC_U32 vsize_mod32 = {size_rounded, size_rounded, size_rounded, + size_rounded}; + // Equivalent to storing size_mod32 in packet. + v0L += reinterpret_cast(vsize_mod32); + v0H += reinterpret_cast(vsize_mod32); + + // Boosts the avalanche effect of mod32. + Rotate32By(&v1H, &v1L, size_mod32); + + const size_t size_mod4 = size_mod32 & 3; + const char* HH_RESTRICT remainder = bytes + (size_mod32 & ~3); + + if (HH_UNLIKELY(size_mod32 & 16)) { // 16..31 bytes left + const PPC_VEC_U64 packetL = + vec_vsx_ld(0, reinterpret_cast(bytes)); + + PPC_VEC_U64 packetH = LoadMultipleOfFour(bytes + 16, size_mod32); + + const uint32_t last4 = + Load3()(Load3::AllowReadBeforeAndReturn(), remainder, size_mod4); + + // The upper four bytes of packetH are zero, so insert there. + PPC_VEC_U32 packetH_32 = reinterpret_cast(packetH); + packetH_32[3] = last4; + packetH = reinterpret_cast(packetH_32); + Update(packetH, packetL); + } else { // size_mod32 < 16 + const PPC_VEC_U64 packetL = LoadMultipleOfFour(bytes, size_mod32); + + const uint64_t last4 = + Load3()(Load3::AllowUnordered(), remainder, size_mod4); + + // Rather than insert into packetL[3], it is faster to initialize + // the otherwise empty packetH. + const PPC_VEC_U64 packetH = {last4, 0}; + Update(packetH, packetL); + } + } + + HH_INLINE void Finalize(HHResult64* HH_RESTRICT result) { + // Mix together all lanes. 
+ for (int n = 0; n < 4; n++) { + PermuteAndUpdate(); + } + const PPC_VEC_U64 hash = v0L + v1L + mul0L + mul1L; + *result = hash[0]; + } + + HH_INLINE void Finalize(HHResult128* HH_RESTRICT result) { + for (int n = 0; n < 6; n++) { + PermuteAndUpdate(); + } + const PPC_VEC_U64 hash = v0L + mul0L + v1H + mul1H; + StoreUnaligned(hash, *result); + } + + HH_INLINE void Finalize(HHResult256* HH_RESTRICT result) { + for (int n = 0; n < 10; n++) { + PermuteAndUpdate(); + } + const PPC_VEC_U64 sum0L = v0L + mul0L; + const PPC_VEC_U64 sum1L = v1L + mul1L; + const PPC_VEC_U64 sum0H = v0H + mul0H; + const PPC_VEC_U64 sum1H = v1H + mul1H; + const PPC_VEC_U64 hashL = ModularReduction(sum1L, sum0L); + const PPC_VEC_U64 hashH = ModularReduction(sum1H, sum0H); + StoreUnaligned(hashL, *result); + StoreUnaligned(hashH, *result + 2); + } + + static HH_INLINE void ZeroInitialize(char* HH_RESTRICT buffer_bytes) { + for (size_t i = 0; i < sizeof(HHPacket); ++i) { + buffer_bytes[i] = 0; + } + } + + static HH_INLINE void CopyPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[i] = from[i]; + } + } + + static HH_INLINE void AppendPartial(const char* HH_RESTRICT from, + const size_t size_mod32, + char* HH_RESTRICT buffer, + const size_t buffer_valid) { + for (size_t i = 0; i < size_mod32; ++i) { + buffer[buffer_valid + i] = from[i]; + } + } + + HH_INLINE void AppendAndUpdate(const char* HH_RESTRICT from, + const size_t size_mod32, + const char* HH_RESTRICT buffer, + const size_t buffer_valid) { + HH_ALIGNAS(32) HHPacket tmp; + for (size_t i = 0; i < buffer_valid; ++i) { + tmp[i] = buffer[i]; + } + for (size_t i = 0; i < size_mod32; ++i) { + tmp[buffer_valid + i] = from[i]; + } + Update(tmp); + } + + private: + // Swap 32-bit halves of each lane (caller swaps 128-bit halves) + static HH_INLINE PPC_VEC_U64 Rotate64By32(const PPC_VEC_U64& v) { + PPC_VEC_U64 shuffle_vec = {32, 32}; + return vec_rl(v, 
shuffle_vec); + } + + // Rotates 32-bit lanes by "count" bits. + static HH_INLINE void Rotate32By(PPC_VEC_U64* HH_RESTRICT vH, + PPC_VEC_U64* HH_RESTRICT vL, + const uint64_t count) { + // WARNING: the shift count is 64 bits, so we can't reuse vsize_mod32, + // which is broadcast into 32-bit lanes. + uint32_t count_rl = uint32_t(count); + PPC_VEC_U32 rot_left = {count_rl, count_rl, count_rl, count_rl}; + *vL = reinterpret_cast(vec_rl(PPC_VEC_U32(*vL), rot_left)); + *vH = reinterpret_cast(vec_rl(PPC_VEC_U32(*vH), rot_left)); + } + + static HH_INLINE PPC_VEC_U64 ZipperMerge(const PPC_VEC_U64& v) { + // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + // varying degrees. In descending order of goodness, bytes + // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + // As expected, the upper and lower bytes are much worse. + // For each 64-bit lane, our objectives are: + // 1) maximizing and equalizing total goodness across each lane's bytes; + // 2) mixing with bytes from the neighboring lane; + // 3) placing the worst bytes in the upper 32 bits because those will not + // be used in the next 32x32 multiplication. 
+ + const PPC_VEC_U64 mask = {0x000F010E05020C03ull, 0x070806090D0A040Bull}; + return vec_vperm(v, v, reinterpret_cast(mask)); + } + + HH_INLINE void Update(const PPC_VEC_U64& packetH, + const PPC_VEC_U64& packetL) { + // Tried rearranging the instructions below and benchmarks are similar + v1L += packetL + mul0L; + v1H += packetH + mul0H; + mul0L ^= MultiplyVectors(v1L, Rotate64By32(v0L)); + mul0H ^= MultiplyVectors(v1H, v0H >> 32); + v0L += mul1L; + v0H += mul1H; + mul1L ^= MultiplyVectors(v0L, Rotate64By32(v1L)); + mul1H ^= MultiplyVectors(v0H, v1H >> 32); + v0L += ZipperMerge(v1L); + v1L += ZipperMerge(v0L); + v0H += ZipperMerge(v1H); + v1H += ZipperMerge(v0H); + } + + HH_INLINE void PermuteAndUpdate() { + // Permutes v0L and V0H by swapping 32 bits halves of each lane + Update(Rotate64By32(v0L), Rotate64By32(v0H)); + } + + // Returns zero-initialized vector with the lower "size" = 0, 4, 8 or 12 + // bytes loaded from "bytes". Serves as a replacement for AVX2 maskload_epi32. + static HH_INLINE PPC_VEC_U64 LoadMultipleOfFour(const char* bytes, + const size_t size) { + const uint32_t* words = reinterpret_cast(bytes); + // Updating the entries, as if done by vec_insert function call + PPC_VEC_U32 ret = {0, 0, 0, 0}; + if (size & 8) { + ret[0] = words[0]; + ret[1] = words[1]; + words += 2; + if (size & 4) { + ret[2] = words[0]; + } + } else if (size & 4) { + ret[0] = words[0]; + } + return reinterpret_cast(ret); + } + + // Modular reduction by the irreducible polynomial (x^128 + x^2 + x). + // Input: a 256-bit number a3210. + static HH_INLINE PPC_VEC_U64 ModularReduction(const PPC_VEC_U64& a32_unmasked, + const PPC_VEC_U64& a10) { + // See Lemire, https://arxiv.org/pdf/1503.03465v8.pdf. 
+ PPC_VEC_U64 out = a10; + const PPC_VEC_U64 shifted1 = reinterpret_cast( + vec_sll(reinterpret_cast(a32_unmasked), vec_splat_u8(1))); + const PPC_VEC_U64 shifted2 = reinterpret_cast( + vec_sll(reinterpret_cast(a32_unmasked), vec_splat_u8(2))); + // The result must be as if the upper two bits of the input had been clear, + // otherwise we're no longer computing a reduction. + const PPC_VEC_U64 mask = {0xFFFFFFFFFFFFFFFFull, 0x7FFFFFFFFFFFFFFFull}; + const PPC_VEC_U64 shifted1_masked = shifted1 & mask; + out ^= shifted1_masked ^ shifted2; + return out; + } + + PPC_VEC_U64 v0L; + PPC_VEC_U64 v0H; + PPC_VEC_U64 v1L; + PPC_VEC_U64 v1H; + PPC_VEC_U64 mul0L; + PPC_VEC_U64 mul0H; + PPC_VEC_U64 mul1L; + PPC_VEC_U64 mul1H; +}; + +} // namespace HH_TARGET_NAME +} // namespace highwayhash + +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HH_VSX_H_ diff --git a/highwayhash/highwayhash/highwayhash.h b/highwayhash/highwayhash/highwayhash.h new file mode 100644 index 000000000..fea4922b2 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash.h @@ -0,0 +1,216 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef HIGHWAYHASH_HIGHWAYHASH_H_ +#define HIGHWAYHASH_HIGHWAYHASH_H_ + +// This header's templates are useful for inlining into other CPU-specific code: +// template CodeUsingHash() { HighwayHashT(...); }, +// and can also be instantiated with HH_TARGET when callers don't care about the +// exact implementation. Otherwise, they are implementation details of the +// highwayhash_target wrapper. Use that instead if you need to detect the best +// available implementation at runtime. + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_types.h" + +#if HH_ARCH_X64 +#include "highwayhash/iaca.h" +#endif + +// Include exactly one (see arch_specific.h) header, which defines a state +// object in a target-specific namespace, e.g. AVX2::HHStateAVX2. +// Attempts to use "computed includes" (#define MACRO "path/or_just_filename", +// #include MACRO) fail with 'file not found', so we need an #if chain. +#if HH_TARGET == HH_TARGET_AVX2 +#include "highwayhash/hh_avx2.h" +#elif HH_TARGET == HH_TARGET_SSE41 +#include "highwayhash/hh_sse41.h" +#elif HH_TARGET == HH_TARGET_VSX +#include "highwayhash/hh_vsx.h" +#elif HH_TARGET == HH_TARGET_NEON +#include "highwayhash/hh_neon.h" +#elif HH_TARGET == HH_TARGET_Portable +#include "highwayhash/hh_portable.h" +#else +#error "Unknown target, add its hh_*.h include here." +#endif + +#ifndef HH_DISABLE_TARGET_SPECIFIC +namespace highwayhash { + +// Alias templates (HHStateT) cannot be specialized, so we need a helper struct. 
+// Note that hh_*.h don't just specialize HHStateT directly because vector128.h +// must reside in a distinct namespace (to allow including it from multiple +// translation units), and it is easier if its users, i.e. the concrete HHState, +// also reside in that same namespace, which precludes specialization. +template +struct HHStateForTarget {}; + +template <> +struct HHStateForTarget { + // (The namespace is sufficient and the additional HH_TARGET_NAME suffix is + // technically redundant, but it makes searching easier.) + using type = HH_TARGET_NAME::HH_ADD_TARGET_SUFFIX(HHState); +}; + +// Typically used as HHStateT. It would be easier to just have a +// concrete type HH_STATE, but this alias template is required by the +// templates in highwayhash_target.cc. +template +using HHStateT = typename HHStateForTarget::type; + +// Computes HighwayHash of "bytes" using the implementation chosen by "State". +// +// "state" is a HHStateT<> initialized with a key. +// "bytes" is the data to hash (possibly unaligned). +// "size" is the number of bytes to hash; we do not read any additional bytes. +// "hash" is a HHResult* (either 64, 128 or 256 bits). +// +// HighwayHash is a strong pseudorandom function with security claims +// [https://arxiv.org/abs/1612.06257]. It is intended as a safer general-purpose +// hash, about 4x faster than SipHash and 10x faster than BLAKE2. +// +// This template allows callers (e.g. tests) to invoke a specific +// implementation. It must be compiled with the flags required by the desired +// implementation. If the entire program cannot be built with these flags, use +// the wrapper in highwayhash_target.h instead. +// +// Callers wanting to hash multiple pieces of data should duplicate this +// function, calling HHStateT::Update for each input and only Finalizing once. 
+template +HH_INLINE void HighwayHashT(State* HH_RESTRICT state, + const char* HH_RESTRICT bytes, const size_t size, + Result* HH_RESTRICT hash) { + // BeginIACA(); + const size_t remainder = size & (sizeof(HHPacket) - 1); + const size_t truncated = size & ~(sizeof(HHPacket) - 1); + for (size_t offset = 0; offset < truncated; offset += sizeof(HHPacket)) { + state->Update(*reinterpret_cast(bytes + offset)); + } + + if (remainder != 0) { + state->UpdateRemainder(bytes + truncated, remainder); + } + + state->Finalize(hash); + // EndIACA(); +} + +// Wrapper class for incrementally hashing a series of data ranges. The final +// result is the same as HighwayHashT of the concatenation of all the ranges. +// This is useful for computing the hash of cords, iovecs, and similar +// data structures. +template +class HighwayHashCatT { + public: + HH_INLINE HighwayHashCatT(const HHKey& key) : state_(key) { + // Avoids msan uninitialized-memory warnings. + HHStateT::ZeroInitialize(buffer_); + } + + // Resets the state of the hasher so it can be used to hash a new string. + HH_INLINE void Reset(const HHKey& key) { + state_.Reset(key); + buffer_usage_ = 0; + } + + // Adds "bytes" to the internal buffer, feeding it to HHStateT::Update as + // required. Call this as often as desired. Only reads bytes within the + // interval [bytes, bytes + num_bytes). "num_bytes" == 0 has no effect. + // + // Beware that this implies hashing two strings {"A", ""} has the same result + // as {"", "A"}. To prevent this when hashing independent fields, you can + // append some extra (non-empty) data when a field is empty, or + // unconditionally also Append the field length. Either option would ensure + // the two examples above result in a different hash. + // + // There are no alignment requirements. 
+ HH_INLINE void Append(const char* HH_RESTRICT bytes, size_t num_bytes) { + // BeginIACA(); + const size_t capacity = sizeof(HHPacket) - buffer_usage_; + // New bytes fit within buffer, but still not enough to Update. + if (HH_UNLIKELY(num_bytes < capacity)) { + HHStateT::AppendPartial(bytes, num_bytes, buffer_, buffer_usage_); + buffer_usage_ += num_bytes; + return; + } + + // HACK: ensures the state is kept in SIMD registers; otherwise, Update + // constantly load/stores its operands, which is much slower. + // Restrict-qualified pointers to external state or the state_ member are + // not sufficient for keeping this in registers. + HHStateT state_copy = state_; + + // Have prior bytes to flush. + const size_t buffer_usage = buffer_usage_; + if (HH_LIKELY(buffer_usage != 0)) { + // Calls update with prior buffer contents plus new data. Does not modify + // the buffer because some implementations can load into SIMD registers + // and Append to them directly. + state_copy.AppendAndUpdate(bytes, capacity, buffer_, buffer_usage); + bytes += capacity; + num_bytes -= capacity; + } + + // Buffer currently empty => Update directly from the source. + while (num_bytes >= sizeof(HHPacket)) { + state_copy.Update(*reinterpret_cast(bytes)); + bytes += sizeof(HHPacket); + num_bytes -= sizeof(HHPacket); + } + + // Unconditionally assign even if zero because we didn't reset to zero + // after the AppendAndUpdate above. + buffer_usage_ = num_bytes; + + state_ = state_copy; + + // Store any remainders in buffer, no-op if multiple of a packet. + if (HH_LIKELY(num_bytes != 0)) { + HHStateT::CopyPartial(bytes, num_bytes, buffer_); + } + // EndIACA(); + } + + // Stores the resulting 64, 128 or 256-bit hash of data previously passed to + // Append since construction or a prior call to Reset. 
+ template // HHResult* + HH_INLINE void Finalize(Result* HH_RESTRICT hash) const { + // BeginIACA(); + HHStateT state_copy = state_; + const size_t buffer_usage = buffer_usage_; + if (HH_LIKELY(buffer_usage != 0)) { + state_copy.UpdateRemainder(buffer_, buffer_usage); + } + state_copy.Finalize(hash); + // EndIACA(); + } + + private: + HH_ALIGNAS(64) HHPacket buffer_; + HH_ALIGNAS(32) HHStateT state_; + // How many bytes in buffer_ (starting with offset 0) are valid. + size_t buffer_usage_ = 0; +}; + +} // namespace highwayhash +#endif // HH_DISABLE_TARGET_SPECIFIC +#endif // HIGHWAYHASH_HIGHWAYHASH_H_ diff --git a/highwayhash/highwayhash/highwayhash_fuzzer.cc b/highwayhash/highwayhash/highwayhash_fuzzer.cc new file mode 100644 index 000000000..5234fcb01 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_fuzzer.cc @@ -0,0 +1,25 @@ +#include "highwayhash/highwayhash_target.h" +#include "highwayhash/instruction_sets.h" + +using highwayhash::HHKey; +using highwayhash::HHResult64; +using highwayhash::HighwayHash; +using highwayhash::InstructionSets; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < sizeof(uint64_t) * 4) { + return 0; + } + + // Generate the key. + const uint64_t *u64s = reinterpret_cast(data); + HH_ALIGNAS(32) const HHKey key = {u64s[0], u64s[1], u64s[2], u64s[3]}; + data += sizeof(uint64_t) * 4; + size -= sizeof(uint64_t) * 4; + + // Compute the hash. + HHResult64 result; + InstructionSets::Run(key, reinterpret_cast(data), + size, &result); + return 0; +} diff --git a/highwayhash/highwayhash/highwayhash_target.cc b/highwayhash/highwayhash/highwayhash_target.cc new file mode 100644 index 000000000..74022f64b --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_target.cc @@ -0,0 +1,104 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#include "highwayhash/highwayhash_target.h" + +#include "highwayhash/highwayhash.h" + +#ifndef HH_DISABLE_TARGET_SPECIFIC +namespace highwayhash { + +extern "C" { +uint64_t HH_ADD_TARGET_SUFFIX(HighwayHash64_)(const HHKey key, + const char* bytes, + const uint64_t size) { + HHStateT state(key); + HHResult64 result; + HighwayHashT(&state, bytes, size, &result); + return result; +} +} // extern "C" + +template +void HighwayHash::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + HHResult64* HH_RESTRICT hash) const { + HHStateT state(key); + HighwayHashT(&state, bytes, size, hash); +} + +template +void HighwayHash::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + HHResult128* HH_RESTRICT hash) const { + HHStateT state(key); + HighwayHashT(&state, bytes, size, hash); +} + +template +void HighwayHash::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + HHResult256* HH_RESTRICT hash) const { + HHStateT state(key); + HighwayHashT(&state, bytes, size, hash); +} + +template +void HighwayHashCat::operator()(const HHKey& key, + const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult64* HH_RESTRICT hash) const { + HighwayHashCatT cat(key); + for (size_t i = 0; i < num_fragments; ++i) { + cat.Append(fragments[i].data, fragments[i].num_bytes); + } + cat.Finalize(hash); +} + +template 
+void HighwayHashCat::operator()(const HHKey& key, + const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult128* HH_RESTRICT hash) const { + HighwayHashCatT cat(key); + for (size_t i = 0; i < num_fragments; ++i) { + cat.Append(fragments[i].data, fragments[i].num_bytes); + } + cat.Finalize(hash); +} + +template +void HighwayHashCat::operator()(const HHKey& key, + const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult256* HH_RESTRICT hash) const { + HighwayHashCatT cat(key); + for (size_t i = 0; i < num_fragments; ++i) { + cat.Append(fragments[i].data, fragments[i].num_bytes); + } + cat.Finalize(hash); +} + +// Instantiate for the current target. +template struct HighwayHash; +template struct HighwayHashCat; + +} // namespace highwayhash +#endif // HH_DISABLE_TARGET_SPECIFIC diff --git a/highwayhash/highwayhash/highwayhash_target.h b/highwayhash/highwayhash/highwayhash_target.h new file mode 100644 index 000000000..08b803f19 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_target.h @@ -0,0 +1,91 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HIGHWAYHASH_TARGET_H_ +#define HIGHWAYHASH_HIGHWAYHASH_TARGET_H_ + +// Adapter for the InstructionSets::Run dispatcher, which invokes the best +// implementations available on the current CPU. 
+ +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_types.h" + +namespace highwayhash { + +// Usage: InstructionSets::Run(key, bytes, size, hash). +// This incurs some small dispatch overhead. If the entire program is compiled +// for the target CPU, you can instead call HighwayHashT directly to avoid any +// overhead. This template is instantiated in the source file, which is +// compiled once for every target with the required flags (e.g. -mavx2). +template +struct HighwayHash { + // Stores a 64/128/256 bit hash of "bytes" using the HighwayHashT + // implementation for the "Target" CPU. The hash result is identical + // regardless of which implementation is used. + // + // "key" is a (randomly generated or hard-coded) HHKey. + // "bytes" is the data to hash (possibly unaligned). + // "size" is the number of bytes to hash; we do not read any additional bytes. + // "hash" is a HHResult* (either 64, 128 or 256 bits). + // + // HighwayHash is a strong pseudorandom function with security claims + // [https://arxiv.org/abs/1612.06257]. It is intended as a safer + // general-purpose hash, 5x faster than SipHash and 10x faster than BLAKE2. + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, HHResult64* HH_RESTRICT hash) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, HHResult128* HH_RESTRICT hash) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, HHResult256* HH_RESTRICT hash) const; +}; + +// Replacement for C++17 std::string_view that avoids dependencies. 
+// A struct requires fewer allocations when calling HighwayHashCat with +// non-const "num_fragments". +struct StringView { + const char* data; // not necessarily aligned/padded + size_t num_bytes; // possibly zero +}; + +// Note: this interface avoids dispatch overhead per fragment. +template +struct HighwayHashCat { + // Stores a 64/128/256 bit hash of all "num_fragments" "fragments" using the + // HighwayHashCatT implementation for "Target". The hash result is identical + // to HighwayHash of the flattened data, regardless of Target. + // + // "key" is a (randomly generated or hard-coded) HHKey. + // "fragments" contain unaligned pointers and the number of valid bytes. + // "num_fragments" indicates the number of entries in "fragments". + // "hash" is a HHResult* (either 64, 128 or 256 bits). + void operator()(const HHKey& key, const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult64* HH_RESTRICT hash) const; + void operator()(const HHKey& key, const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult128* HH_RESTRICT hash) const; + void operator()(const HHKey& key, const StringView* HH_RESTRICT fragments, + const size_t num_fragments, + HHResult256* HH_RESTRICT hash) const; +}; + +} // namespace highwayhash + +#endif // HIGHWAYHASH_HIGHWAYHASH_TARGET_H_ diff --git a/highwayhash/highwayhash/highwayhash_test.cc b/highwayhash/highwayhash/highwayhash_test.cc new file mode 100644 index 000000000..aed9a9eed --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test.cc @@ -0,0 +1,391 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Ensures each implementation of HighwayHash returns consistent and unchanging +// hash values. + +#include "highwayhash/highwayhash_test_target.h" + +#include +#include +#include +#include + +#ifdef HH_GOOGLETEST +#include "testing/base/public/gunit.h" +#endif + +#include "highwayhash/data_parallel.h" +#include "highwayhash/highwayhash_target.h" +#include "highwayhash/instruction_sets.h" + +// Define to nonzero in order to print the (new) golden outputs. +// WARNING: HighwayHash is frozen, so the golden values must not change. +#define PRINT_RESULTS 0 + +namespace highwayhash { +namespace { + +// Known-good outputs are verified for all lengths in [0, 64]. +const size_t kMaxSize = 64; + +#if PRINT_RESULTS +void Print(const HHResult64 result) { printf("0x%016lXull,\n", result); } + +// For HHResult128/256. +template +void Print(const HHResult64 (&result)[kNumLanes]) { + printf("{ "); + for (int i = 0; i < kNumLanes; ++i) { + if (i != 0) { + printf(", "); + } + printf("0x%016lXull", result[i]); + } + printf("},\n"); +} +#endif // PRINT_RESULTS + +// Called when any test fails; exits immediately because one mismatch usually +// implies many others. +void OnFailure(const char* target_name, const size_t size) { + printf("Mismatch at size %zu for target %s\n", size, target_name); +#ifdef HH_GOOGLETEST + EXPECT_TRUE(false); +#endif + exit(1); +} + +// Verifies every combination of implementation and input size. Returns which +// targets were run/verified. 
+template +TargetBits VerifyImplementations(const Result (&known_good)[kMaxSize + 1]) { + const HHKey key = {0x0706050403020100ULL, 0x0F0E0D0C0B0A0908ULL, + 0x1716151413121110ULL, 0x1F1E1D1C1B1A1918ULL}; + + TargetBits targets = ~0U; + + // For each test input: empty string, 00, 00 01, ... + char in[kMaxSize + 1] = {0}; + // Fast enough that we don't need a thread pool. + for (uint64_t size = 0; size <= kMaxSize; ++size) { + in[size] = static_cast(size); +#if PRINT_RESULTS + Result actual; + targets &= InstructionSets::Run(key, in, size, &actual); + Print(actual); +#else + const Result* expected = &known_good[size]; + targets &= InstructionSets::RunAll(key, in, size, expected, + &OnFailure); +#endif + } + return targets; +} + +// Cat + +void OnCatFailure(const char* target_name, const size_t size) { + printf("Cat mismatch at size %zu\n", size); +#ifdef HH_GOOGLETEST + EXPECT_TRUE(false); +#endif + exit(1); +} + +// Returns which targets were run/verified. +template +TargetBits VerifyCat(ThreadPool* pool) { + // Reversed order vs prior test. + const HHKey key = {0x1F1E1D1C1B1A1918ULL, 0x1716151413121110ULL, + 0x0F0E0D0C0B0A0908ULL, 0x0706050403020100ULL}; + + const size_t kMaxSize = 3 * 35; + char flat[kMaxSize]; + srand(129); + for (size_t size = 0; size < kMaxSize; ++size) { + flat[size] = static_cast(rand() & 0xFF); + } + + std::atomic targets{~0U}; + + pool->Run(0, kMaxSize, [&key, &flat, &targets](const uint32_t i) { + Result dummy; + targets.fetch_and(InstructionSets::RunAll( + key, flat, i, &dummy, &OnCatFailure)); + }); + return targets.load(); +} + +// WARNING: HighwayHash is frozen, so the golden values must not change. 
+const HHResult64 kExpected64[kMaxSize + 1] = { + 0x907A56DE22C26E53ull, 0x7EAB43AAC7CDDD78ull, 0xB8D0569AB0B53D62ull, + 0x5C6BEFAB8A463D80ull, 0xF205A46893007EDAull, 0x2B8A1668E4A94541ull, + 0xBD4CCC325BEFCA6Full, 0x4D02AE1738F59482ull, 0xE1205108E55F3171ull, + 0x32D2644EC77A1584ull, 0xF6E10ACDB103A90Bull, 0xC3BBF4615B415C15ull, + 0x243CC2040063FA9Cull, 0xA89A58CE65E641FFull, 0x24B031A348455A23ull, + 0x40793F86A449F33Bull, 0xCFAB3489F97EB832ull, 0x19FE67D2C8C5C0E2ull, + 0x04DD90A69C565CC2ull, 0x75D9518E2371C504ull, 0x38AD9B1141D3DD16ull, + 0x0264432CCD8A70E0ull, 0xA9DB5A6288683390ull, 0xD7B05492003F028Cull, + 0x205F615AEA59E51Eull, 0xEEE0C89621052884ull, 0x1BFC1A93A7284F4Full, + 0x512175B5B70DA91Dull, 0xF71F8976A0A2C639ull, 0xAE093FEF1F84E3E7ull, + 0x22CA92B01161860Full, 0x9FC7007CCF035A68ull, 0xA0C964D9ECD580FCull, + 0x2C90F73CA03181FCull, 0x185CF84E5691EB9Eull, 0x4FC1F5EF2752AA9Bull, + 0xF5B7391A5E0A33EBull, 0xB9B84B83B4E96C9Cull, 0x5E42FE712A5CD9B4ull, + 0xA150F2F90C3F97DCull, 0x7FA522D75E2D637Dull, 0x181AD0CC0DFFD32Bull, + 0x3889ED981E854028ull, 0xFB4297E8C586EE2Dull, 0x6D064A45BB28059Cull, + 0x90563609B3EC860Cull, 0x7AA4FCE94097C666ull, 0x1326BAC06B911E08ull, + 0xB926168D2B154F34ull, 0x9919848945B1948Dull, 0xA2A98FC534825EBEull, + 0xE9809095213EF0B6ull, 0x582E5483707BC0E9ull, 0x086E9414A88A6AF5ull, + 0xEE86B98D20F6743Dull, 0xF89B7FF609B1C0A7ull, 0x4C7D9CC19E22C3E8ull, + 0x9A97005024562A6Full, 0x5DD41CF423E6EBEFull, 0xDF13609C0468E227ull, + 0x6E0DA4F64188155Aull, 0xB755BA4B50D7D4A1ull, 0x887A3484647479BDull, + 0xAB8EEBE9BF2139A0ull, 0x75542C5D4CD2A6FFull}; + +// WARNING: HighwayHash is frozen, so the golden values must not change. 
+const HHResult128 kExpected128[kMaxSize + 1] = { + {0x0FED268F9D8FFEC7ull, 0x33565E767F093E6Full}, + {0xD6B0A8893681E7A8ull, 0xDC291DF9EB9CDCB4ull}, + {0x3D15AD265A16DA04ull, 0x78085638DC32E868ull}, + {0x0607621B295F0BEBull, 0xBFE69A0FD9CEDD79ull}, + {0x26399EB46DACE49Eull, 0x2E922AD039319208ull}, + {0x3250BDC386D12ED8ull, 0x193810906C63C23Aull}, + {0x6F476AB3CB896547ull, 0x7CDE576F37ED1019ull}, + {0x2A401FCA697171B4ull, 0xBE1F03FF9F02796Cull}, + {0xA1E96D84280552E8ull, 0x695CF1C63BEC0AC2ull}, + {0x142A2102F31E63B2ull, 0x1A85B98C5B5000CCull}, + {0x51A1B70E26B6BC5Bull, 0x929E1F3B2DA45559ull}, + {0x88990362059A415Bull, 0xBED21F22C47B7D13ull}, + {0xCD1F1F5F1CAF9566ull, 0xA818BA8CE0F9C8D4ull}, + {0xA225564112FE6157ull, 0xB2E94C78B8DDB848ull}, + {0xBD492FEBD1CC0919ull, 0xCECD1DBC025641A2ull}, + {0x142237A52BC4AF54ull, 0xE0796C0B6E26BCD7ull}, + {0x414460FFD5A401ADull, 0x029EA3D5019F18C8ull}, + {0xC52A4B96C51C9962ull, 0xECB878B1169B5EA0ull}, + {0xD940CA8F11FBEACEull, 0xF93A46D616F8D531ull}, + {0x8AC49D0AE5C0CBF5ull, 0x3FFDBF8DF51D7C93ull}, + {0xAC6D279B852D00A8ull, 0x7DCD3A6BA5EBAA46ull}, + {0xF11621BD93F08A56ull, 0x3173C398163DD9D5ull}, + {0x0C4CE250F68CF89Full, 0xB3123CDA411898EDull}, + {0x15AB97ED3D9A51CEull, 0x7CE274479169080Eull}, + {0xCD001E198D4845B8ull, 0xD0D9D98BD8AA2D77ull}, + {0x34F3D617A0493D79ull, 0x7DD304F6397F7E16ull}, + {0x5CB56890A9F4C6B6ull, 0x130829166567304Full}, + {0x30DA6F8B245BD1C0ull, 0x6F828B7E3FD9748Cull}, + {0xE0580349204C12C0ull, 0x93F6DA0CAC5F441Cull}, + {0xF648731BA5073045ull, 0x5FB897114FB65976ull}, + {0x024F8354738A5206ull, 0x509A4918EB7E0991ull}, + {0x06E7B465E8A57C29ull, 0x52415E3A07F5D446ull}, + {0x1984DF66C1434AAAull, 0x16FC1958F9B3E4B9ull}, + {0x111678AFE0C6C36Cull, 0xF958B59DE5A2849Dull}, + {0x773FBC8440FB0490ull, 0xC96ED5D243658536ull}, + {0x91E3DC710BB6C941ull, 0xEA336A0BC1EEACE9ull}, + {0x25CFE3815D7AD9D4ull, 0xF2E94F8C828FC59Eull}, + {0xB9FB38B83CC288F2ull, 0x7479C4C8F850EC04ull}, + {0x1D85D5C525982B8Cull, 0x6E26B1C16F48DBF4ull}, 
+ {0x8A4E55BD6060BDE7ull, 0x2134D599058B3FD0ull}, + {0x2A958FF994778F36ull, 0xE8052D1AE61D6423ull}, + {0x89233AE6BE453233ull, 0x3ACF9C87D7E8C0B9ull}, + {0x4458F5E27EA9C8D5ull, 0x418FB49BCA2A5140ull}, + {0x090301837ED12A68ull, 0x1017F69633C861E6ull}, + {0x330DD84704D49590ull, 0x339DF1AD3A4BA6E4ull}, + {0x569363A663F2C576ull, 0x363B3D95E3C95EF6ull}, + {0xACC8D08586B90737ull, 0x2BA0E8087D4E28E9ull}, + {0x39C27A27C86D9520ull, 0x8DB620A45160932Eull}, + {0x8E6A4AEB671A072Dull, 0x6ED3561A10E47EE6ull}, + {0x0011D765B1BEC74Aull, 0xD80E6E656EDE842Eull}, + {0x2515D62B936AC64Cull, 0xCE088794D7088A7Dull}, + {0x91621552C16E23AFull, 0x264F0094EB23CCEFull}, + {0x1E21880D97263480ull, 0xD8654807D3A31086ull}, + {0x39D76AAF097F432Dull, 0xA517E1E09D074739ull}, + {0x0F17A4F337C65A14ull, 0x2F51215F69F976D4ull}, + {0xA0FB5CDA12895E44ull, 0x568C3DC4D1F13CD1ull}, + {0x93C8FC00D89C46CEull, 0xBAD5DA947E330E69ull}, + {0x817C07501D1A5694ull, 0x584D6EE72CBFAC2Bull}, + {0x91D668AF73F053BFull, 0xF98E647683C1E0EDull}, + {0x5281E1EF6B3CCF8Bull, 0xBC4CC3DF166083D8ull}, + {0xAAD61B6DBEAAEEB9ull, 0xFF969D000C16787Bull}, + {0x4325D84FC0475879ull, 0x14B919BD905F1C2Dull}, + {0x79A176D1AA6BA6D1ull, 0xF1F720C5A53A2B86ull}, + {0x74BD7018022F3EF0ull, 0x3AEA94A8AD5F4BCBull}, + {0x98BB1F7198D4C4F2ull, 0xE0BC0571DE918FC8ull}}; + +// WARNING: HighwayHash is frozen, so the golden values must not change. 
+const HHResult256 kExpected256[kMaxSize + 1] = { + {0xDD44482AC2C874F5ull, 0xD946017313C7351Full, 0xB3AEBECCB98714FFull, + 0x41DA233145751DF4ull}, + {0xEDB941BCE45F8254ull, 0xE20D44EF3DCAC60Full, 0x72651B9BCB324A47ull, + 0x2073624CB275E484ull}, + {0x3FDFF9DF24AFE454ull, 0x11C4BF1A1B0AE873ull, 0x115169CC6922597Aull, + 0x1208F6590D33B42Cull}, + {0x480AA0D70DD1D95Cull, 0x89225E7C6911D1D0ull, 0x8EA8426B8BBB865Aull, + 0xE23DFBC390E1C722ull}, + {0xC9CFC497212BE4DCull, 0xA85F9DF6AFD2929Bull, 0x1FDA9F211DF4109Eull, + 0x07E4277A374D4F9Bull}, + {0xB4B4F566A4DC85B3ull, 0xBF4B63BA5E460142ull, 0x15F48E68CDDC1DE3ull, + 0x0F74587D388085C6ull}, + {0x6445C70A86ADB9B4ull, 0xA99CFB2784B4CEB6ull, 0xDAE29D40A0B2DB13ull, + 0xB6526DF29A9D1170ull}, + {0xD666B1A00987AD81ull, 0xA4F1F838EB8C6D37ull, 0xE9226E07D463E030ull, + 0x5754D67D062C526Cull}, + {0xF1B905B0ED768BC0ull, 0xE6976FF3FCFF3A45ull, 0x4FBE518DD9D09778ull, + 0xD9A0AFEB371E0D33ull}, + {0x80D8E4D70D3C2981ull, 0xF10FBBD16424F1A1ull, 0xCF5C2DBE9D3F0CD1ull, + 0xC0BFE8F701B673F2ull}, + {0xADE48C50E5A262BEull, 0x8E9492B1FDFE38E0ull, 0x0784B74B2FE9B838ull, + 0x0E41D574DB656DCDull}, + {0xA1BE77B9531807CFull, 0xBA97A7DE6A1A9738ull, 0xAF274CEF9C8E261Full, + 0x3E39B935C74CE8E8ull}, + {0x15AD3802E3405857ull, 0x9D11CBDC39E853A0ull, 0x23EA3E993C31B225ull, + 0x6CD9E9E3CAF4212Eull}, + {0x01C96F5EB1D77C36ull, 0xA367F9C1531F95A6ull, 0x1F94A3427CDADCB8ull, + 0x97F1000ABF3BD5D3ull}, + {0x0815E91EEEFF8E41ull, 0x0E0C28FA6E21DF5Dull, 0x4EAD8E62ED095374ull, + 0x3FFD01DA1C9D73E6ull}, + {0xC11905707842602Eull, 0x62C3DB018501B146ull, 0x85F5AD17FA3406C1ull, + 0xC884F87BD4FEC347ull}, + {0xF51AD989A1B6CD1Full, 0xF7F075D62A627BD9ull, 0x7E01D5F579F28A06ull, + 0x1AD415C16A174D9Full}, + {0x19F4CFA82CA4068Eull, 0x3B9D4ABD3A9275B9ull, 0x8000B0DDE9C010C6ull, + 0x8884D50949215613ull}, + {0x126D6C7F81AB9F5Dull, 0x4EDAA3C5097716EEull, 0xAF121573A7DD3E49ull, + 0x9001AC85AA80C32Dull}, + {0x06AABEF9149155FAull, 0xDF864F4144E71C3Dull, 0xFDBABCE860BC64DAull, + 
0xDE2BA54792491CB6ull}, + {0xADFC6B4035079FDBull, 0xA087B7328E486E65ull, 0x46D1A9935A4623EAull, + 0xE3895C440D3CEE44ull}, + {0xB5F9D31DEEA3B3DFull, 0x8F3024E20A06E133ull, 0xF24C38C8288FE120ull, + 0x703F1DCF9BD69749ull}, + {0x2B3C0B854794EFE3ull, 0x1C5D3F969BDACEA0ull, 0x81F16AAFA563AC2Eull, + 0x23441C5A79D03075ull}, + {0x418AF8C793FD3762ull, 0xBC6B8E9461D7F924ull, 0x776FF26A2A1A9E78ull, + 0x3AA0B7BFD417CA6Eull}, + {0xCD03EA2AD255A3C1ull, 0x0185FEE5B59C1B2Aull, 0xD1F438D44F9773E4ull, + 0xBE69DD67F83B76E4ull}, + {0xF951A8873887A0FBull, 0x2C7B31D2A548E0AEull, 0x44803838B6186EFAull, + 0xA3C78EC7BE219F72ull}, + {0x958FF151EA0D8C08ull, 0x4B7E8997B4F63488ull, 0xC78E074351C5386Dull, + 0xD95577556F20EEFAull}, + {0x29A917807FB05406ull, 0x3318F884351F578Cull, 0xDD24EA6EF6F6A7FAull, + 0xE74393465E97AEFFull}, + {0x98240880935E6CCBull, 0x1FD0D271B09F97DAull, 0x56E786472700B183ull, + 0x291649F99F747817ull}, + {0x1BD4954F7054C556ull, 0xFFDB2EFF7C596CEBull, 0x7C6AC69A1BAB6B5Bull, + 0x0F037670537FC153ull}, + {0x8825E38897597498ull, 0x647CF6EBAF6332C1ull, 0x552BD903DC28C917ull, + 0x72D7632C00BFC5ABull}, + {0x6880E276601A644Dull, 0xB3728B20B10FB7DAull, 0xD0BD12060610D16Eull, + 0x8AEF14EF33452EF2ull}, + {0xBCE38C9039A1C3FEull, 0x42D56326A3C11289ull, 0xE35595F764FCAEA9ull, + 0xC9B03C6BC9475A99ull}, + {0xF60115CBF034A6E5ull, 0x6C36EA75BFCE46D0ull, 0x3B17C8D382725990ull, + 0x7EDAA2ED11007A35ull}, + {0x1326E959EDF9DEA2ull, 0xC4776801739F720Cull, 0x5169500FD762F62Full, + 0x8A0DD0D90A2529ABull}, + {0x935149D503D442D4ull, 0xFF6BB41302DAD144ull, 0x339CB012CD9D36ECull, + 0xE61D53619ECC2230ull}, + {0x528BC888AA50B696ull, 0xB8AEECA36084E1FCull, 0xA158151EC0243476ull, + 0x02C14AAD097CEC44ull}, + {0xBED688A72217C327ull, 0x1EE65114F760873Full, 0x3F5C26B37D3002A6ull, + 0xDDF2E895631597B9ull}, + {0xE7DB21CF2B0B51ADull, 0xFAFC6324F4B0AB6Cull, 0xB0857244C22D9C5Bull, + 0xF0AD888D1E05849Cull}, + {0x05519793CD4DCB00ull, 0x3C594A3163067DEBull, 0xAC75081ACF119E34ull, + 0x5AC86297805CB094ull}, + 
{0x09228D8C22B5779Eull, 0x19644DB2516B7E84ull, 0x2B92C8ABF83141A0ull, + 0x7F785AD725E19391ull}, + {0x59C42E5D46D0A74Bull, 0x5EA53C65CA036064ull, 0x48A9916BB635AEB4ull, + 0xBAE6DF143F54E9D4ull}, + {0x5EB623696D03D0E3ull, 0xD53D78BCB41DA092ull, 0xFE2348DC52F6B10Dull, + 0x64802457632C8C11ull}, + {0x43B61BB2C4B85481ull, 0xC6318C25717E80A1ull, 0x8C4A7F4D6F9C687Dull, + 0xBD0217E035401D7Cull}, + {0x7F51CA5743824C37ull, 0xB04C4D5EB11D703Aull, 0x4D511E1ECBF6F369ull, + 0xD66775EA215456E2ull}, + {0x39B409EEF87E45CCull, 0x52B8E8C459FC79B3ull, 0x44920918D1858C24ull, + 0x80F07B645EEE0149ull}, + {0xCE8694D1BE9AD514ull, 0xBFA19026526836E7ull, 0x1EA4FDF6E4902A7Dull, + 0x380C4458D696E1FEull}, + {0xD189E18BF823A0A4ull, 0x1F3B353BE501A7D7ull, 0xA24F77B4E02E2884ull, + 0x7E94646F74F9180Cull}, + {0xAFF8C635D325EC48ull, 0x2C2E0AA414038D0Bull, 0x4ED37F611A447467ull, + 0x39EC38E33B501489ull}, + {0x2A2BFDAD5F83F197ull, 0x013D3E6EBEF274CCull, 0xE1563C0477726155ull, + 0xF15A8A5DE932037Eull}, + {0xD5D1F91EC8126332ull, 0x10110B9BF9B1FF11ull, 0xA175AB26541C6032ull, + 0x87BADC5728701552ull}, + {0xC7B5A92CD8082884ull, 0xDDA62AB61B2EEEFBull, 0x8F9882ECFEAE732Full, + 0x6B38BD5CC01F4FFBull}, + {0xCF6EF275733D32F0ull, 0xA3F0822DA2BF7D8Bull, 0x304E7435F512406Aull, + 0x0B28E3EFEBB3172Dull}, + {0xE698F80701B2E9DBull, 0x66AE2A819A8A8828ull, 0x14EA9024C9B8F2C9ull, + 0xA7416170523EB5A4ull}, + {0x3A917E87E307EDB7ull, 0x17B4DEDAE34452C1ull, 0xF689F162E711CC70ull, + 0x29CE6BFE789CDD0Eull}, + {0x0EFF3AD8CB155D8Eull, 0x47CD9EAD4C0844A2ull, 0x46C8E40EE6FE21EBull, + 0xDEF3C25DF0340A51ull}, + {0x03FD86E62B82D04Dull, 0x32AB0D600717136Dull, 0x682B0E832B857A89ull, + 0x138CE3F1443739B1ull}, + {0x2F77C754C4D7F902ull, 0x1053E0A9D9ADBFEAull, 0x58E66368544AE70Aull, + 0xC48A829C72DD83CAull}, + {0xF900EB19E466A09Full, 0x31BE9E01A8C7D314ull, 0x3AFEC6B8CA08F471ull, + 0xB8C0EB0F87FFE7FBull}, + {0xDB277D8FBE3C8EFBull, 0x53CE6877E11AA57Bull, 0x719C94D20D9A7E7Dull, + 0xB345B56392453CC9ull}, + {0x37639C3BDBA4F2C9ull, 
0x6095E7B336466DC8ull, 0x3A8049791E65B88Aull, + 0x82C988CDE5927CD5ull}, + {0x6B1FB1A714234AE4ull, 0x20562E255BA6467Eull, 0x3E2B892D40F3D675ull, + 0xF40CE3FBE41ED768ull}, + {0x8EE11CB1B287C92Aull, 0x8FC2AAEFF63D266Dull, 0x66643487E6EB9F03ull, + 0x578AA91DE8D56873ull}, + {0xF5B1F8266A3AEB67ull, 0x83B040BE4DEC1ADDull, 0x7FE1C8635B26FBAEull, + 0xF4A3A447DEFED79Full}, + {0x90D8E6FF6AC12475ull, 0x1A422A196EDAC1F2ull, 0x9E3765FE1F8EB002ull, + 0xC1BDD7C4C351CFBEull}}; + +void RunTests() { + // TODO(janwas): detect number of cores. + ThreadPool pool(4); + + TargetBits tested = ~0U; + tested &= VerifyImplementations(kExpected64); + tested &= VerifyImplementations(kExpected128); + tested &= VerifyImplementations(kExpected256); + // Any failure causes immediate exit, so apparently all succeeded. + HH_TARGET_NAME::ForeachTarget(tested, [](const TargetBits target) { + printf("%10s: OK\n", TargetName(target)); + }); + + tested = ~0U; + tested &= VerifyCat(&pool); + tested &= VerifyCat(&pool); + tested &= VerifyCat(&pool); + HH_TARGET_NAME::ForeachTarget(tested, [](const TargetBits target) { + printf("%10sCat: OK\n", TargetName(target)); + }); +} + +#ifdef HH_GOOGLETEST +TEST(HighwayhashTest, OutputMatchesExpectations) { RunTests(); } +#endif + +} // namespace +} // namespace highwayhash + +#ifndef HH_GOOGLETEST +int main(int argc, char* argv[]) { + highwayhash::RunTests(); + return 0; +} +#endif diff --git a/highwayhash/highwayhash/highwayhash_test_avx2.cc b/highwayhash/highwayhash/highwayhash_test_avx2.cc new file mode 100644 index 000000000..f1efe0b5f --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_avx2.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME AVX2 +#include "highwayhash/highwayhash_test_target.cc" diff --git a/highwayhash/highwayhash/highwayhash_test_neon.cc b/highwayhash/highwayhash/highwayhash_test_neon.cc new file mode 100644 index 000000000..df5058829 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_neon.cc @@ -0,0 +1,22 @@ +// Copyright 2017-2019 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME NEON +// GCC 4.5.4 only defines the former; 5.4 defines both. 
+#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#include "highwayhash/highwayhash_test_target.cc" +#endif diff --git a/highwayhash/highwayhash/highwayhash_test_portable.cc b/highwayhash/highwayhash/highwayhash_test_portable.cc new file mode 100644 index 000000000..04930a7e1 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_portable.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME Portable +#include "highwayhash/highwayhash_test_target.cc" diff --git a/highwayhash/highwayhash/highwayhash_test_sse41.cc b/highwayhash/highwayhash/highwayhash_test_sse41.cc new file mode 100644 index 000000000..2d6e83d66 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_sse41.cc @@ -0,0 +1,19 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME SSE41 +#include "highwayhash/highwayhash_test_target.cc" diff --git a/highwayhash/highwayhash/highwayhash_test_target.cc b/highwayhash/highwayhash/highwayhash_test_target.cc new file mode 100644 index 000000000..65afd4e91 --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_target.cc @@ -0,0 +1,220 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#include "highwayhash/highwayhash_test_target.h" + +#include "highwayhash/highwayhash.h" + +#ifndef HH_DISABLE_TARGET_SPECIFIC +namespace highwayhash { +namespace { + +void NotifyIfUnequal(const size_t size, const HHResult64& expected, + const HHResult64& actual, const HHNotify notify) { + if (expected != actual) { + (*notify)(TargetName(HH_TARGET), size); + } +} + +// Overload for HHResult128 or HHResult256 (arrays). 
+template +void NotifyIfUnequal(const size_t size, const uint64_t (&expected)[kNumLanes], + const uint64_t (&actual)[kNumLanes], + const HHNotify notify) { + for (size_t i = 0; i < kNumLanes; ++i) { + if (expected[i] != actual[i]) { + (*notify)(TargetName(HH_TARGET), size); + return; + } + } +} + +// Shared logic for all HighwayHashTest::operator() overloads. +template +void TestHighwayHash(HHStateT* HH_RESTRICT state, + const char* HH_RESTRICT bytes, const size_t size, + const Result* expected, const HHNotify notify) { + // TODO(janwas): investigate (length=33) +#if HH_TARGET == HH_TARGET_Portable && HH_GCC_VERSION && !HH_CLANG_VERSION + return; +#endif + Result actual; + HighwayHashT(state, bytes, size, &actual); + NotifyIfUnequal(size, *expected, actual, notify); +} + +// Shared logic for all HighwayHashCatTest::operator() overloads. +template +void TestHighwayHashCat(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, const Result* expected, + const HHNotify notify) { + // TODO(janwas): investigate (length=33) +#if HH_TARGET == HH_TARGET_Portable && HH_GCC_VERSION && !HH_CLANG_VERSION + return; +#endif + + // Slightly faster to compute the expected prefix hashes only once. + // Use new instead of vector to avoid headers with inline functions. + Result* results = new Result[size + 1]; + for (size_t i = 0; i <= size; ++i) { + HHStateT state_flat(key); + HighwayHashT(&state_flat, bytes, i, &results[i]); + } + + // Splitting into three fragments/Append should cover all codepaths. 
+ const size_t max_fragment_size = size / 3; + for (size_t size1 = 0; size1 < max_fragment_size; ++size1) { + for (size_t size2 = 0; size2 < max_fragment_size; ++size2) { + for (size_t size3 = 0; size3 < max_fragment_size; ++size3) { + HighwayHashCatT cat(key); + const char* pos = bytes; + cat.Append(pos, size1); + pos += size1; + cat.Append(pos, size2); + pos += size2; + cat.Append(pos, size3); + pos += size3; + + Result result_cat; + cat.Finalize(&result_cat); + + const size_t total_size = pos - bytes; + NotifyIfUnequal(total_size, results[total_size], result_cat, notify); + } + } + } + + delete[] results; +} + +} // namespace + +template +void HighwayHashTest::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + const HHResult64* expected, + const HHNotify notify) const { + HHStateT state(key); + TestHighwayHash(&state, bytes, size, expected, notify); +} + +template +void HighwayHashTest::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + const HHResult128* expected, + const HHNotify notify) const { + HHStateT state(key); + TestHighwayHash(&state, bytes, size, expected, notify); +} + +template +void HighwayHashTest::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const size_t size, + const HHResult256* expected, + const HHNotify notify) const { + HHStateT state(key); + TestHighwayHash(&state, bytes, size, expected, notify); +} + +template +void HighwayHashCatTest::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const uint64_t size, + const HHResult64* expected, + const HHNotify notify) const { + TestHighwayHashCat(key, bytes, size, expected, notify); +} + +template +void HighwayHashCatTest::operator()(const HHKey& key, + const char* HH_RESTRICT bytes, + const uint64_t size, + const HHResult128* expected, + const HHNotify notify) const { + TestHighwayHashCat(key, bytes, size, expected, notify); +} + +template +void HighwayHashCatTest::operator()(const HHKey& key, + 
const char* HH_RESTRICT bytes, + const uint64_t size, + const HHResult256* expected, + const HHNotify notify) const { + TestHighwayHashCat(key, bytes, size, expected, notify); +} + +// Instantiate for the current target. +template struct HighwayHashTest; +template struct HighwayHashCatTest; + +//----------------------------------------------------------------------------- +// benchmark + +namespace { + +template +uint64_t RunHighway(const void*, const size_t size) { + HH_ALIGNAS(32) static const HHKey key = {0, 1, 2, 3}; + char in[kMaxBenchmarkInputSize]; + in[0] = static_cast(size & 0xFF); + HHResult64 result; + HHStateT state(key); + HighwayHashT(&state, in, size, &result); + return result; +} + +template +uint64_t RunHighwayCat(const void*, const size_t size) { + HH_ALIGNAS(32) static const HHKey key = {0, 1, 2, 3}; + HH_ALIGNAS(64) HighwayHashCatT cat(key); + char in[kMaxBenchmarkInputSize]; + in[0] = static_cast(size & 0xFF); + const size_t half_size = size / 2; + cat.Append(in, half_size); + cat.Append(in + half_size, size - half_size); + HHResult64 result; + cat.Finalize(&result); + return result; +} + +} // namespace + +template +void HighwayHashBenchmark::operator()(DurationsForInputs* input_map, + NotifyBenchmark notify, + void* context) const { + MeasureDurations(&RunHighway, input_map); + notify("HighwayHash", TargetName(Target), input_map, context); +} + +template +void HighwayHashCatBenchmark::operator()(DurationsForInputs* input_map, + NotifyBenchmark notify, + void* context) const { + MeasureDurations(&RunHighwayCat, input_map); + notify("HighwayHashCat", TargetName(Target), input_map, context); +} + +// Instantiate for the current target. 
+template struct HighwayHashBenchmark; +template struct HighwayHashCatBenchmark; + +} // namespace highwayhash +#endif // HH_DISABLE_TARGET_SPECIFIC diff --git a/highwayhash/highwayhash/highwayhash_test_target.h b/highwayhash/highwayhash/highwayhash_test_target.h new file mode 100644 index 000000000..56ae960ba --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_target.h @@ -0,0 +1,90 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_HIGHWAYHASH_TEST_TARGET_H_ +#define HIGHWAYHASH_HIGHWAYHASH_TEST_TARGET_H_ + +// Tests called by InstructionSets::RunAll, so we can verify all +// implementations supported by the current CPU. + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include + +#include "highwayhash/arch_specific.h" +#include "highwayhash/compiler_specific.h" +#include "highwayhash/hh_types.h" +#include "highwayhash/highwayhash.h" +#include "highwayhash/nanobenchmark.h" + +namespace highwayhash { + +// Verifies the hash result matches "expected" and calls "notify" if not. 
+template +struct HighwayHashTest { + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, const HHResult64* expected, + const HHNotify notify) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, const HHResult128* expected, + const HHNotify notify) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const size_t size, const HHResult256* expected, + const HHNotify notify) const; +}; + +// For every possible partition of "bytes" into zero to three fragments, +// verifies HighwayHashCat returns the same result as HighwayHashT of the +// concatenated fragments, and calls "notify" if not. The value of "expected" +// is ignored; it is only used for overloading. +template +struct HighwayHashCatTest { + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const uint64_t size, const HHResult64* expected, + const HHNotify notify) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const uint64_t size, const HHResult128* expected, + const HHNotify notify) const; + void operator()(const HHKey& key, const char* HH_RESTRICT bytes, + const uint64_t size, const HHResult256* expected, + const HHNotify notify) const; +}; + +// Called by benchmark with prefix, target_name, input_map, context. +// This function must set input_map->num_items to 0. +using NotifyBenchmark = void (*)(const char*, const char*, DurationsForInputs*, + void*); + +constexpr size_t kMaxBenchmarkInputSize = 1024; + +// Calls "notify" with benchmark results for the input sizes specified by +// "input_map" (<= kMaxBenchmarkInputSize) plus a "context" parameter. 
+template +struct HighwayHashBenchmark { + void operator()(DurationsForInputs* input_map, NotifyBenchmark notify, + void* context) const; +}; + +template +struct HighwayHashCatBenchmark { + void operator()(DurationsForInputs* input_map, NotifyBenchmark notify, + void* context) const; +}; + +} // namespace highwayhash + +#endif // HIGHWAYHASH_HIGHWAYHASH_TEST_TARGET_H_ diff --git a/highwayhash/highwayhash/highwayhash_test_vsx.cc b/highwayhash/highwayhash/highwayhash_test_vsx.cc new file mode 100644 index 000000000..224a65efe --- /dev/null +++ b/highwayhash/highwayhash/highwayhash_test_vsx.cc @@ -0,0 +1,22 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// WARNING: this is a "restricted" source file; avoid including any headers +// unless they are also restricted. See arch_specific.h for details. + +#define HH_TARGET_NAME VSX + +#ifdef __VSX__ +#include "highwayhash/highwayhash_test_target.cc" +#endif diff --git a/highwayhash/highwayhash/iaca.h b/highwayhash/highwayhash/iaca.h new file mode 100644 index 000000000..80e1013ae --- /dev/null +++ b/highwayhash/highwayhash/iaca.h @@ -0,0 +1,63 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_IACA_H_ +#define HIGHWAYHASH_IACA_H_ + +// WARNING: this is a "restricted" header because it is included from +// translation units compiled with different flags. This header and its +// dependencies must not define any function unless it is static inline and/or +// within namespace HH_TARGET_NAME. See arch_specific.h for details. + +#include "highwayhash/compiler_specific.h" + +// IACA (Intel's Code Analyzer, go/intel-iaca) analyzes instruction latencies, +// but only for code between special markers. These functions embed such markers +// in an executable, but only for reading via IACA - they deliberately trigger +// a crash if executed to ensure they are removed in normal builds. + +// Default off; callers must `#define HH_ENABLE_IACA 1` before including this. +#ifndef HH_ENABLE_IACA +#define HH_ENABLE_IACA 0 +#endif + +namespace highwayhash { + +#if HH_ENABLE_IACA && (HH_GCC_VERSION || HH_CLANG_VERSION) + +// Call before the region of interest. Fences hopefully prevent reordering. +static HH_INLINE void BeginIACA() { + HH_COMPILER_FENCE; + asm volatile( + ".byte 0x0F, 0x0B\n\t" // UD2 + "movl $111, %ebx\n\t" + ".byte 0x64, 0x67, 0x90\n\t"); + HH_COMPILER_FENCE; +} + +// Call after the region of interest. Fences hopefully prevent reordering. 
+static HH_INLINE void EndIACA() { + HH_COMPILER_FENCE; + asm volatile( + "movl $222, %ebx\n\t" + ".byte 0x64, 0x67, 0x90\n\t" + ".byte 0x0F, 0x0B\n\t"); // UD2 + HH_COMPILER_FENCE; +} + +#endif + +} // namespace highwayhash + +#endif // HIGHWAYHASH_IACA_H_ diff --git a/highwayhash/highwayhash/instruction_sets.cc b/highwayhash/highwayhash/instruction_sets.cc new file mode 100644 index 000000000..ab6775b10 --- /dev/null +++ b/highwayhash/highwayhash/instruction_sets.cc @@ -0,0 +1,144 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "highwayhash/instruction_sets.h" +#include "highwayhash/arch_specific.h" + +// Currently there are only specialized targets for X64; other architectures +// only use HH_TARGET_Portable, in which case Supported() just returns that. +#if HH_ARCH_X64 + +#include + +namespace highwayhash { + +namespace { + +bool IsBitSet(const uint32_t reg, const int index) { + return (reg & (1U << index)) != 0; +} + +// Returns the lower 32 bits of extended control register 0. +// Requires CPU support for "OSXSAVE" (see below). +uint32_t ReadXCR0() { +#if HH_MSC_VERSION + return static_cast(_xgetbv(0)); +#else + uint32_t xcr0, xcr0_high; + const uint32_t index = 0; + asm volatile(".byte 0x0F, 0x01, 0xD0" + : "=a"(xcr0), "=d"(xcr0_high) + : "c"(index)); + return xcr0; +#endif +} + +// 0 iff not yet initialized by Supported(). +// Not function-local => no compiler-generated locking. 
+std::atomic supported_{0}; + +// Bits indicating which instruction set extensions are supported. +enum { + kBitSSE = 1 << 0, + kBitSSE2 = 1 << 1, + kBitSSE3 = 1 << 2, + kBitSSSE3 = 1 << 3, + kBitSSE41 = 1 << 4, + kBitSSE42 = 1 << 5, + kBitAVX = 1 << 6, + kBitAVX2 = 1 << 7, + kBitFMA = 1 << 8, + kBitLZCNT = 1 << 9, + kBitBMI = 1 << 10, + kBitBMI2 = 1 << 11, + + kGroupAVX2 = kBitAVX | kBitAVX2 | kBitFMA | kBitLZCNT | kBitBMI | kBitBMI2, + kGroupSSE41 = kBitSSE | kBitSSE2 | kBitSSE3 | kBitSSSE3 | kBitSSE41 +}; + +} // namespace + +TargetBits InstructionSets::Supported() { + TargetBits supported = supported_.load(std::memory_order_acquire); + // Already initialized, return that. + if (HH_LIKELY(supported)) { + return supported; + } + + uint32_t flags = 0; + uint32_t abcd[4]; + + Cpuid(0, 0, abcd); + const uint32_t max_level = abcd[0]; + + // Standard feature flags + Cpuid(1, 0, abcd); + flags |= IsBitSet(abcd[3], 25) ? kBitSSE : 0; + flags |= IsBitSet(abcd[3], 26) ? kBitSSE2 : 0; + flags |= IsBitSet(abcd[2], 0) ? kBitSSE3 : 0; + flags |= IsBitSet(abcd[2], 9) ? kBitSSSE3 : 0; + flags |= IsBitSet(abcd[2], 19) ? kBitSSE41 : 0; + flags |= IsBitSet(abcd[2], 20) ? kBitSSE42 : 0; + flags |= IsBitSet(abcd[2], 12) ? kBitFMA : 0; + flags |= IsBitSet(abcd[2], 28) ? kBitAVX : 0; + const bool has_xsave = IsBitSet(abcd[2], 26); + const bool has_osxsave = IsBitSet(abcd[2], 27); + + // Extended feature flags + Cpuid(0x80000001U, 0, abcd); + flags |= IsBitSet(abcd[2], 5) ? kBitLZCNT : 0; + + // Extended features + if (max_level >= 7) { + Cpuid(7, 0, abcd); + flags |= IsBitSet(abcd[1], 3) ? kBitBMI : 0; + flags |= IsBitSet(abcd[1], 5) ? kBitAVX2 : 0; + flags |= IsBitSet(abcd[1], 8) ? kBitBMI2 : 0; + } + + // Verify OS support for XSAVE, without which XMM/YMM registers are not + // preserved across context switches and are not safe to use. 
+ if (has_xsave && has_osxsave) { + const uint32_t xcr0 = ReadXCR0(); + // XMM/YMM + if ((xcr0 & 2) == 0 || (xcr0 & 4) == 0) { + flags &= ~(kBitAVX | kBitAVX2); + } + } else { + // Clear the AVX/AVX2 bits if the CPU or OS does not support XSAVE. + // + // The lower 128 bits of XMM0-XMM15 are guaranteed to be preserved across + // context switches on x86_64 and any modern 32-bit system, so only AVX2 + // needs to be disabled. + flags &= ~(kBitAVX | kBitAVX2); + } + + // Also indicates "supported" has been initialized. + supported = HH_TARGET_Portable; + + // Set target bit(s) if all their group's flags are all set. + if ((flags & kGroupAVX2) == kGroupAVX2) { + supported |= HH_TARGET_AVX2; + } + if ((flags & kGroupSSE41) == kGroupSSE41) { + supported |= HH_TARGET_SSE41; + } + + supported_.store(supported, std::memory_order_release); + return supported; +} + +} // namespace highwayhash + +#endif // HH_ARCH_X64 diff --git a/highwayhash/highwayhash/instruction_sets.h b/highwayhash/highwayhash/instruction_sets.h new file mode 100644 index 000000000..aa7bd6b3f --- /dev/null +++ b/highwayhash/highwayhash/instruction_sets.h @@ -0,0 +1,118 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAYHASH_INSTRUCTION_SETS_H_ +#define HIGHWAYHASH_INSTRUCTION_SETS_H_ + +// Calls the best specialization of a template supported by the current CPU. 
+// +// Usage: for each dispatch site, declare a Functor template with a 'Target' +// argument, add a source file defining its operator() and instantiating +// Functor, add a cc_library_for_targets rule for that source file, +// and call InstructionSets::Run(/*args*/). + +#include // std::forward + +#include "highwayhash/arch_specific.h" // HH_TARGET_* +#include "highwayhash/compiler_specific.h" + +namespace highwayhash { + +// Detects TargetBits and calls specializations of a user-defined functor. +class InstructionSets { + public: +// Returns bit array of HH_TARGET_* supported by the current CPU. +// The HH_TARGET_Portable bit is guaranteed to be set. +#if HH_ARCH_X64 + static TargetBits Supported(); +#elif HH_ARCH_PPC + static HH_INLINE TargetBits Supported() { + return HH_TARGET_VSX | HH_TARGET_Portable; + } +#elif HH_ARCH_NEON + static HH_INLINE TargetBits Supported() { + return HH_TARGET_NEON | HH_TARGET_Portable; + } +#else + static HH_INLINE TargetBits Supported() { return HH_TARGET_Portable; } +#endif + + // Chooses the best available "Target" for the current CPU, runs the + // corresponding Func::operator()(args) and returns that Target + // (a single bit). The overhead of dispatching is low, about 4 cycles, but + // this should only be called infrequently (e.g. hoisting it out of loops). + template