From a7e7b0237e286b8e92100d89785223bcd5158e21 Mon Sep 17 00:00:00 2001 From: Kannan Goundan Date: Thu, 20 Dec 2018 00:40:37 -0800 Subject: [PATCH] use optimized implementations from XKCP (#8) * src/libkeccak: Use XKCP's optimized implementations It looks like we were using the reference C implementation, which is not designed for good performance. Switch to using either the 32-bit or 64-bit optimized C implementations, depending on the Node "arch" variable. * Remove submodule KeccakCodePackage * Bump minimum node version to 6.0.0 * Small improvements in src/README.md * Add preprocessor in src/addon.cc * Add submodle github.com/XKCP/XKCP for test vectors --- .gitmodules | 6 +- binding.gyp | 34 +- package.json | 3 +- src/README.md | 28 + src/addon.cc | 25 +- .../KeccakP-1600-SnP.h | 21 +- src/libkeccak-32/KeccakP-1600-inplace32BI.c | 1162 +++++++++++++++++ src/libkeccak-32/KeccakSponge-common.h | 35 + .../KeccakSponge.inc | 14 +- src/libkeccak-32/KeccakSpongeWidth1600.c | 54 + src/libkeccak-32/KeccakSpongeWidth1600.h | 31 + src/libkeccak-32/SnP-Relaned.h | 140 ++ src/{libkeccak => libkeccak-32}/align.h | 12 +- src/{libkeccak => libkeccak-32}/brg_endian.h | 15 +- src/libkeccak-64/KeccakP-1600-64.macros | 745 +++++++++++ src/libkeccak-64/KeccakP-1600-SnP.h | 51 + src/libkeccak-64/KeccakP-1600-opt64-config.h | 6 + src/libkeccak-64/KeccakP-1600-opt64.c | 564 ++++++++ .../KeccakP-1600-unrolling.macros | 302 +++++ src/libkeccak-64/KeccakSponge-common.h | 35 + src/libkeccak-64/KeccakSponge.inc | 311 +++++ src/libkeccak-64/KeccakSpongeWidth1600.c | 54 + src/libkeccak-64/KeccakSpongeWidth1600.h | 31 + src/libkeccak-64/SnP-Relaned.h | 140 ++ src/libkeccak-64/align.h | 32 + src/libkeccak-64/brg_endian.h | 143 ++ src/libkeccak/KeccakP-1600-reference.c | 397 ------ src/libkeccak/KeccakP-1600-reference.h | 22 - src/libkeccak/KeccakSponge.c | 110 -- src/libkeccak/KeccakSponge.h | 178 --- test/vectors-sha3.js | 2 +- test/vectors-shake.js | 2 +- util/KeccakCodePackage | 1 - util/XKCP | 1 + util/libkeccak.sh | 27 - 35 files changed, 3950 insertions(+), 784 deletions(-) create mode 100644 src/README.md rename src/{libkeccak => libkeccak-32}/KeccakP-1600-SnP.h (75%) create mode 100644 src/libkeccak-32/KeccakP-1600-inplace32BI.c create mode 100644 src/libkeccak-32/KeccakSponge-common.h rename src/{libkeccak => libkeccak-32}/KeccakSponge.inc (97%) create mode 100644 src/libkeccak-32/KeccakSpongeWidth1600.c create mode 100644 src/libkeccak-32/KeccakSpongeWidth1600.h create mode 100644 src/libkeccak-32/SnP-Relaned.h rename src/{libkeccak => libkeccak-32}/align.h (71%) rename src/{libkeccak => libkeccak-32}/brg_endian.h (88%) create mode 100644 src/libkeccak-64/KeccakP-1600-64.macros create mode 100644 src/libkeccak-64/KeccakP-1600-SnP.h create mode 100644 src/libkeccak-64/KeccakP-1600-opt64-config.h create mode 100644 src/libkeccak-64/KeccakP-1600-opt64.c create mode 100644 src/libkeccak-64/KeccakP-1600-unrolling.macros create mode 100644 src/libkeccak-64/KeccakSponge-common.h create mode 100644 src/libkeccak-64/KeccakSponge.inc create mode 100644 src/libkeccak-64/KeccakSpongeWidth1600.c create mode 100644 src/libkeccak-64/KeccakSpongeWidth1600.h create mode 100644 src/libkeccak-64/SnP-Relaned.h create mode 100644 src/libkeccak-64/align.h create mode 100644 src/libkeccak-64/brg_endian.h delete mode 100644 src/libkeccak/KeccakP-1600-reference.c delete mode 100644 src/libkeccak/KeccakP-1600-reference.h delete mode 100644 src/libkeccak/KeccakSponge.c delete mode 100644 src/libkeccak/KeccakSponge.h delete mode 160000 util/KeccakCodePackage create mode 160000 util/XKCP delete mode 100755 util/libkeccak.sh diff --git a/.gitmodules b/.gitmodules index cf81732..e84c4c1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "keccak-src"] - path = util/KeccakCodePackage - url = https://github.com/gvanas/KeccakCodePackage.git +[submodule "util/XKCP"] + path = util/XKCP + url = https://github.com/XKCP/XKCP.git diff --git a/binding.gyp b/binding.gyp index 9162bd2..2971268 100644 --- a/binding.gyp +++ b/binding.gyp @@ -1,19 +1,39 @@ { + "variables": { + "arch": "=4.0.0" + "node": ">=6.0.0" }, "gypfile": true, "browser": { diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..3ffe0e5 --- /dev/null +++ b/src/README.md @@ -0,0 +1,28 @@ +# Importing Keccak C code + +The XKCP project contains various implementations of Keccak-related algorithms. These are the steps to select a specific implementation and import the code into our project. + +First, generate the source bundles in XKCP: + +``` +git clone https://github.com/XKCP/XKCP.git +cd XKCP +git checkout 58b20ec + +# Edit "Makefile.build". After all the tags, add the following two tags: + + + +make node32.pack node64.pack +``` + +The source files we need are now under XKCP's "bin/.pack/npm32/" and "bin/.pack/npm64/". +- Copy those to our repo under "src/libkeccak-32" and "src/libkeccak-64". +- Update our "binding.gyp" to point to the correct ".c" files. +- Run `npm run rebuild`. + +## Implementation Choice + +Currently, we're using two of XKCP KeccakP[1600] implementations -- the generic 32-bit-optimized one and the generic 64-bit-optimized one. + +XKCP has implementations that use CPU-specific instructions (e.g. Intel AVR) and are likely much faster. It might be worth using those. diff --git a/src/addon.cc b/src/addon.cc index 77947ef..ecc1542 100644 --- a/src/addon.cc +++ b/src/addon.cc @@ -2,7 +2,13 @@ #include extern "C" { - #include "libkeccak/KeccakSponge.h" +#if LIBKECCAK == 32 + #include "libkeccak-32/KeccakSpongeWidth1600.h" +#elif LIBKECCAK == 64 + #include "libkeccak-64/KeccakSpongeWidth1600.h" +#else + #error "LIBKECCAK not defined correctly" +#endif } class KeccakWrapper : public Nan::ObjectWrap { @@ -34,8 +40,13 @@ class KeccakWrapper : public Nan::ObjectWrap { static NAN_METHOD(Initialize) { KeccakWrapper* obj = Nan::ObjectWrap::Unwrap(info.Holder()); +#if (NODE_MODULE_VERSION >= NODE_7_0_MODULE_VERSION) + unsigned int rate = info[0]->IntegerValue(info.GetIsolate()->GetCurrentContext()).ToChecked(); + unsigned int capacity = info[1]->IntegerValue(info.GetIsolate()->GetCurrentContext()).ToChecked(); +#else unsigned int rate = info[0]->IntegerValue(); unsigned int capacity = info[1]->IntegerValue(); +#endif // ignore return code, rate & capacity always will right because internal object KeccakWidth1600_SpongeInitialize(&obj->sponge, rate, capacity); @@ -53,7 +64,11 @@ class KeccakWrapper : public Nan::ObjectWrap { static NAN_METHOD(AbsorbLastFewBits) { KeccakWrapper* obj = Nan::ObjectWrap::Unwrap(info.Holder()); +#if (NODE_MODULE_VERSION >= NODE_7_0_MODULE_VERSION) + unsigned char bits = info[0]->IntegerValue(info.GetIsolate()->GetCurrentContext()).ToChecked(); +#else unsigned char bits = info[0]->IntegerValue(); +#endif // ignore return code, bcause internal object KeccakWidth1600_SpongeAbsorbLastFewBits(&obj->sponge, bits); @@ -61,7 +76,11 @@ class KeccakWrapper : public Nan::ObjectWrap { static NAN_METHOD(Squeeze) { KeccakWrapper* obj = Nan::ObjectWrap::Unwrap(info.Holder()); +#if (NODE_MODULE_VERSION >= NODE_7_0_MODULE_VERSION) + size_t length = info[0]->IntegerValue(info.GetIsolate()->GetCurrentContext()).ToChecked(); +#else size_t length = info[0]->IntegerValue(); +#endif v8::Local buffer = Nan::NewBuffer(length).ToLocalChecked(); unsigned char* data = (unsigned char*) node::Buffer::Data(buffer); @@ -72,7 +91,11 @@ class KeccakWrapper : public Nan::ObjectWrap { static NAN_METHOD(Copy) { KeccakWrapper* from = Nan::ObjectWrap::Unwrap(info.Holder()); +#if (NODE_MODULE_VERSION >= NODE_7_0_MODULE_VERSION) + KeccakWrapper* to = Nan::ObjectWrap::Unwrap(info[0]->ToObject(info.GetIsolate()->GetCurrentContext()).ToLocalChecked()); +#else KeccakWrapper* to = Nan::ObjectWrap::Unwrap(info[0]->ToObject()); +#endif memcpy(&to->sponge, &from->sponge, sizeof(KeccakWidth1600_SpongeInstance)); } diff --git a/src/libkeccak/KeccakP-1600-SnP.h b/src/libkeccak-32/KeccakP-1600-SnP.h similarity index 75% rename from src/libkeccak/KeccakP-1600-SnP.h rename to src/libkeccak-32/KeccakP-1600-SnP.h index 22dddb0..9cb124f 100644 --- a/src/libkeccak/KeccakP-1600-SnP.h +++ b/src/libkeccak-32/KeccakP-1600-SnP.h @@ -1,16 +1,16 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +Implementation by Ronny Van Keer, hereby denoted as "the implementer". -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +For more information, feedback or questions, please refer to our website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +Please refer to SnP-documentation.h for more details. */ #ifndef _KeccakP_1600_SnP_h_ @@ -19,20 +19,17 @@ and related or neighboring rights to the source code in this file. /** For the documentation, see SnP-documentation.h. */ -#define KeccakP1600_implementation "64-bit reference implementation" +#define KeccakP1600_implementation "in-place 32-bit optimized implementation" #define KeccakP1600_stateSizeInBytes 200 #define KeccakP1600_stateAlignment 8 -#ifdef KeccakReference -void KeccakP1600_StaticInitialize( void ); -#else #define KeccakP1600_StaticInitialize() -#endif void KeccakP1600_Initialize(void *state); void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); void KeccakP1600_Permute_12rounds(void *state); void KeccakP1600_Permute_24rounds(void *state); void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); diff --git a/src/libkeccak-32/KeccakP-1600-inplace32BI.c b/src/libkeccak-32/KeccakP-1600-inplace32BI.c new file mode 100644 index 0000000..d11acde --- /dev/null +++ b/src/libkeccak-32/KeccakP-1600-inplace32BI.c @@ -0,0 +1,1162 @@ +/* +Implementation by Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file implements Keccak-p[1600] in a SnP-compatible way. +Please refer to SnP-documentation.h for more details. + +This implementation comes with KeccakP-1600-SnP.h in the same folder. +Please refer to LowLevel.build for the exact list of other files it must be combined with. +*/ + +#include +#include "brg_endian.h" +#include "KeccakP-1600-SnP.h" +#include "SnP-Relaned.h" + +typedef unsigned char UINT8; +typedef unsigned int UINT32; +/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */ +/* typedef unsigned long UINT32; */ + +#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + temp0 = (low); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp1 = (high); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); + +#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd ^= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even &= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd &= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd = (temp0 >> 16) | (temp1 & 0xFFFF0000); + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + temp0 = (even); \ + temp1 = (odd); \ + temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \ + temp0 = temp; \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); + +#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + low = temp0; \ + high = temp1; + +#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + lowOut = lowIn ^ temp0; \ + highOut = highIn ^ temp1; + +void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0xFF, offset); + memset(laneAsBytes+offset, 0x00, length); + memset(laneAsBytes+offset+length, 0xFF, 8-offset-length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) +{ + unsigned int lanePosition = offset/8; + unsigned int offsetInLane = offset%8; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + if (offsetInLane < 4) { + low = (UINT32)byte << (offsetInLane*8); + high = 0; + } + else { + low = 0; + high = (UINT32)byte << ((offsetInLane-4)*8); + } + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0, 8); + memcpy(laneAsBytes+offset, data, length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + const UINT32 * pI = (const UINT32 *)data; + UINT32 * pS = (UINT32*)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + memcpy(data, laneAsBytes+offset, length); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT32 * pI = (UINT32 *)data; + const UINT32 * pS = ( const UINT32 *)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + memcpy(pI++, &low, 4); + memcpy(pI++, &high, 4); +#else + fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + memcpy(data+lanePosition*8, laneAsBytes, 8); + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + UINT8 laneAsBytes[8]; + unsigned int i; + + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + *((UINT32*)(laneAsBytes+0)) = low; + *((UINT32*)(laneAsBytes+4)) = high; +#else + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + for(i=0; i= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + *(pO++) = *(pI++) ^ low; + *(pO++) = *(pI++) ^ high; +#else + fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0)); + ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4)); + } +#endif +} +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] = +{ + 0x00000001UL, 0x00000000UL, + 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, + 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, + 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, + 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, + 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, + 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, + 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, + 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, + 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, + 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, + 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, + 0x00000000UL, 0x80008082UL, + 0x000000FFUL +}; + +#define KeccakRound0() \ + Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ + Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ + Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ + De1 = Cz^Cw; \ + Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Age0^De0), 22); \ + Bi = ROL32((Aki1^Di1), 22); \ + Bo = ROL32((Amo1^Do1), 11); \ + Bu = ROL32((Asu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Age0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Age1^De1), 22); \ + Bi = ROL32((Aki0^Di0), 21); \ + Bo = ROL32((Amo0^Do0), 10); \ + Bu = ROL32((Asu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Age1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka1^Da1), 2); \ + Bo = ROL32((Ame1^De1), 23); \ + Bu = ROL32((Asi1^Di1), 31); \ + Ba = ROL32((Abo0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka0^Da0), 1); \ + Bo = ROL32((Ame0^De0), 22); \ + Bu = ROL32((Asi0^Di0), 30); \ + Ba = ROL32((Abo1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa0^Da0), 9); \ + Ba = ROL32((Abe1^De1), 1); \ + Be = ROL32((Agi0^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Amu0^Du0), 4); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa1^Da1), 9); \ + Ba = (Abe0^De0); \ + Be = ROL32((Agi1^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Amu1^Du1), 4); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga0^Da0), 18); \ + Bi = ROL32((Ake0^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Aso0^Do0), 28); \ + Ba = ROL32((Abu1^Du1), 14); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga1^Da1), 18); \ + Bi = ROL32((Ake1^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Aso1^Do1), 28); \ + Ba = ROL32((Abu0^Du0), 13); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Abi0^Di0), 31); \ + Be = ROL32((Ago1^Do1), 28); \ + Bi = ROL32((Aku1^Du1), 20); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Abi1^Di1), 31); \ + Be = ROL32((Ago0^Do0), 27); \ + Bi = ROL32((Aku0^Du0), 19); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ) + +#define KeccakRound1() \ + Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \ + Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \ + Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \ + De1 = Cz^Cw; \ + Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ame1^De0), 22); \ + Bi = ROL32((Agi1^Di1), 22); \ + Bo = ROL32((Aso1^Do1), 11); \ + Bu = ROL32((Aku1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ame0^De1), 22); \ + Bi = ROL32((Agi0^Di0), 21); \ + Bo = ROL32((Aso0^Do0), 10); \ + Bu = ROL32((Aku0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa1^Da1), 2); \ + Bo = ROL32((Ake1^De1), 23); \ + Bu = ROL32((Abi1^Di1), 31); \ + Ba = ROL32((Amo1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa0^Da0), 1); \ + Bo = ROL32((Ake0^De0), 22); \ + Bu = ROL32((Abi0^Di0), 30); \ + Ba = ROL32((Amo0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama1^Da0), 9); \ + Ba = ROL32((Age1^De1), 1); \ + Be = ROL32((Asi1^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Abu1^Du0), 4); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama0^Da1), 9); \ + Ba = (Age0^De0); \ + Be = ROL32((Asi0^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Abu0^Du1), 4); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka1^Da0), 18); \ + Bi = ROL32((Abe1^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Ago1^Do0), 28); \ + Ba = ROL32((Asu1^Du1), 14); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka0^Da1), 18); \ + Bi = ROL32((Abe0^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Ago0^Do1), 28); \ + Ba = ROL32((Asu0^Du0), 13); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Aki1^Di0), 31); \ + Be = ROL32((Abo1^Do1), 28); \ + Bi = ROL32((Amu1^Du1), 20); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Aki0^Di1), 31); \ + Be = ROL32((Abo0^Do0), 27); \ + Bi = ROL32((Amu0^Du0), 19); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); + +#define KeccakRound2() \ + Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \ + Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \ + Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \ + De1 = Cz^Cw; \ + Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ake1^De0), 22); \ + Bi = ROL32((Asi0^Di1), 22); \ + Bo = ROL32((Ago0^Do1), 11); \ + Bu = ROL32((Amu1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ake0^De1), 22); \ + Bi = ROL32((Asi1^Di0), 21); \ + Bo = ROL32((Ago1^Do0), 10); \ + Bu = ROL32((Amu0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama0^Da1), 2); \ + Bo = ROL32((Abe0^De1), 23); \ + Bu = ROL32((Aki0^Di1), 31); \ + Ba = ROL32((Aso1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama1^Da0), 1); \ + Bo = ROL32((Abe1^De0), 22); \ + Bu = ROL32((Aki1^Di0), 30); \ + Ba = ROL32((Aso0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga1^Da0), 9); \ + Ba = ROL32((Ame0^De1), 1); \ + Be = ROL32((Abi1^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Asu1^Du0), 4); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga0^Da1), 9); \ + Ba = (Ame1^De0); \ + Be = ROL32((Abi0^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Asu0^Du1), 4); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa1^Da0), 18); \ + Bi = ROL32((Age1^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Abo1^Do0), 28); \ + Ba = ROL32((Aku0^Du1), 14); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa0^Da1), 18); \ + Bi = ROL32((Age0^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Abo0^Do1), 28); \ + Ba = ROL32((Aku1^Du0), 13); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Agi1^Di0), 31); \ + Be = ROL32((Amo0^Do1), 28); \ + Bi = ROL32((Abu0^Du1), 20); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Agi0^Di1), 31); \ + Be = ROL32((Amo1^Do0), 27); \ + Bi = ROL32((Abu1^Du0), 19); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); + +#define KeccakRound3() \ + Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \ + Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \ + Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \ + De1 = Cz^Cw; \ + Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Abe0^De0), 22); \ + Bi = ROL32((Abi0^Di1), 22); \ + Bo = ROL32((Abo0^Do1), 11); \ + Bu = ROL32((Abu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Abe1^De1), 22); \ + Bi = ROL32((Abi1^Di0), 21); \ + Bo = ROL32((Abo1^Do0), 10); \ + Bu = ROL32((Abu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aga0^Da1), 2); \ + Bo = ROL32((Age0^De1), 23); \ + Bu = ROL32((Agi0^Di1), 31); \ + Ba = ROL32((Ago0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aga1^Da0), 1); \ + Bo = ROL32((Age1^De0), 22); \ + Bu = ROL32((Agi1^Di0), 30); \ + Ba = ROL32((Ago1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka0^Da0), 9); \ + Ba = ROL32((Ake0^De1), 1); \ + Be = ROL32((Aki0^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Aku0^Du0), 4); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka1^Da1), 9); \ + Ba = (Ake1^De0); \ + Be = ROL32((Aki1^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Aku1^Du1), 4); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama0^Da0), 18); \ + Bi = ROL32((Ame0^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Amo0^Do0), 28); \ + Ba = ROL32((Amu0^Du1), 14); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama1^Da1), 18); \ + Bi = ROL32((Ame1^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Amo1^Do1), 28); \ + Ba = ROL32((Amu1^Du0), 13); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Asi0^Di0), 31); \ + Be = ROL32((Aso0^Do1), 28); \ + Bi = ROL32((Asu0^Du1), 20); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Asi1^Di1), 31); \ + Be = ROL32((Aso1^Do0), 27); \ + Bi = ROL32((Asu1^Du0), 19); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); + +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds) +{ + UINT32 Da0, De0, Di0, Do0, Du0; + UINT32 Da1, De1, Di1, Do1, Du1; + UINT32 Ba, Be, Bi, Bo, Bu; + UINT32 Cx, Cy, Cz, Cw; + const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2; + UINT32 *stateAsHalfLanes = (UINT32*)state; + #define Aba0 stateAsHalfLanes[ 0] + #define Aba1 stateAsHalfLanes[ 1] + #define Abe0 stateAsHalfLanes[ 2] + #define Abe1 stateAsHalfLanes[ 3] + #define Abi0 stateAsHalfLanes[ 4] + #define Abi1 stateAsHalfLanes[ 5] + #define Abo0 stateAsHalfLanes[ 6] + #define Abo1 stateAsHalfLanes[ 7] + #define Abu0 stateAsHalfLanes[ 8] + #define Abu1 stateAsHalfLanes[ 9] + #define Aga0 stateAsHalfLanes[10] + #define Aga1 stateAsHalfLanes[11] + #define Age0 stateAsHalfLanes[12] + #define Age1 stateAsHalfLanes[13] + #define Agi0 stateAsHalfLanes[14] + #define Agi1 stateAsHalfLanes[15] + #define Ago0 stateAsHalfLanes[16] + #define Ago1 stateAsHalfLanes[17] + #define Agu0 stateAsHalfLanes[18] + #define Agu1 stateAsHalfLanes[19] + #define Aka0 stateAsHalfLanes[20] + #define Aka1 stateAsHalfLanes[21] + #define Ake0 stateAsHalfLanes[22] + #define Ake1 stateAsHalfLanes[23] + #define Aki0 stateAsHalfLanes[24] + #define Aki1 stateAsHalfLanes[25] + #define Ako0 stateAsHalfLanes[26] + #define Ako1 stateAsHalfLanes[27] + #define Aku0 stateAsHalfLanes[28] + #define Aku1 stateAsHalfLanes[29] + #define Ama0 stateAsHalfLanes[30] + #define Ama1 stateAsHalfLanes[31] + #define Ame0 stateAsHalfLanes[32] + #define Ame1 stateAsHalfLanes[33] + #define Ami0 stateAsHalfLanes[34] + #define Ami1 stateAsHalfLanes[35] + #define Amo0 stateAsHalfLanes[36] + #define Amo1 stateAsHalfLanes[37] + #define Amu0 stateAsHalfLanes[38] + #define Amu1 stateAsHalfLanes[39] + #define Asa0 stateAsHalfLanes[40] + #define Asa1 stateAsHalfLanes[41] + #define Ase0 stateAsHalfLanes[42] + #define Ase1 stateAsHalfLanes[43] + #define Asi0 stateAsHalfLanes[44] + #define Asi1 stateAsHalfLanes[45] + #define Aso0 stateAsHalfLanes[46] + #define Aso1 stateAsHalfLanes[47] + #define Asu0 stateAsHalfLanes[48] + #define Asu1 stateAsHalfLanes[49] + + nRounds &= 3; + switch ( nRounds ) + { + #define I0 Ba + #define I1 Be + #define T0 Bi + #define T1 Bo + #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[eo0] = T0; (in0)[eo0^1] = T1; \ + T0 = (in2)[0]; T1 = (in2)[1]; \ + (in1)[eo1] = T0; (in1)[eo1^1] = T1; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[eo2] = T0; (in2)[eo2^1] = T1; \ + (in3)[eo3] = I0; (in3)[eo3^1] = I1 + #define SwapPI2( in0,in1,in2,in3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[1] = T0; (in0)[0] = T1; \ + (in1)[1] = I0; (in1)[0] = I1; \ + I0 = (in2)[0]; I1 = (in2)[1]; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[1] = T0; (in2)[0] = T1; \ + (in3)[1] = I0; (in3)[0] = I1 + #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0 + + case 1: + SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 ); + SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 ); + SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 ); + break; + + case 2: + SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 ); + SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 ); + SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 ); + SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 ); + SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 ); + break; + + case 3: + SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 ); + SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 ); + SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 ); + break; + #undef I0 + #undef I1 + #undef T0 + #undef T1 + #undef SwapPI13 + #undef SwapPI2 + #undef SwapEO + } + + do + { + /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ + switch ( nRounds ) + { + case 0: KeccakRound0(); /* fall through */ + case 3: KeccakRound1(); + case 2: KeccakRound2(); + case 1: KeccakRound3(); + } + nRounds = 0; + } + while ( *pRoundConstants != 0xFF ); + + #undef Aba0 + #undef Aba1 + #undef Abe0 + #undef Abe1 + #undef Abi0 + #undef Abi1 + #undef Abo0 + #undef Abo1 + #undef Abu0 + #undef Abu1 + #undef Aga0 + #undef Aga1 + #undef Age0 + #undef Age1 + #undef Agi0 + #undef Agi1 + #undef Ago0 + #undef Ago1 + #undef Agu0 + #undef Agu1 + #undef Aka0 + #undef Aka1 + #undef Ake0 + #undef Ake1 + #undef Aki0 + #undef Aki1 + #undef Ako0 + #undef Ako1 + #undef Aku0 + #undef Aku1 + #undef Ama0 + #undef Ama1 + #undef Ame0 + #undef Ame1 + #undef Ami0 + #undef Ami1 + #undef Amo0 + #undef Amo1 + #undef Amu0 + #undef Amu1 + #undef Asa0 + #undef Asa1 + #undef Ase0 + #undef Ase1 + #undef Asi0 + #undef Asi1 + #undef Aso0 + #undef Aso1 + #undef Asu0 + #undef Asu1 +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 12); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 24); +} diff --git a/src/libkeccak-32/KeccakSponge-common.h b/src/libkeccak-32/KeccakSponge-common.h new file mode 100644 index 0000000..8fb3ba1 --- /dev/null +++ b/src/libkeccak-32/KeccakSponge-common.h @@ -0,0 +1,35 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSpongeCommon_h_ +#define _KeccakSpongeCommon_h_ + +#include +#include "align.h" + +#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define KCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#endif diff --git a/src/libkeccak/KeccakSponge.inc b/src/libkeccak-32/KeccakSponge.inc similarity index 97% rename from src/libkeccak/KeccakSponge.inc rename to src/libkeccak-32/KeccakSponge.inc index 42a15aa..f6c59ce 100644 --- a/src/libkeccak/KeccakSponge.inc +++ b/src/libkeccak-32/KeccakSponge.inc @@ -1,12 +1,10 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. diff --git a/src/libkeccak-32/KeccakSpongeWidth1600.c b/src/libkeccak-32/KeccakSpongeWidth1600.c new file mode 100644 index 0000000..672ec36 --- /dev/null +++ b/src/libkeccak-32/KeccakSpongeWidth1600.c @@ -0,0 +1,54 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSpongeWidth1600.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/src/libkeccak-32/KeccakSpongeWidth1600.h b/src/libkeccak-32/KeccakSpongeWidth1600.h new file mode 100644 index 0000000..1558256 --- /dev/null +++ b/src/libkeccak-32/KeccakSpongeWidth1600.h @@ -0,0 +1,31 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSpongeWidth1600_h_ +#define _KeccakSpongeWidth1600_h_ + +#include "KeccakSponge-common.h" + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) +#endif + +#endif diff --git a/src/libkeccak-32/SnP-Relaned.h b/src/libkeccak-32/SnP-Relaned.h new file mode 100644 index 0000000..8dbf3d2 --- /dev/null +++ b/src/libkeccak-32/SnP-Relaned.h @@ -0,0 +1,140 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file contains macros that help implement a permutation in a SnP-compatible way. +It converts an implementation that implement state input/output functions +in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane, +and similarly for Overwite, Extract and ExtractAndAdd) to the byte-oriented SnP. +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/src/libkeccak/align.h b/src/libkeccak-32/align.h similarity index 71% rename from src/libkeccak/align.h rename to src/libkeccak-32/align.h index e29771e..90c1b37 100644 --- a/src/libkeccak/align.h +++ b/src/libkeccak-32/align.h @@ -1,12 +1,10 @@ /* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ +For more information, feedback or questions, please refer to our website: +https://keccak.team/ To the extent possible under law, the implementer has waived all copyright and related or neighboring rights to the source code in this file. diff --git a/src/libkeccak/brg_endian.h b/src/libkeccak-32/brg_endian.h similarity index 88% rename from src/libkeccak/brg_endian.h rename to src/libkeccak-32/brg_endian.h index 7226eb3..7c640b9 100644 --- a/src/libkeccak/brg_endian.h +++ b/src/libkeccak-32/brg_endian.h @@ -114,13 +114,14 @@ defined( __VMS ) || defined( _M_X64 ) # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ - defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ - defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ - defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ - defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ - defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ - defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN #elif defined(__arm__) diff --git a/src/libkeccak-64/KeccakP-1600-64.macros b/src/libkeccak-64/KeccakP-1600-64.macros new file mode 100644 index 0000000..60f8054 --- /dev/null +++ b/src/libkeccak-64/KeccakP-1600-64.macros @@ -0,0 +1,745 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define HTOLE64(x) (x) +#else +#define HTOLE64(x) (\ + ((x & 0xff00000000000000ull) >> 56) | \ + ((x & 0x00ff000000000000ull) >> 40) | \ + ((x & 0x0000ff0000000000ull) >> 24) | \ + ((x & 0x000000ff00000000ull) >> 8) | \ + ((x & 0x00000000ff000000ull) << 8) | \ + ((x & 0x0000000000ff0000ull) << 24) | \ + ((x & 0x000000000000ff00ull) << 40) | \ + ((x & 0x00000000000000ffull) << 56)) +#endif + +#define addInput(X, input, laneCount) \ + if (laneCount == 21) { \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ + X##ku ^= HTOLE64(input[14]); \ + X##ma ^= HTOLE64(input[15]); \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ + X##sa ^= HTOLE64(input[20]); \ + } \ + else if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + } \ + else { \ + X##ba ^= HTOLE64(input[ 0]); \ + } \ + } \ + else { \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + if (laneCount < 3) { \ + } \ + else { \ + X##bi ^= HTOLE64(input[ 2]); \ + } \ + } \ + } \ + else { \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + } \ + else { \ + X##bu ^= HTOLE64(input[ 4]); \ + } \ + } \ + else { \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + if (laneCount < 7) { \ + } \ + else { \ + X##ge ^= HTOLE64(input[ 6]); \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + } \ + else { \ + X##go ^= HTOLE64(input[ 8]); \ + } \ + } \ + else { \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + if (laneCount < 11) { \ + } \ + else { \ + X##ka ^= HTOLE64(input[10]); \ + } \ + } \ + } \ + else { \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + } \ + else { \ + X##ki ^= HTOLE64(input[12]); \ + } \ + } \ + else { \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ + if (laneCount < 15) { \ + } \ + else { \ + X##ku ^= HTOLE64(input[14]); \ + } \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= HTOLE64(input[ 0]); \ + X##be ^= HTOLE64(input[ 1]); \ + X##bi ^= HTOLE64(input[ 2]); \ + X##bo ^= HTOLE64(input[ 3]); \ + X##bu ^= HTOLE64(input[ 4]); \ + X##ga ^= HTOLE64(input[ 5]); \ + X##ge ^= HTOLE64(input[ 6]); \ + X##gi ^= HTOLE64(input[ 7]); \ + X##go ^= HTOLE64(input[ 8]); \ + X##gu ^= HTOLE64(input[ 9]); \ + X##ka ^= HTOLE64(input[10]); \ + X##ke ^= HTOLE64(input[11]); \ + X##ki ^= HTOLE64(input[12]); \ + X##ko ^= HTOLE64(input[13]); \ + X##ku ^= HTOLE64(input[14]); \ + X##ma ^= HTOLE64(input[15]); \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + } \ + else { \ + X##me ^= HTOLE64(input[16]); \ + } \ + } \ + else { \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + if (laneCount < 19) { \ + } \ + else { \ + X##mo ^= HTOLE64(input[18]); \ + } \ + } \ + } \ + else { \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + } \ + else { \ + X##sa ^= HTOLE64(input[20]); \ + } \ + } \ + else { \ + X##sa ^= HTOLE64(input[20]); \ + X##se ^= HTOLE64(input[21]); \ + if (laneCount < 23) { \ + } \ + else { \ + X##si ^= HTOLE64(input[22]); \ + } \ + } \ + } \ + } \ + else { \ + X##me ^= HTOLE64(input[16]); \ + X##mi ^= HTOLE64(input[17]); \ + X##mo ^= HTOLE64(input[18]); \ + X##mu ^= HTOLE64(input[19]); \ + X##sa ^= HTOLE64(input[20]); \ + X##se ^= HTOLE64(input[21]); \ + X##si ^= HTOLE64(input[22]); \ + X##so ^= HTOLE64(input[23]); \ + if (laneCount < 25) { \ + } \ + else { \ + X##su ^= HTOLE64(input[24]); \ + } \ + } \ + } diff --git a/src/libkeccak-64/KeccakP-1600-SnP.h b/src/libkeccak-64/KeccakP-1600-SnP.h new file mode 100644 index 0000000..8335ae3 --- /dev/null +++ b/src/libkeccak-64/KeccakP-1600-SnP.h @@ -0,0 +1,51 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +#define KeccakP1600_implementation "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")" +#define KeccakP1600_stateSizeInBytes 200 +#define KeccakP1600_stateAlignment 8 +#define KeccakF1600_FastLoop_supported +#define KeccakP1600_12rounds_FastLoop_supported + +#include + +#define KeccakP1600_StaticInitialize() +void KeccakP1600_Initialize(void *state); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define KeccakP1600_AddByte(state, byte, offset) \ + ((unsigned char*)(state))[(offset)] ^= (byte) +#else +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); +#endif +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); +void KeccakP1600_Permute_12rounds(void *state); +void KeccakP1600_Permute_24rounds(void *state); +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); + +#endif diff --git a/src/libkeccak-64/KeccakP-1600-opt64-config.h b/src/libkeccak-64/KeccakP-1600-opt64-config.h new file mode 100644 index 0000000..085b6c9 --- /dev/null +++ b/src/libkeccak-64/KeccakP-1600-opt64-config.h @@ -0,0 +1,6 @@ +/* +This file defines some parameters of the implementation in the parent directory. +*/ + +#define KeccakP1600_implementation_config "all rounds unrolled" +#define KeccakP1600_fullUnrolling diff --git a/src/libkeccak-64/KeccakP-1600-opt64.c b/src/libkeccak-64/KeccakP-1600-opt64.c new file mode 100644 index 0000000..97e3ea6 --- /dev/null +++ b/src/libkeccak-64/KeccakP-1600-opt64.c @@ -0,0 +1,564 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file implements Keccak-p[1600] in a SnP-compatible way. +Please refer to SnP-documentation.h for more details. + +This implementation comes with KeccakP-1600-SnP.h in the same folder. +Please refer to LowLevel.build for the exact list of other files it must be combined with. +*/ + +#include +#include +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +typedef unsigned char UINT8; +typedef unsigned long long int UINT64; + +#if defined(KeccakP1600_useLaneComplementing) +#define UseBebigokimisa +#endif + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#ifdef KeccakP1600_fullUnrolling +#define FullUnrolling +#else +#define Unrolling KeccakP1600_unrolling +#endif +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +#ifdef KeccakP1600_useLaneComplementing + ((UINT64*)state)[ 1] = ~(UINT64)0; + ((UINT64*)state)[ 2] = ~(UINT64)0; + ((UINT64*)state)[ 8] = ~(UINT64)0; + ((UINT64*)state)[12] = ~(UINT64)0; + ((UINT64*)state)[17] = ~(UINT64)0; + ((UINT64*)state)[20] = ~(UINT64)0; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT64 lane; + if (length == 0) + return; + if (length == 1) + lane = data[0]; + else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset*8; +#else + UINT64 lane = 0; + unsigned int i; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +static void fromWordToBytes(UINT8 *bytes, const UINT64 word) +{ + unsigned int i; + + for(i=0; i<(64/8); i++) + bytes[i] = (word >> (8*i)) & 0xFF; +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + UINT64 lane1[1]; + lane1[0] = lane; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#endif + + for(i=0; i 1) { + ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; + if (laneCount > 2) { + ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; + if (laneCount > 8) { + ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; + if (laneCount > 12) { + ((UINT64*)output)[12] = ~((UINT64*)output)[12]; + if (laneCount > 17) { + ((UINT64*)output)[17] = ~((UINT64*)output)[17]; + if (laneCount > 20) { + ((UINT64*)output)[20] = ~((UINT64*)output)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + UINT64 *inDataAsLanes = (UINT64*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds24 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + UINT64 *inDataAsLanes = (UINT64*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds12 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/src/libkeccak-64/KeccakP-1600-unrolling.macros b/src/libkeccak-64/KeccakP-1600-unrolling.macros new file mode 100644 index 0000000..197e9b0 --- /dev/null +++ b/src/libkeccak-64/KeccakP-1600-unrolling.macros @@ -0,0 +1,302 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (defined(FullUnrolling)) +#define rounds24 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 12) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 6) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 4) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 3) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 2) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 1) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#else +#error "Unrolling is not correctly specified!" +#endif + +#define roundsN(__nrounds) \ + prepareTheta \ + i = 24 - (__nrounds); \ + if ((i&1) != 0) { \ + thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + copyStateVariables(A, E) \ + ++i; \ + } \ + for( /* empty */; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } diff --git a/src/libkeccak-64/KeccakSponge-common.h b/src/libkeccak-64/KeccakSponge-common.h new file mode 100644 index 0000000..8fb3ba1 --- /dev/null +++ b/src/libkeccak-64/KeccakSponge-common.h @@ -0,0 +1,35 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSpongeCommon_h_ +#define _KeccakSpongeCommon_h_ + +#include +#include "align.h" + +#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define KCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#endif diff --git a/src/libkeccak-64/KeccakSponge.inc b/src/libkeccak-64/KeccakSponge.inc new file mode 100644 index 0000000..f6c59ce --- /dev/null +++ b/src/libkeccak-64/KeccakSponge.inc @@ -0,0 +1,311 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define Sponge JOIN(prefix, _Sponge) +#define SpongeInstance JOIN(prefix, _SpongeInstance) +#define SpongeInitialize JOIN(prefix, _SpongeInitialize) +#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb) +#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits) +#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze) + +#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) +#define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) + +int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen) +{ + ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes]; + unsigned int partialBlock; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + unsigned int rateInBytes = rate/8; + + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + if (suffix == 0) + return 1; + + /* Initialize the state */ + SnP_StaticInitialize(); + SnP_Initialize(state); + + /* First, absorb whole blocks */ +#ifdef SnP_FastLoop_Absorb + if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { + /* fast lane: whole lane rate */ + size_t j; + j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); + curInput += j; + inputByteLen -= j; + } +#endif + while(inputByteLen >= (size_t)rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curInput, rateInBytes); + #endif + SnP_AddBytes(state, curInput, 0, rateInBytes); + SnP_Permute(state); + curInput += rateInBytes; + inputByteLen -= rateInBytes; + } + + /* Then, absorb what remains */ + partialBlock = (unsigned int)inputByteLen; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); + #endif + SnP_AddBytes(state, curInput, 0, partialBlock); + + /* Finally, absorb the suffix */ + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = suffix; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(state, suffix, partialBlock); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1))) + SnP_Permute(state); + /* Second bit of padding */ + SnP_AddByte(state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(state); + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + + /* First, output whole blocks */ + while(outputByteLen > (size_t)rateInBytes) { + SnP_ExtractBytes(state, curOutput, 0, rateInBytes); + SnP_Permute(state); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curOutput, rateInBytes); + #endif + curOutput += rateInBytes; + outputByteLen -= rateInBytes; + } + + /* Finally, output what remains */ + partialBlock = (unsigned int)outputByteLen; + SnP_ExtractBytes(state, curOutput, 0, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curOutput, partialBlock); + #endif + + return 0; +} + +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ + +int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->rate = rate; + instance->byteIOIndex = 0; + instance->squeezing = 0; + + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + const unsigned char *curData; + unsigned int rateInBytes = instance->rate/8; + + if (instance->squeezing) + return 1; /* Too late for additional input */ + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { +#ifdef SnP_FastLoop_Absorb + /* processing full blocks first */ + if ((rateInBytes % (SnP_width/200)) == 0) { + /* fast lane: whole lane rate */ + j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); + i += j; + curData += j; + } + else { +#endif + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, rateInBytes); + #endif + SnP_AddBytes(instance->state, curData, 0, rateInBytes); + SnP_Permute(instance->state); + curData+=rateInBytes; + } + i = dataByteLen - j; +#ifdef SnP_FastLoop_Absorb + } +#endif + } + else { + /* normal lane: using the message queue */ + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); + #endif + i += partialBlock; + + SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + curData += partialBlock; + instance->byteIOIndex += partialBlock; + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData) +{ + unsigned int rateInBytes = instance->rate/8; + + if (delimitedData == 0) + return 1; + if (instance->squeezing) + return 1; /* Too late for additional input */ + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = delimitedData; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) + SnP_Permute(instance->state); + /* Second bit of padding */ + SnP_AddByte(instance->state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + instance->squeezing = 1; + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + unsigned int rateInBytes = instance->rate/8; + unsigned char *curData; + + if (!instance->squeezing) + SpongeAbsorbLastFewBits(instance, 0x01); + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + SnP_Permute(instance->state); + SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curData, rateInBytes); + #endif + curData+=rateInBytes; + } + i = dataByteLen - j; + } + else { + /* normal lane: using the message queue */ + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + i += partialBlock; + + SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curData, partialBlock); + #endif + curData += partialBlock; + instance->byteIOIndex += partialBlock; + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +#undef Sponge +#undef SpongeInstance +#undef SpongeInitialize +#undef SpongeAbsorb +#undef SpongeAbsorbLastFewBits +#undef SpongeSqueeze +#undef SnP_stateSizeInBytes +#undef SnP_stateAlignment +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_ExtractBytes diff --git a/src/libkeccak-64/KeccakSpongeWidth1600.c b/src/libkeccak-64/KeccakSpongeWidth1600.c new file mode 100644 index 0000000..672ec36 --- /dev/null +++ b/src/libkeccak-64/KeccakSpongeWidth1600.c @@ -0,0 +1,54 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSpongeWidth1600.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/src/libkeccak-64/KeccakSpongeWidth1600.h b/src/libkeccak-64/KeccakSpongeWidth1600.h new file mode 100644 index 0000000..1558256 --- /dev/null +++ b/src/libkeccak-64/KeccakSpongeWidth1600.h @@ -0,0 +1,31 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSpongeWidth1600_h_ +#define _KeccakSpongeWidth1600_h_ + +#include "KeccakSponge-common.h" + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) +#endif + +#endif diff --git a/src/libkeccak-64/SnP-Relaned.h b/src/libkeccak-64/SnP-Relaned.h new file mode 100644 index 0000000..8dbf3d2 --- /dev/null +++ b/src/libkeccak-64/SnP-Relaned.h @@ -0,0 +1,140 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file contains macros that help implement a permutation in a SnP-compatible way. +It converts an implementation that implement state input/output functions +in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane, +and similarly for Overwite, Extract and ExtractAndAdd) to the byte-oriented SnP. +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/src/libkeccak-64/align.h b/src/libkeccak-64/align.h new file mode 100644 index 0000000..90c1b37 --- /dev/null +++ b/src/libkeccak-64/align.h @@ -0,0 +1,32 @@ +/* +Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen, +Michaël Peeters, Gilles Van Assche and Ronny Van Keer, +hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to our website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/src/libkeccak-64/brg_endian.h b/src/libkeccak-64/brg_endian.h new file mode 100644 index 0000000..7c640b9 --- /dev/null +++ b/src/libkeccak-64/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/src/libkeccak/KeccakP-1600-reference.c b/src/libkeccak/KeccakP-1600-reference.c deleted file mode 100644 index d28f7f9..0000000 --- a/src/libkeccak/KeccakP-1600-reference.c +++ /dev/null @@ -1,397 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include -#include -#include -#include "brg_endian.h" -#ifdef KeccakReference -#include "displayIntermediateValues.h" -#endif - -typedef unsigned char UINT8; -typedef unsigned long long UINT64; -typedef UINT64 tKeccakLane; - -#define maxNrRounds 24 -#define nrLanes 25 -#define index(x, y) (((x)%5)+5*((y)%5)) - -#ifdef KeccakReference - -static tKeccakLane KeccakRoundConstants[maxNrRounds]; -static unsigned int KeccakRhoOffsets[nrLanes]; - -/* ---------------------------------------------------------------- */ - -void KeccakP1600_InitializeRoundConstants(void); -void KeccakP1600_InitializeRhoOffsets(void); -static int LFSR86540(UINT8 *LFSR); - -void KeccakP1600_StaticInitialize(void) -{ - if (sizeof(tKeccakLane) != 8) { - printf("tKeccakLane should be 64-bit wide\n"); - abort(); - } - KeccakP1600_InitializeRoundConstants(); - KeccakP1600_InitializeRhoOffsets(); -} - -void KeccakP1600_InitializeRoundConstants(void) -{ - UINT8 LFSRstate = 0x01; - unsigned int i, j, bitPosition; - - for(i=0; i> (8*j)) & 0xFF); -} - -void KeccakP1600OnWords(tKeccakLane *state, unsigned int nrRounds) -{ - unsigned int i; - -#ifdef KeccakReference - displayStateAsLanes(3, "Same, with lanes as 64-bit words", state, 1600); -#endif - - for(i=(maxNrRounds-nrRounds); i> (64-offset))) : a) - -static void theta(tKeccakLane *A) -{ - unsigned int x, y; - tKeccakLane C[5], D[5]; - - for(x=0; x<5; x++) { - C[x] = 0; - for(y=0; y<5; y++) - C[x] ^= A[index(x, y)]; - } - for(x=0; x<5; x++) - D[x] = ROL64(C[(x+1)%5], 1) ^ C[(x+4)%5]; - for(x=0; x<5; x++) - for(y=0; y<5; y++) - A[index(x, y)] ^= D[x]; -} - -static void rho(tKeccakLane *A) -{ - unsigned int x, y; - - for(x=0; x<5; x++) for(y=0; y<5; y++) - A[index(x, y)] = ROL64(A[index(x, y)], KeccakRhoOffsets[index(x, y)]); -} - -static void pi(tKeccakLane *A) -{ - unsigned int x, y; - tKeccakLane tempA[25]; - - for(x=0; x<5; x++) for(y=0; y<5; y++) - tempA[index(x, y)] = A[index(x, y)]; - for(x=0; x<5; x++) for(y=0; y<5; y++) - A[index(0*x+1*y, 2*x+3*y)] = tempA[index(x, y)]; -} - -static void chi(tKeccakLane *A) -{ - unsigned int x, y; - tKeccakLane C[5]; - - for(y=0; y<5; y++) { - for(x=0; x<5; x++) - C[x] = A[index(x, y)] ^ ((~A[index(x+1, y)]) & A[index(x+2, y)]); - for(x=0; x<5; x++) - A[index(x, y)] = C[x]; - } -} - -static void iota(tKeccakLane *A, unsigned int indexRound) -{ - A[index(0, 0)] ^= KeccakRoundConstants[indexRound]; -} - -/* ---------------------------------------------------------------- */ - -void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) -{ - assert(offset < 200); - assert(offset+length <= 200); - memcpy(data, (unsigned char*)state+offset, length); -} - -/* ---------------------------------------------------------------- */ - -void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) -{ - unsigned int i; - - assert(offset < 200); - assert(offset+length <= 200); - for(i=0; i> 32)); - fprintf(f, "%08X", (unsigned int)(KeccakRoundConstants[i] & 0xFFFFFFFFULL)); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} - -void KeccakP1600_DisplayRhoOffsets(FILE *f) -{ - unsigned int x, y; - - for(y=0; y<5; y++) for(x=0; x<5; x++) { - fprintf(f, "RhoOffset[%i][%i] = ", x, y); - fprintf(f, "%2i", KeccakRhoOffsets[index(x, y)]); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} diff --git a/src/libkeccak/KeccakP-1600-reference.h b/src/libkeccak/KeccakP-1600-reference.h deleted file mode 100644 index 6dc8dea..0000000 --- a/src/libkeccak/KeccakP-1600-reference.h +++ /dev/null @@ -1,22 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#ifndef _KeccakP_1600_reference_h_ -#define _KeccakP_1600_reference_h_ - -void KeccakP1600_DisplayRoundConstants(FILE *f); -void KeccakP1600_DisplayRhoOffsets(FILE *f); - -#endif diff --git a/src/libkeccak/KeccakSponge.c b/src/libkeccak/KeccakSponge.c deleted file mode 100644 index 08d4a19..0000000 --- a/src/libkeccak/KeccakSponge.c +++ /dev/null @@ -1,110 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include "KeccakSponge.h" - -#ifdef KeccakReference - #include "displayIntermediateValues.h" -#endif - -#ifndef KeccakP200_excluded - #include "KeccakP-200-SnP.h" - - #define prefix KeccakWidth200 - #define SnP KeccakP200 - #define SnP_width 200 - #define SnP_Permute KeccakP200_Permute_18rounds - #if defined(KeccakF200_FastLoop_supported) - #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb - #endif - #include "KeccakSponge.inc" - #undef prefix - #undef SnP - #undef SnP_width - #undef SnP_Permute - #undef SnP_FastLoop_Absorb -#endif - -#ifndef KeccakP400_excluded - #include "KeccakP-400-SnP.h" - - #define prefix KeccakWidth400 - #define SnP KeccakP400 - #define SnP_width 400 - #define SnP_Permute KeccakP400_Permute_20rounds - #if defined(KeccakF400_FastLoop_supported) - #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb - #endif - #include "KeccakSponge.inc" - #undef prefix - #undef SnP - #undef SnP_width - #undef SnP_Permute - #undef SnP_FastLoop_Absorb -#endif - -#ifndef KeccakP800_excluded - #include "KeccakP-800-SnP.h" - - #define prefix KeccakWidth800 - #define SnP KeccakP800 - #define SnP_width 800 - #define SnP_Permute KeccakP800_Permute_22rounds - #if defined(KeccakF800_FastLoop_supported) - #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb - #endif - #include "KeccakSponge.inc" - #undef prefix - #undef SnP - #undef SnP_width - #undef SnP_Permute - #undef SnP_FastLoop_Absorb -#endif - -#ifndef KeccakP1600_excluded - #include "KeccakP-1600-SnP.h" - - #define prefix KeccakWidth1600 - #define SnP KeccakP1600 - #define SnP_width 1600 - #define SnP_Permute KeccakP1600_Permute_24rounds - #if defined(KeccakF1600_FastLoop_supported) - #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb - #endif - #include "KeccakSponge.inc" - #undef prefix - #undef SnP - #undef SnP_width - #undef SnP_Permute - #undef SnP_FastLoop_Absorb -#endif - -#ifndef KeccakP1600_excluded - #include "KeccakP-1600-SnP.h" - - #define prefix KeccakWidth1600_12rounds - #define SnP KeccakP1600 - #define SnP_width 1600 - #define SnP_Permute KeccakP1600_Permute_12rounds - #if defined(KeccakP1600_12rounds_FastLoop_supported) - #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb - #endif - #include "KeccakSponge.inc" - #undef prefix - #undef SnP - #undef SnP_width - #undef SnP_Permute - #undef SnP_FastLoop_Absorb -#endif diff --git a/src/libkeccak/KeccakSponge.h b/src/libkeccak/KeccakSponge.h deleted file mode 100644 index a8526fe..0000000 --- a/src/libkeccak/KeccakSponge.h +++ /dev/null @@ -1,178 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#ifndef _KeccakSponge_h_ -#define _KeccakSponge_h_ - -/** General information - * - * The following type and functions are not actually implemented. Their - * documentation is generic, with the prefix Prefix replaced by - * - KeccakWidth200 for a sponge function based on Keccak-f[200] - * - KeccakWidth400 for a sponge function based on Keccak-f[400] - * - KeccakWidth800 for a sponge function based on Keccak-f[800] - * - KeccakWidth1600 for a sponge function based on Keccak-f[1600] - * - * In all these functions, the rate and capacity must sum to the width of the - * chosen permutation. For instance, to use the sponge function - * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination - * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(), - * KeccakWidth1600_SpongeAbsorbLastFewBits() and - * KeccakWidth1600_SpongeSqueeze(). - * - * The Prefix_SpongeInstance contains the sponge instance attributes for use - * with the Prefix_Sponge* functions. - * It gathers the state processed by the permutation as well as the rate, - * the position of input/output bytes in the state and the phase - * (absorbing or squeezing). - */ - -#ifdef DontReallyInclude_DocumentationOnly -/** Function to evaluate the sponge function Keccak[r, c] in a single call. - * @param rate The value of the rate r. - * @param capacity The value of the capacity c. - * @param input Pointer to the input message (before the suffix). - * @param inputByteLen The length of the input message in bytes. - * @param suffix Byte containing from 0 to 7 suffix bits - * that must be absorbed after @a input. - * These n bits must be in the least significant bit positions. - * These bits must be delimited with a bit 1 at position n - * (counting from 0=LSB to 7=MSB) and followed by bits 0 - * from position n+1 to position 7. - * Some examples: - * - If no bits are to be absorbed, then @a suffix must be 0x01. - * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04. - * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32. - * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B. - * . - * @param output Pointer to the output buffer. - * @param outputByteLen The desired number of output bytes. - * @pre One must have r+c equal to the supported width of this implementation - * and the rate a multiple of 8 bits (one byte) in this implementation. - * @pre @a suffix ≠ 0x00 - * @return Zero if successful, 1 otherwise. - */ -int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); - -/** - * Function to initialize the state of the Keccak[r, c] sponge function. - * The phase of the sponge function is set to absorbing. - * @param spongeInstance Pointer to the sponge instance to be initialized. - * @param rate The value of the rate r. - * @param capacity The value of the capacity c. - * @pre One must have r+c equal to the supported width of this implementation - * and the rate a multiple of 8 bits (one byte) in this implementation. - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); - -/** - * Function to give input data bytes for the sponge function to absorb. - * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param data Pointer to the input data. - * @param dataByteLen The number of input bytes provided in the input data. - * @pre The sponge function must be in the absorbing phase, - * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() - * must not have been called before. - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); - -/** - * Function to give input data bits for the sponge function to absorb - * and then to switch to the squeezing phase. - * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param delimitedData Byte containing from 0 to 7 trailing bits - * that must be absorbed. - * These n bits must be in the least significant bit positions. - * These bits must be delimited with a bit 1 at position n - * (counting from 0=LSB to 7=MSB) and followed by bits 0 - * from position n+1 to position 7. - * Some examples: - * - If no bits are to be absorbed, then @a delimitedData must be 0x01. - * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04. - * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32. - * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B. - * . - * @pre The sponge function must be in the absorbing phase, - * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() - * must not have been called before. - * @pre @a delimitedData ≠ 0x00 - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData); - -/** - * Function to squeeze output data from the sponge function. - * If the sponge function was in the absorbing phase, this function - * switches it to the squeezing phase - * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called. - * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). - * @param data Pointer to the buffer where to store the output data. - * @param dataByteLen The number of output bytes desired. - * @return Zero if successful, 1 otherwise. - */ -int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); -#endif - -#include -#include "align.h" - -#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ - ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ - unsigned char state[size]; \ - unsigned int rate; \ - unsigned int byteIOIndex; \ - int squeezing; \ - } prefix##_SpongeInstance; - -#define KCP_DeclareSpongeFunctions(prefix) \ - int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ - int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ - int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ - int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ - int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); - -#ifndef KeccakP200_excluded - #include "KeccakP-200-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth200) -#endif - -#ifndef KeccakP400_excluded - #include "KeccakP-400-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth400) -#endif - -#ifndef KeccakP800_excluded - #include "KeccakP-800-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth800) -#endif - -#ifndef KeccakP1600_excluded - #include "KeccakP-1600-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth1600) -#endif - -#ifndef KeccakP1600_excluded - #include "KeccakP-1600-SnP.h" - KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) - KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) -#endif - -#endif diff --git a/test/vectors-sha3.js b/test/vectors-sha3.js index 378e70f..0fa7d08 100644 --- a/test/vectors-sha3.js +++ b/test/vectors-sha3.js @@ -4,7 +4,7 @@ const fs = require('fs') module.exports = (name, createHash) => { for (const hash of ['sha3-224', 'sha3-256', 'sha3-384', 'sha3-512']) { - const filename = require.resolve(`../util/KeccakCodePackage/TestVectors/ShortMsgKAT_${hash.toUpperCase()}.txt`) + const filename = require.resolve(`../util/XKCP/tests/TestVectors/ShortMsgKAT_${hash.toUpperCase()}.txt`) const content = fs.readFileSync(filename, 'utf8') const lines = content.split('\n') diff --git a/test/vectors-shake.js b/test/vectors-shake.js index e7084c3..c2bb1f2 100644 --- a/test/vectors-shake.js +++ b/test/vectors-shake.js @@ -4,7 +4,7 @@ const fs = require('fs') module.exports = (name, createHash) => { for (const hash of ['shake128', 'shake256']) { - const filename = require.resolve(`../util/KeccakCodePackage/TestVectors/ShortMsgKAT_${hash.toUpperCase()}.txt`) + const filename = require.resolve(`../util/XKCP/tests/TestVectors/ShortMsgKAT_${hash.toUpperCase()}.txt`) const content = fs.readFileSync(filename, 'utf8') const lines = content.split('\n') diff --git a/util/KeccakCodePackage b/util/KeccakCodePackage deleted file mode 160000 index e39f89a..0000000 --- a/util/KeccakCodePackage +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e39f89a4490f012253d67474e45fc3867495631f diff --git a/util/XKCP b/util/XKCP new file mode 160000 index 0000000..58b20ec --- /dev/null +++ b/util/XKCP @@ -0,0 +1 @@ +Subproject commit 58b20ec99f8a891913d8cf0ea350d05b6fb3ae41 diff --git a/util/libkeccak.sh b/util/libkeccak.sh deleted file mode 100755 index 0fa9dc3..0000000 --- a/util/libkeccak.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -set -x - -IMPL=reference -FILES=( - "align.h" - "brg_endian.h" - "KeccakP-1600-reference.c" - "KeccakP-1600-reference.h" - "KeccakP-1600-SnP.h" - "KeccakSponge.c" - "KeccakSponge.h" - "KeccakSponge.inc" -) - -rm -rf ./src/libkeccak - -make --directory=./util/KeccakCodePackage ${IMPL}/libkeccak.a.pack -tar -xf ./util/KeccakCodePackage/bin/${IMPL}_libkeccak.a.tar.gz - -mkdir ./src/libkeccak -for FILE in "${FILES[@]}" -do - mv ./${IMPL}/libkeccak.a/${FILE} ./src/libkeccak -done - -rm -rf ./${IMPL}