diff --git a/Shoko.Commons b/Shoko.Commons
index 0a4967ad4..71ef4e076 160000
--- a/Shoko.Commons
+++ b/Shoko.Commons
@@ -1 +1 @@
-Subproject commit 0a4967ad46374eb9d029f19e0f9ed2423ba247b7
+Subproject commit 71ef4e076be0ae17b3e3280b5994bfa19bb687e7
diff --git a/hasher/Hasher.vcxproj b/hasher/Hasher.vcxproj
index 3b4382218..bc1875bb4 100644
--- a/hasher/Hasher.vcxproj
+++ b/hasher/Hasher.vcxproj
@@ -132,13 +132,13 @@
build assemblies
- ml.exe md4_asm.asm /c /Cx /coff && ^
-ml.exe md5_asm.asm /c /Cx /coff && ^
-ml.exe sha_asm.asm /c /Cx /coff && ^
+ ml.exe md4x86.asm /c /Cx /coff && ^
+ml.exe md5x86.asm /c /Cx /coff && ^
+ml.exe sha1x86.asm /c /Cx /coff && ^
ml.exe crc32x86.asm /c /Cx /coff
- MD4_asm.obj;MD5_asm.obj;SHA_asm.obj;crc32x86.obj;%(AdditionalDependencies)
+ MD4x86.obj;MD5x86.obj;SHA1x86.obj;crc32x86.obj;%(AdditionalDependencies)
.\Debug\hasher.dll
true
true
@@ -171,7 +171,7 @@ ml.exe crc32x86.asm /c /Cx /coff
Disabled
- WIN64;_DEBUG;_WINDOWS;_USRDLL;HASHER_EXPORTS;%(PreprocessorDefinitions)
+ WIN64;_DEBUG;_WINDOWS;_USRDLL;HASHLIB_USE_ASM;HASHER_EXPORTS;%(PreprocessorDefinitions)
EnableFastChecks
MultiThreadedDebug
Use
@@ -189,7 +189,7 @@ ml.exe crc32x86.asm /c /Cx /coff
0x0813
- crc32x64.obj;%(AdditionalDependencies)
+ crc32x64.obj;MD5x64.obj;MD4x64.obj;sha1x64.obj;%(AdditionalDependencies)
.\Debug_x64\hasher.dll
true
true
@@ -209,7 +209,7 @@ ml.exe crc32x86.asm /c /Cx /coff
- ml64.exe crc32x64.asm /c /nologo /W3 /Zi
+ ml64.exe crc32x64.asm /c /nologo /W3 /Zi && ^ml64.exe MD5x64.asm /c /nologo /W3 /Zi && ^ml64.exe MD4x64.asm /c /nologo /W3 /Zi && ^ml64.exe sha1x64.asm /c /nologo /W3 /Zi
build assemblies
@@ -249,13 +249,13 @@ ml.exe crc32x86.asm /c /Cx /coff
build assemblies
- ml.exe md4_asm.asm /c /Cx /coff && ^
-ml.exe md5_asm.asm /c /Cx /coff && ^
-ml.exe sha_asm.asm /c /Cx /coff && ^
+ ml.exe md4x86.asm /c /Cx /coff && ^
+ml.exe md5x86.asm /c /Cx /coff && ^
+ml.exe sha1x86.asm /c /Cx /coff && ^
ml.exe crc32x86.asm /c /Cx /coff
- MD4_asm.obj;MD5_asm.obj;SHA_asm.obj;crc32x86.obj;%(AdditionalDependencies)
+ MD4x86.obj;MD5x86.obj;SHA1x86.obj;crc32x86.obj;%(AdditionalDependencies)
.\Release\hasher.dll
true
.\Release/hasher.pdb
@@ -286,7 +286,7 @@ ml.exe crc32x86.asm /c /Cx /coff
Full
AnySuitable
- WIN64;NDEBUG;_WINDOWS;x64;_USRDLL;HASHER_EXPORTS;%(PreprocessorDefinitions)
+ WIN64;NDEBUG;_WINDOWS;x64;_USRDLL;HASHER_EXPORTS;HASHLIB_USE_ASM;%(PreprocessorDefinitions)
true
MultiThreaded
true
@@ -306,7 +306,7 @@ ml.exe crc32x86.asm /c /Cx /coff
0x0813
- crc32x64.obj;%(AdditionalDependencies)
+ crc32x64.obj;MD5x64.obj;MD4x64.obj;sha1x64.obj;%(AdditionalDependencies)
.\Release_x64\hasher.dll
true
.\Release_x64/hasher.pdb
@@ -325,7 +325,7 @@ ml.exe crc32x86.asm /c /Cx /coff
- ml64.exe crc32x64.asm /c /nologo /W3 /Zi
+ ml64.exe crc32x64.asm /c /nologo /W3 /Zi && ^ml64.exe MD5x64.asm /c /nologo /W3 /Zi && ^ml64.exe MD4x64.asm /c /nologo /W3 /Zi && ^ml64.exe sha1x64.asm /c /nologo /W3 /Zi
build assemblies
@@ -336,14 +336,14 @@ ml.exe crc32x86.asm /c /Cx /coff
-
+
%(PreprocessorDefinitions)
%(PreprocessorDefinitions)
%(PreprocessorDefinitions)
%(PreprocessorDefinitions)
-
+
%(PreprocessorDefinitions)
%(PreprocessorDefinitions)
@@ -367,9 +367,12 @@ ml.exe crc32x86.asm /c /Cx /coff
-
-
-
+
+
+
+
+
+
diff --git a/hasher/Hasher.vcxproj.filters b/hasher/Hasher.vcxproj.filters
index 5cb076215..6db8df8b3 100644
--- a/hasher/Hasher.vcxproj.filters
+++ b/hasher/Hasher.vcxproj.filters
@@ -27,16 +27,16 @@
Source Files
-
+
Source Files
-
+
Source Files
-
+
Source Files
-
+
Source Files
@@ -64,10 +64,13 @@
-
-
-
+
+
+
+
+
+
\ No newline at end of file
diff --git a/hasher/MD4.cpp b/hasher/MD4.cpp
index 5b69d3f8e..6a0484d77 100644
--- a/hasher/MD4.cpp
+++ b/hasher/MD4.cpp
@@ -25,9 +25,6 @@
#include "MD4.h"
-#ifdef HASHLIB_USE_ASM
-extern "C" void __stdcall MD4_Add_p5(CMD4::MD4State*, const void* pData, std::size_t nLength);
-#endif
const unsigned char hashPadding[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -68,13 +65,55 @@ void CMD4::Finish()
Add(&bits, sizeof(bits));
}
+
+
#ifdef HASHLIB_USE_ASM
+#if defined(_WIN64) || defined(__x86_64__)
+extern "C" void __fastcall MD4_x64(const void *, const void* pData, std::size_t nLength); // x64 asm kernel: 1st arg is the m_nState word array (updated by the callee), nLength is a count of 64-byte blocks, not bytes
+#else
+extern "C" void __stdcall MD4_Add_p5(CMD4::MD4State*, const void* pData, std::size_t nLength); // x86 asm routine: receives the whole state object and a byte count
+#endif
+
void CMD4::Add(const void* pData, std::size_t nLength)
{
+#if defined(_WIN64) || defined(__x86_64__)
+ // Update number of bytes
+ const char* input = static_cast< const char* >(pData);
+ {
+ uint32 index = static_cast< uint32 >(m_State.m_nCount % m_State.blockSize);
+ m_State.m_nCount += nLength;
+ if (index)
+ {
+ // buffer has some data already - lets fill it
+ // before doing the rest of the transformation on the original data
+ if (index + nLength < m_State.blockSize)
+ {
+ std::memcpy(m_State.m_oBuffer + index, input, nLength);
+ return;
+ }
+ std::memcpy(m_State.m_oBuffer + index, input, m_State.blockSize - index);
+ nLength -= m_State.blockSize - index;
+ input += m_State.blockSize - index;
+ MD4_x64(&(m_State.m_nState[0]), m_State.m_oBuffer, 1);
+ }
+ }
+ // Transform as many times as possible using the original data stream
+ const char* const end = input + nLength - nLength % m_State.blockSize;
+ size_t abs = nLength / m_State.blockSize;
+ MD4_x64(&(m_State.m_nState[0]), input, abs);
+ abs *= m_State.blockSize;
+ input += abs;
+ nLength %= m_State.blockSize;
+ // Buffer remaining input
+ if (nLength)
+ std::memcpy(m_State.m_oBuffer, input, nLength);
+#else
MD4_Add_p5(&m_State, pData, nLength);
-}
+#endif
+
+}
#else // HASHLIB_USE_ASM
namespace
@@ -189,13 +228,12 @@ void CMD4::Transform(const uint32* data)
m_State.m_nState[2] += c;
m_State.m_nState[3] += d;
}
-
void CMD4::Add(const void* pData, std::size_t nLength)
{
// Update number of bytes
- const char* input = static_cast< const char* >(pData);
+ const char* input = static_cast(pData);
{
- uint32 index = static_cast< uint32 >(m_State.m_nCount % m_State.blockSize);
+ uint32 index = static_cast(m_State.m_nCount % m_State.blockSize);
m_State.m_nCount += nLength;
if (index)
{
@@ -209,19 +247,20 @@ void CMD4::Add(const void* pData, std::size_t nLength)
std::memcpy(m_State.m_oBuffer + index, input, m_State.blockSize - index);
nLength -= m_State.blockSize - index;
input += m_State.blockSize - index;
- Transform(reinterpret_cast< const uint32* >(m_State.m_oBuffer));
+ Transform(reinterpret_cast(m_State.m_oBuffer));
}
}
// Transform as many times as possible using the original data stream
const char* const end = input + nLength - nLength % m_State.blockSize;
nLength %= m_State.blockSize;
for (; input != end; input += m_State.blockSize)
- Transform(reinterpret_cast< const uint32* >(input));
+ Transform(reinterpret_cast(input));
// Buffer remaining input
if (nLength)
std::memcpy(m_State.m_oBuffer, input, nLength);
}
+
#endif // HASHLIB_USE_ASM
//
diff --git a/hasher/MD4x64.asm b/hasher/MD4x64.asm
new file mode 100644
index 000000000..2d6c48f09
--- /dev/null
+++ b/hasher/MD4x64.asm
@@ -0,0 +1,535 @@
+.code
+; MD4_x64(state, data, blocks): runs the MD4 compression function over 'blocks' consecutive 64-byte blocks
+; starting at 'data' and updates the four 32-bit chaining words A,B,C,D stored at 'state' (args arrive in rcx, rdx, r8).
+MD4_x64 PROC
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ push rsi
+ push rdi
+; parameter 1 in rcx, param 2 in rdx , param 3 in r8
+
+; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
+; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
+;
+; All registers must be preserved across the call, except for
+; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
+
+ ;# r12 = arg #1 (ctx, pointer to the four 32-bit MD4 state words), copied from rcx below
+ ;# rsi = arg #2 (ptr, data pointer), copied from rdx below
+ ;# rdx = arg #3 (nbr, number of 16-word blocks to process), copied from r8 below
+
+ mov rsi,rdx
+ mov rdx,r8 ;# full 64-bit block count (the C++ prototype passes a size_t; do not truncate to 32 bits)
+
+ mov r12,rcx ;# r12 = ctx
+ shl rdx,6 ;# rdx = nbr in bytes
+ push r12 ;# save ctx: r12 is reused as scratch in round 2
+ lea rdi,[rsi+rdx]; # rdi = end
+
+ mov eax,DWORD PTR 0[r12] ;# eax = ctx->A
+ mov ebx,DWORD PTR 4[r12] ;# ebx = ctx->B
+ mov ecx,DWORD PTR 8[r12] ;# ecx = ctx->C
+ mov edx,DWORD PTR 12[r12] ;# edx = ctx->D
+ ;push rbp ;# save ctx
+ ;# end is 'rdi'
+ ;# ptr is 'rsi'
+ ;# A is 'eax'
+ ;# B is 'ebx'
+ ;# C is 'ecx'
+ ;# D is 'edx'
+
+; it is better with align 16 here, I don't know why
+align 16
+ cmp rsi,rdi ;# cmp end with ptr
+ mov r13d,0ffffffffh ;# r13d is not read again anywhere in this routine
+ je lab1 ;# jmp if ptr == end
+
+ ;# BEGIN of loop over 16-word blocks
+lab2: ;# save old values of A, B, C, D
+ mov r8d,eax
+ mov r9d,ebx
+ mov r14d,ecx
+ mov r15d,edx
+; round 1: F(x,y,z) = z ^ (x & (y ^ z)), no additive constant, rotates 3/7/11/19; X[] is fetched two words at a time as one qword
+ mov r10 , QWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ mov r11d , edx ;/* (NEXT STEP) z' = %edx */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax, [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (1*4)[rsi] ;/* (NEXT STEP) X[1] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 3 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx, [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (2*4)[rsi] ;/* (NEXT STEP) X[2] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 7 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx, [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (3*4)[rsi] ;/* (NEXT STEP) X[3] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx, [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (4*4)[rsi] ;/* (NEXT STEP) X[4] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 19 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax, [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (5*4)[rsi] ;/* (NEXT STEP) X[5] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 3 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx, [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (6*4)[rsi] ;/* (NEXT STEP) X[6] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 7 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx, [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (7*4)[rsi] ;/* (NEXT STEP) X[7] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx, [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (8*4)[rsi] ;/* (NEXT STEP) X[8] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 19 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax, [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (9*4)[rsi] ;/* (NEXT STEP) X[9] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 3 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx, [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (10*4)[rsi] ;/* (NEXT STEP) X[10] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 7 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx, [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (11*4)[rsi] ;/* (NEXT STEP) X[11] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx, [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (12*4)[rsi] ;/* (NEXT STEP) X[12] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 19 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax, [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (13*4)[rsi] ;/* (NEXT STEP) X[13] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 3 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx, [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (14*4)[rsi] ;/* (NEXT STEP) X[14] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 7 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx, [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (15*4)[rsi] ;/* (NEXT STEP) X[15] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx, [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 19 ;/* dst <<< s */
+ mov r11d , edx ;/* round 2 set-up starts here; this value is overwritten two lines below */
+ mov r10d , [rsi] ;/* (NEXT STEP) X[0] */
+ mov r11d, ecx ;/* r11d = c, becomes (b & c) */
+ mov r12d, ecx ;/* r12d = c, becomes (b | c) & d; r12 no longer holds ctx (it was pushed) */
+; round 2: G(x,y,z) = (x & y) | ((x | y) & z), constant 5A827999h, rotates 3/5/9/13, X[] order 0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15
+ lea eax,DWORD PTR 5A827999h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ebx
+ or r12d, ebx
+ mov r10d , (4*4) [rsi] ;/* (NEXT STEP) X[4] */
+ and r12d, edx
+ or r11d,r12d
+ mov r12d, ebx ;/* (NEXT STEP) z' = ebx */
+ add eax,r11d
+ mov r11d, ebx ;/* (NEXT STEP) z' = ebx */
+ rol eax , 3 ;/* dst <<< s */
+
+
+ lea edx,DWORD PTR 5A827999h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, eax
+ or r12d, eax
+ mov r10d , (8*4) [rsi] ;/* (NEXT STEP) X[8] */
+ and r12d, ecx
+ or r11d,r12d
+ mov r12d, eax ;/* (NEXT STEP) z' = eax */
+ add edx,r11d
+ mov r11d, eax ;/* (NEXT STEP) z' = eax */
+ rol edx , 5 ;/* dst <<< s */
+
+
+ lea ecx,DWORD PTR 5A827999h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, edx
+ or r12d, edx
+ mov r10d , (12*4) [rsi] ;/* (NEXT STEP) X[12] */
+ and r12d, ebx
+ or r11d,r12d
+ mov r12d, edx ;/* (NEXT STEP) z' = edx */
+ add ecx,r11d
+ mov r11d, edx ;/* (NEXT STEP) z' = edx */
+ rol ecx , 9 ;/* dst <<< s */
+
+
+ lea ebx,DWORD PTR 5A827999h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ecx
+ or r12d, ecx
+ mov r10d , (1*4) [rsi] ;/* (NEXT STEP) X[1] */
+ and r12d, eax
+ or r11d,r12d
+ mov r12d, ecx ;/* (NEXT STEP) z' = ecx */
+ add ebx,r11d
+ mov r11d, ecx ;/* (NEXT STEP) z' = ecx */
+ rol ebx , 13 ;/* dst <<< s */
+
+
+ lea eax,DWORD PTR 5A827999h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ebx
+ or r12d, ebx
+ mov r10d , (5*4) [rsi] ;/* (NEXT STEP) X[5] */
+ and r12d, edx
+ or r11d,r12d
+ mov r12d, ebx ;/* (NEXT STEP) z' = ebx */
+ add eax,r11d
+ mov r11d, ebx ;/* (NEXT STEP) z' = ebx */
+ rol eax , 3 ;/* dst <<< s */
+
+
+ lea edx,DWORD PTR 5A827999h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, eax
+ or r12d, eax
+ mov r10d , (9*4) [rsi] ;/* (NEXT STEP) X[9] */
+ and r12d, ecx
+ or r11d,r12d
+ mov r12d, eax ;/* (NEXT STEP) z' = eax */
+ add edx,r11d
+ mov r11d, eax ;/* (NEXT STEP) z' = eax */
+ rol edx , 5 ;/* dst <<< s */
+
+
+ lea ecx,DWORD PTR 5A827999h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, edx
+ or r12d, edx
+ mov r10d , (13*4) [rsi] ;/* (NEXT STEP) X[13] */
+ and r12d, ebx
+ or r11d,r12d
+ mov r12d, edx ;/* (NEXT STEP) z' = edx */
+ add ecx,r11d
+ mov r11d, edx ;/* (NEXT STEP) z' = edx */
+ rol ecx , 9 ;/* dst <<< s */
+
+
+ lea ebx,DWORD PTR 5A827999h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ecx
+ or r12d, ecx
+ mov r10d , (2*4) [rsi] ;/* (NEXT STEP) X[2] */
+ and r12d, eax
+ or r11d,r12d
+ mov r12d, ecx ;/* (NEXT STEP) z' = ecx */
+ add ebx,r11d
+ mov r11d, ecx ;/* (NEXT STEP) z' = ecx */
+ rol ebx , 13 ;/* dst <<< s */
+
+
+ lea eax,DWORD PTR 5A827999h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ebx
+ or r12d, ebx
+ mov r10d , (6*4) [rsi] ;/* (NEXT STEP) X[6] */
+ and r12d, edx
+ or r11d,r12d
+ mov r12d, ebx ;/* (NEXT STEP) z' = ebx */
+ add eax,r11d
+ mov r11d, ebx ;/* (NEXT STEP) z' = ebx */
+ rol eax , 3 ;/* dst <<< s */
+
+
+ lea edx,DWORD PTR 5A827999h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, eax
+ or r12d, eax
+ mov r10d , (10*4) [rsi] ;/* (NEXT STEP) X[10] */
+ and r12d, ecx
+ or r11d,r12d
+ mov r12d, eax ;/* (NEXT STEP) z' = eax */
+ add edx,r11d
+ mov r11d, eax ;/* (NEXT STEP) z' = eax */
+ rol edx , 5 ;/* dst <<< s */
+
+
+ lea ecx,DWORD PTR 5A827999h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, edx
+ or r12d, edx
+ mov r10d , (14*4) [rsi] ;/* (NEXT STEP) X[14] */
+ and r12d, ebx
+ or r11d,r12d
+ mov r12d, edx ;/* (NEXT STEP) z' = edx */
+ add ecx,r11d
+ mov r11d, edx ;/* (NEXT STEP) z' = edx */
+ rol ecx , 9 ;/* dst <<< s */
+
+
+ lea ebx,DWORD PTR 5A827999h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ecx
+ or r12d, ecx
+ mov r10d , (3*4) [rsi] ;/* (NEXT STEP) X[3] */
+ and r12d, eax
+ or r11d,r12d
+ mov r12d, ecx ;/* (NEXT STEP) z' = ecx */
+ add ebx,r11d
+ mov r11d, ecx ;/* (NEXT STEP) z' = ecx */
+ rol ebx , 13 ;/* dst <<< s */
+
+
+ lea eax,DWORD PTR 5A827999h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ebx
+ or r12d, ebx
+ mov r10d , (7*4) [rsi] ;/* (NEXT STEP) X[7] */
+ and r12d, edx
+ or r11d,r12d
+ mov r12d, ebx ;/* (NEXT STEP) z' = ebx */
+ add eax,r11d
+ mov r11d, ebx ;/* (NEXT STEP) z' = ebx */
+ rol eax , 3 ;/* dst <<< s */
+
+
+ lea edx,DWORD PTR 5A827999h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, eax
+ or r12d, eax
+ mov r10d , (11*4) [rsi] ;/* (NEXT STEP) X[11] */
+ and r12d, ecx
+ or r11d,r12d
+ mov r12d, eax ;/* (NEXT STEP) z' = eax */
+ add edx,r11d
+ mov r11d, eax ;/* (NEXT STEP) z' = eax */
+ rol edx , 5 ;/* dst <<< s */
+
+
+ lea ecx,DWORD PTR 5A827999h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, edx
+ or r12d, edx
+ mov r10d , (15*4) [rsi] ;/* (NEXT STEP) X[15] */
+ and r12d, ebx
+ or r11d,r12d
+ mov r12d, edx ;/* (NEXT STEP) z' = edx */
+ add ecx,r11d
+ mov r11d, edx ;/* (NEXT STEP) z' = edx */
+ rol ecx , 9 ;/* dst <<< s */
+
+
+ lea ebx,DWORD PTR 5A827999h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d, ecx
+ or r12d, ecx
+ mov r10d , (0*4) [rsi] ;/* (NEXT STEP) X[0] */
+ and r12d, eax
+ or r11d,r12d
+ mov r12d, ecx ;/* (NEXT STEP) z' = ecx */
+ add ebx,r11d
+ mov r11d, ecx ;/* (NEXT STEP) z' = ecx */
+ rol ebx , 13 ;/* dst <<< s */
+; round 3: H(x,y,z) = x ^ y ^ z, constant 6ED9EBA1h, rotates 3/9/11/15, X[] order 0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15
+ mov r10d , [rsi] ;/* (NEXT STEP) X[0] */
+ mov r11d , ecx ;/* (NEXT STEP) y' = %ecx */
+ lea eax,DWORD PTR 6ED9EBA1H [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (8*4)[rsi] ;/* (NEXT STEP) X[8] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 3 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ lea edx,DWORD PTR 6ED9EBA1H [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (4*4)[rsi] ;/* (NEXT STEP) X[4] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 9 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ lea ecx,DWORD PTR 6ED9EBA1H [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (12*4)[rsi] ;/* (NEXT STEP) X[12] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 11 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ lea ebx,DWORD PTR 6ED9EBA1H [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (2*4)[rsi] ;/* (NEXT STEP) X[2] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 15 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ lea eax,DWORD PTR 6ED9EBA1H [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (10*4)[rsi] ;/* (NEXT STEP) X[10] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 3 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ lea edx,DWORD PTR 6ED9EBA1H [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (6*4)[rsi] ;/* (NEXT STEP) X[6] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 9 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ lea ecx,DWORD PTR 6ED9EBA1H [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (14*4)[rsi] ;/* (NEXT STEP) X[14] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 11 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ lea ebx,DWORD PTR 6ED9EBA1H [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (1*4)[rsi] ;/* (NEXT STEP) X[1] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 15 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ lea eax,DWORD PTR 6ED9EBA1H [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (9*4)[rsi] ;/* (NEXT STEP) X[9] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 3 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ lea edx,DWORD PTR 6ED9EBA1H [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (5*4)[rsi] ;/* (NEXT STEP) X[5] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 9 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ lea ecx,DWORD PTR 6ED9EBA1H [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (13*4)[rsi] ;/* (NEXT STEP) X[13] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 11 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ lea ebx,DWORD PTR 6ED9EBA1H [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (3*4)[rsi] ;/* (NEXT STEP) X[3] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 15 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ lea eax,DWORD PTR 6ED9EBA1H [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (11*4)[rsi] ;/* (NEXT STEP) X[11] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 3 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ lea edx,DWORD PTR 6ED9EBA1H [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (7*4)[rsi] ;/* (NEXT STEP) X[7] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 9 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ lea ecx,DWORD PTR 6ED9EBA1H [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (15*4)[rsi] ;/* (NEXT STEP) X[15] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 11 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ lea ebx,DWORD PTR 6ED9EBA1H [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 15 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+; # add old values of A, B, C, D
+ add eax,r8d
+ add ebx,r9d
+ add ecx,r14d
+ add edx,r15d
+
+; # loop control
+ add rsi,64 ;# ptr += 64
+ cmp rsi,rdi ;# cmp end with ptr
+ jb lab2 ;# jmp if ptr < end
+; # END of loop over 16-word blocks
+
+lab1: ;pop rbp ;# restore ctx
+pop r12 ;# restore ctx (pushed before the loop)
+ mov DWORD PTR 0[r12],eax ;# ctx->A = A
+ mov DWORD PTR 4[r12],ebx ;# ctx->B = B
+ mov DWORD PTR 8[r12],ecx ;# ctx->C = C
+ mov DWORD PTR 12[r12],edx ;# ctx->D = D
+
+ pop rdi
+ pop rsi
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ ret
+MD4_x64 ENDP
+END;
+
diff --git a/hasher/MD4_asm.asm b/hasher/MD4x86.asm
similarity index 97%
rename from hasher/MD4_asm.asm
rename to hasher/MD4x86.asm
index 4dd31c385..a66b0f0a8 100644
--- a/hasher/MD4_asm.asm
+++ b/hasher/MD4x86.asm
@@ -1,259 +1,261 @@
-; #####################################################################################################################
-;
-; MD4_asm.asm
-;
-; Copyright (c) Shareaza Development Team, 2002-2007.
-; This file is part of SHAREAZA (shareaza.sourceforge.net)
-;
-; Shareaza is free software; you can redistribute it
-; and/or modify it under the terms of the GNU General Public License
-; as published by the Free Software Foundation; either version 2 of
-; the License, or (at your option) any later version.
-;
-; Shareaza is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-; GNU General Public License for more details.
-;
-; You should have received a copy of the GNU General Public License
-; along with Shareaza; if not, write to the Free Software
-; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-;
-; #####################################################################################################################
-;
-; MD4_asm - Implementation of MD4 for x86 - use together with MD4.cpp and MD4.h
-;
-; #####################################################################################################################
-
- .586p
- .model flat, stdcall
- option casemap:none ; case sensitive
- option prologue:none ; we generate our own entry/exit code
- option epilogue:none
-
-; #####################################################################################################################
-
-m_nCount0 equ 0
-m_nCount1 equ 4
-
-m_nState0 equ 8 ; offsets as found in MD4.h
-m_nState1 equ 12
-m_nState2 equ 16
-m_nState3 equ 20
-
-m_pBuffer equ 24
-
-; Some magic numbers for Transform...
-MD4_S11 equ 3
-MD4_S12 equ 7
-MD4_S13 equ 11
-MD4_S14 equ 19
-MD4_S21 equ 3
-MD4_S22 equ 5
-MD4_S23 equ 9
-MD4_S24 equ 13
-MD4_S31 equ 3
-MD4_S32 equ 9
-MD4_S33 equ 11
-MD4_S34 equ 15
-
- .data
-
-MD4FF MACRO count:REQ,s:REQ
-; a = (a+x[count]+((b&c)|(~b&d)))rol s
-; a = (a+x[count]+(d^(b&(c^d))))rol s
- mov reg_temp1, reg_c
- xor reg_c, reg_d
- add reg_a, [ebp+count*4]
- and reg_c, reg_b
- xor reg_c, reg_d
- add reg_a, reg_c
- rol reg_a, s
-reg_t textequ reg_d
-reg_d textequ reg_temp1
-reg_temp1 textequ reg_c
-reg_c textequ reg_b
-reg_b textequ reg_a
-reg_a textequ reg_t
- ENDM
-
-MD4GG MACRO count:REQ,s:REQ
-; a = (a+x[count]+((b&c)|(b&d)|(c&d))+5A827999H) rol s
-; a = (a+x[count]+((b&c)|(d&(b|c)))+5A827999H)rol s
- mov reg_temp2, reg_b
- mov reg_temp1, reg_b
- add reg_a, [ebp+count*4]
- or reg_b, reg_c
- and reg_temp2, reg_c
- and reg_b, reg_d
- add reg_a, 5A827999H
- or reg_b, reg_temp2
- add reg_a, reg_b
- rol reg_a, s
-reg_t textequ reg_d
-reg_d textequ reg_c
-reg_c textequ reg_temp1
-reg_temp1 textequ reg_b
-reg_b textequ reg_a
-reg_a textequ reg_t
- ENDM
-
-MD4HH MACRO count:REQ,s:REQ
-; a = (a+x[count]+(b^c^d)+6ED9EBA1H)rol s
- add reg_a, [ebp+count*4]
- mov reg_temp1, reg_b
- xor reg_b, reg_c
- add reg_a, 6ED9EBA1H
- xor reg_b, reg_d
- add reg_a, reg_b
- rol reg_a, s
-reg_t textequ reg_d
-reg_d textequ reg_c
-reg_c textequ reg_temp1
-reg_temp1 textequ reg_b
-reg_b textequ reg_a
-reg_a textequ reg_t
- ENDM
-
- .code
-
-MD4_Transform_p5 PROC ; we expect ebp to point to the Data stream
- ; all other registers (eax,ebx,ecx,edx,esi,edi) will be destroyed
-__this textequ <[esp+32+2*4]> ; 1*pusha+2*call
-; set alias for registers
-reg_a textequ
-reg_b textequ
-reg_c textequ
-reg_d textequ
-reg_temp1 textequ
-reg_temp2 textequ
- mov reg_temp1, __this
- mov reg_a, [reg_temp1+m_nState0]
- mov reg_b, [reg_temp1+m_nState1]
- mov reg_c, [reg_temp1+m_nState2]
- mov reg_d, [reg_temp1+m_nState3]
-; round 1
- MD4FF 0, MD4_S11
- MD4FF 1, MD4_S12
- MD4FF 2, MD4_S13
- MD4FF 3, MD4_S14
- MD4FF 4, MD4_S11
- MD4FF 5, MD4_S12
- MD4FF 6, MD4_S13
- MD4FF 7, MD4_S14
- MD4FF 8, MD4_S11
- MD4FF 9, MD4_S12
- MD4FF 10, MD4_S13
- MD4FF 11, MD4_S14
- MD4FF 12, MD4_S11
- MD4FF 13, MD4_S12
- MD4FF 14, MD4_S13
- MD4FF 15, MD4_S14
-; round 2
- MD4GG 0, MD4_S21
- MD4GG 4, MD4_S22
- MD4GG 8, MD4_S23
- MD4GG 12, MD4_S24
- MD4GG 1, MD4_S21
- MD4GG 5, MD4_S22
- MD4GG 9, MD4_S23
- MD4GG 13, MD4_S24
- MD4GG 2, MD4_S21
- MD4GG 6, MD4_S22
- MD4GG 10, MD4_S23
- MD4GG 14, MD4_S24
- MD4GG 3, MD4_S21
- MD4GG 7, MD4_S22
- MD4GG 11, MD4_S23
- MD4GG 15, MD4_S24
-; round 3
- MD4HH 0, MD4_S31
- MD4HH 8, MD4_S32
- MD4HH 4, MD4_S33
- MD4HH 12, MD4_S34
- MD4HH 2, MD4_S31
- MD4HH 10, MD4_S32
- MD4HH 6, MD4_S33
- MD4HH 14, MD4_S34
- MD4HH 1, MD4_S31
- MD4HH 9, MD4_S32
- MD4HH 5, MD4_S33
- MD4HH 13, MD4_S34
- MD4HH 3, MD4_S31
- MD4HH 11, MD4_S32
- MD4HH 7, MD4_S33
- MD4HH 15, MD4_S34
- mov reg_temp1, __this
- add [reg_temp1+m_nState0], reg_a
- add [reg_temp1+m_nState1], reg_b
- add [reg_temp1+m_nState2], reg_c
- add [reg_temp1+m_nState3], reg_d
- ret
-MD4_Transform_p5 ENDP
-
-MD4_Add_p5 PROC PUBLIC, _this:DWORD, _Data:DWORD, _nLength:DWORD
-
- pusha
-__this textequ <[esp+36]> ; different offset due to pusha
-__Data textequ <[esp+40]>
-__nLength textequ <[esp+44]>
-
- mov ecx, __nLength
- and ecx, ecx
- jz get_out
- xor edx, edx
- mov ebp, __Data
- mov edi, __this
- mov ebx, [edi+m_nCount0]
- mov eax, ebx
- add ebx, ecx
- mov [edi+m_nCount0], ebx
- adc [edi+m_nCount1], edx
-
- and eax, 63
- jnz partial_buffer
-full_blocks: mov ecx, __nLength
- and ecx, ecx
- jz get_out
- sub ecx, 64
- jb end_of_stream
- mov __nLength, ecx
- call MD4_Transform_p5
- add ebp, 64
- jmp full_blocks
-
-end_of_stream: mov edi, __this
- mov esi, ebp
- lea edi, [edi+m_pBuffer]
- add ecx, 64
- rep movsb
- jmp get_out
-
-partial_buffer: add ecx, eax ; eax = offset in buffer, ecx = _nLength
- cmp ecx, 64
- jb short_stream ; we can't fill the buffer
- mov ecx, -64
- add ecx, eax
- add __nLength, ecx ; _nlength += (offset-64)
-@@: mov bl, [ebp]
- inc ebp
- mov byte ptr [edi+m_pBuffer+64+ecx], bl
- inc ecx
- jnz @B ; offset = 64
- mov __Data, ebp
- lea ebp, [edi+m_pBuffer]
- call MD4_Transform_p5
- mov ebp, __Data
- jmp full_blocks
-
-short_stream: sub ecx, eax ; --> ecx=_nLength
- mov esi, ebp
- lea edi, [edi+m_pBuffer+eax]
- rep movsb
-
-get_out: popa
- ret 12
-
-MD4_Add_p5 ENDP
-
+; #####################################################################################################################
+;
+; MD4x86.asm (formerly MD4_asm.asm)
+;
+; Copyright (c) Shareaza Development Team, 2002-2007.
+; This file is part of SHAREAZA (shareaza.sourceforge.net)
+;
+; Shareaza is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License
+; as published by the Free Software Foundation; either version 2 of
+; the License, or (at your option) any later version.
+;
+; Shareaza is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with Shareaza; if not, write to the Free Software
+; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+;
+; #####################################################################################################################
+;
+; MD4x86 - Implementation of MD4 for x86 - use together with MD4.cpp and MD4.h
+;
+; #####################################################################################################################
+
+ .586p ; Pentium instruction set
+ .model flat, stdcall ; flat memory model, stdcall as default calling convention
+ option casemap:none ; case sensitive
+ option prologue:none ; we generate our own entry/exit code
+ option epilogue:none
+
+; #####################################################################################################################
+
+m_nCount0 equ 0 ; byte offset of the low dword of the processed-byte counter
+m_nCount1 equ 4 ; byte offset of the high dword (receives the carry from m_nCount0)
+
+m_nState0 equ 8 ; offsets as found in MD4.h
+m_nState1 equ 12
+m_nState2 equ 16
+m_nState3 equ 20
+
+m_pBuffer equ 24 ; byte offset of the 64-byte buffer that holds an incomplete block
+
+; Rotate-left amounts used by Transform: MD4_Sxy is used in round x, y cycles 1..4 over successive steps
+MD4_S11 equ 3
+MD4_S12 equ 7
+MD4_S13 equ 11
+MD4_S14 equ 19
+
+MD4_S21 equ 3
+MD4_S22 equ 5
+MD4_S23 equ 9
+MD4_S24 equ 13
+
+MD4_S31 equ 3
+MD4_S32 equ 9
+MD4_S33 equ 11
+MD4_S34 equ 15
+
+ .data ; nothing is defined in this segment; only macro definitions follow until .code
+
+MD4FF MACRO count:REQ,s:REQ
+; Round 1 step, definition: a = (a+x[count]+((b&c)|(~b&d)))rol s
+; Form actually computed below: a = (a+x[count]+(d^(b&(c^d))))rol s
+ mov reg_temp1, reg_c ; keep a copy of c, the c register is used as scratch below
+ xor reg_c, reg_d ; c^d
+ add reg_a, [ebp+count*4] ; a += x[count], ebp points to the input block
+ and reg_c, reg_b ; b&(c^d)
+ xor reg_c, reg_d ; d^(b&(c^d))
+ add reg_a, reg_c
+ rol reg_a, s
+reg_t textequ reg_d ; rename the aliases instead of moving values: (a,b,c,d) <- (d,a,b,c)
+reg_d textequ reg_temp1 ; new d is the saved copy of c
+reg_temp1 textequ reg_c ; the clobbered c register becomes the next scratch register
+reg_c textequ reg_b
+reg_b textequ reg_a ; new b is the value just computed
+reg_a textequ reg_t ; new a is the old d
+ ENDM
+
+MD4GG MACRO count:REQ,s:REQ
+; Round 2 step, definition: a = (a+x[count]+((b&c)|(b&d)|(c&d))+5A827999H) rol s
+; Form actually computed below: a = (a+x[count]+((b&c)|(d&(b|c)))+5A827999H)rol s
+ mov reg_temp2, reg_b ; second copy of b, becomes b&c
+ mov reg_temp1, reg_b ; saved copy of b, the b register is used as scratch below
+ add reg_a, [ebp+count*4] ; a += x[count], ebp points to the input block
+ or reg_b, reg_c ; b|c
+ and reg_temp2, reg_c ; b&c
+ and reg_b, reg_d ; d&(b|c)
+ add reg_a, 5A827999H ; round 2 additive constant
+ or reg_b, reg_temp2 ; (b&c)|(d&(b|c))
+ add reg_a, reg_b
+ rol reg_a, s
+reg_t textequ reg_d ; rename the aliases instead of moving values: (a,b,c,d) <- (d,a,b,c)
+reg_d textequ reg_c
+reg_c textequ reg_temp1 ; new c is the saved copy of b
+reg_temp1 textequ reg_b ; the clobbered b register becomes the next scratch register
+reg_b textequ reg_a ; new b is the value just computed
+reg_a textequ reg_t ; new a is the old d
+ ENDM
+
+MD4HH MACRO count:REQ,s:REQ
+; Round 3 step: a = (a+x[count]+(b^c^d)+6ED9EBA1H)rol s
+ add reg_a, [ebp+count*4] ; a += x[count], ebp points to the input block
+ mov reg_temp1, reg_b ; saved copy of b, the b register is used as scratch below
+ xor reg_b, reg_c ; b^c
+ add reg_a, 6ED9EBA1H ; round 3 additive constant
+ xor reg_b, reg_d ; b^c^d
+ add reg_a, reg_b
+ rol reg_a, s
+reg_t textequ reg_d ; rename the aliases instead of moving values: (a,b,c,d) <- (d,a,b,c)
+reg_d textequ reg_c
+reg_c textequ reg_temp1 ; new c is the saved copy of b
+reg_temp1 textequ reg_b ; the clobbered b register becomes the next scratch register
+reg_b textequ reg_a ; new b is the value just computed
+reg_a textequ reg_t ; new a is the old d
+ ENDM
+
+ .code
+
+MD4_Transform_p5 PROC ; we expect ebp to point to the Data stream (one 64-byte block)
+ ; all other registers (eax,ebx,ecx,edx,esi,edi) will be destroyed
+__this textequ <[esp+32+2*4]> ; 1*pusha+2*call: the caller's first stack argument as seen from inside this routine
+; set alias for registers (the MD4FF/MD4GG/MD4HH macros re-map these names after every step)
+reg_a textequ <eax> ; NOTE(review): operands restored; any one-to-one mapping onto the six scratch registers is equivalent
+reg_b textequ <ebx>
+reg_c textequ <ecx>
+reg_d textequ <edx>
+reg_temp1 textequ <esi>
+reg_temp2 textequ <edi>
+ mov reg_temp1, __this ; load the four state words into a,b,c,d
+ mov reg_a, [reg_temp1+m_nState0]
+ mov reg_b, [reg_temp1+m_nState1]
+ mov reg_c, [reg_temp1+m_nState2]
+ mov reg_d, [reg_temp1+m_nState3]
+; round 1
+ MD4FF 0, MD4_S11
+ MD4FF 1, MD4_S12
+ MD4FF 2, MD4_S13
+ MD4FF 3, MD4_S14
+ MD4FF 4, MD4_S11
+ MD4FF 5, MD4_S12
+ MD4FF 6, MD4_S13
+ MD4FF 7, MD4_S14
+ MD4FF 8, MD4_S11
+ MD4FF 9, MD4_S12
+ MD4FF 10, MD4_S13
+ MD4FF 11, MD4_S14
+ MD4FF 12, MD4_S11
+ MD4FF 13, MD4_S12
+ MD4FF 14, MD4_S13
+ MD4FF 15, MD4_S14
+; round 2
+ MD4GG 0, MD4_S21
+ MD4GG 4, MD4_S22
+ MD4GG 8, MD4_S23
+ MD4GG 12, MD4_S24
+ MD4GG 1, MD4_S21
+ MD4GG 5, MD4_S22
+ MD4GG 9, MD4_S23
+ MD4GG 13, MD4_S24
+ MD4GG 2, MD4_S21
+ MD4GG 6, MD4_S22
+ MD4GG 10, MD4_S23
+ MD4GG 14, MD4_S24
+ MD4GG 3, MD4_S21
+ MD4GG 7, MD4_S22
+ MD4GG 11, MD4_S23
+ MD4GG 15, MD4_S24
+; round 3
+ MD4HH 0, MD4_S31
+ MD4HH 8, MD4_S32
+ MD4HH 4, MD4_S33
+ MD4HH 12, MD4_S34
+ MD4HH 2, MD4_S31
+ MD4HH 10, MD4_S32
+ MD4HH 6, MD4_S33
+ MD4HH 14, MD4_S34
+ MD4HH 1, MD4_S31
+ MD4HH 9, MD4_S32
+ MD4HH 5, MD4_S33
+ MD4HH 13, MD4_S34
+ MD4HH 3, MD4_S31
+ MD4HH 11, MD4_S32
+ MD4HH 7, MD4_S33
+ MD4HH 15, MD4_S34
+ mov reg_temp1, __this ; add the working values back into the stored state
+ add [reg_temp1+m_nState0], reg_a
+ add [reg_temp1+m_nState1], reg_b
+ add [reg_temp1+m_nState2], reg_c
+ add [reg_temp1+m_nState3], reg_d
+ ret
+MD4_Transform_p5 ENDP
+
+MD4_Add_p5 PROC PUBLIC, _this:DWORD, _Data:DWORD, _nLength:DWORD ; feeds _nLength bytes at _Data into the state at _this
+
+ pusha ; save all general registers, restored by popa at get_out
+__this textequ <[esp+36]> ; different offset due to pusha
+__Data textequ <[esp+40]>
+__nLength textequ <[esp+44]>
+
+ mov ecx, __nLength
+ and ecx, ecx
+ jz get_out ; nothing to do for zero length
+ xor edx, edx ; edx = 0, only used to propagate the carry below
+ mov ebp, __Data ; ebp = read pointer into the input
+ mov edi, __this
+ mov ebx, [edi+m_nCount0]
+ mov eax, ebx ; eax = byte count before this call
+ add ebx, ecx
+ mov [edi+m_nCount0], ebx ; 64-bit counter += _nLength
+ adc [edi+m_nCount1], edx
+
+ and eax, 63 ; eax = number of bytes already waiting in m_pBuffer
+ jnz partial_buffer
+full_blocks: mov ecx, __nLength ; hash complete 64-byte blocks straight from the input
+ and ecx, ecx
+ jz get_out
+ sub ecx, 64
+ jb end_of_stream ; fewer than 64 bytes left
+ mov __nLength, ecx ; the argument slot on the stack doubles as the remaining-length variable
+ call MD4_Transform_p5
+ add ebp, 64
+ jmp full_blocks
+
+end_of_stream: mov edi, __this ; copy the remaining tail to the start of m_pBuffer
+ mov esi, ebp
+ lea edi, [edi+m_pBuffer]
+ add ecx, 64 ; undo the sub above: ecx = remaining byte count
+ rep movsb
+ jmp get_out
+
+partial_buffer: add ecx, eax ; eax = offset in buffer, ecx = _nLength
+ cmp ecx, 64
+ jb short_stream ; we can't fill the buffer
+ mov ecx, -64
+ add ecx, eax ; ecx = offset-64, a negative index counting up to zero
+ add __nLength, ecx ; _nlength += (offset-64)
+@@: mov bl, [ebp] ; byte-wise copy that fills the buffer up to 64 bytes
+ inc ebp
+ mov byte ptr [edi+m_pBuffer+64+ecx], bl
+ inc ecx
+ jnz @B ; offset = 64
+ mov __Data, ebp ; park the input pointer in the argument slot while ebp points at the buffer
+ lea ebp, [edi+m_pBuffer]
+ call MD4_Transform_p5
+ mov ebp, __Data
+ jmp full_blocks
+
+short_stream: sub ecx, eax ; --> ecx=_nLength
+ mov esi, ebp
+ lea edi, [edi+m_pBuffer+eax] ; append behind the bytes already buffered
+ rep movsb
+
+get_out: popa
+ ret 12 ; pops the three DWORD arguments
+
+MD4_Add_p5 ENDP
+
end
\ No newline at end of file
diff --git a/hasher/MD5.cpp b/hasher/MD5.cpp
index 84880c957..5c8b9312f 100644
--- a/hasher/MD5.cpp
+++ b/hasher/MD5.cpp
@@ -25,9 +25,7 @@
#include "MD5.h"
-#ifdef HASHLIB_USE_ASM
-extern "C" void __stdcall MD5_Add_p5(CMD5::MD5State*, const void* pData, std::size_t nLength);
-#endif
+
const unsigned char hashPadding[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -68,11 +66,51 @@ void CMD5::Finish()
Add(&bits, sizeof(bits));
}
+
#ifdef HASHLIB_USE_ASM
+#if defined(_WIN64) || defined(__x86_64__)
+extern "C" void __fastcall MD5_x64(const void *, const void* pData, std::size_t nLength); // args: state words, data, number of 64-byte blocks
+#else
+extern "C" void __stdcall MD5_Add_p5(CMD5::MD5State*, const void* pData, std::size_t nLength);
+#endif
+
void CMD5::Add(const void* pData, std::size_t nLength)
{
+#if defined(_WIN64) || defined(__x86_64__)
+    // 64-bit build: partial blocks are buffered here, MD5_x64 is only handed whole 64-byte blocks
+    // Update number of bytes
+    const char* input = static_cast<const char*>(pData);
+    {
+        uint32 index = static_cast<uint32>(m_State.m_nCount % m_State.blockSize);
+        m_State.m_nCount += nLength;
+        if (index)
+        {
+            // buffer has some data already - lets fill it
+            // before doing the rest of the transformation on the original data
+            if (index + nLength < m_State.blockSize)
+            {
+                std::memcpy(m_State.m_oBuffer + index, input, nLength);
+                return;
+            }
+            std::memcpy(m_State.m_oBuffer + index, input, m_State.blockSize - index);
+            nLength -= m_State.blockSize - index;
+            input += m_State.blockSize - index;
+            MD5_x64(&(m_State.m_nState[0]), m_State.m_oBuffer, 1);
+        }
+    }
+    // Transform as many times as possible using the original data stream
+    // (a single call processes all whole blocks; a count of zero is passed through unchanged)
+    const std::size_t nBlocks = nLength / m_State.blockSize;
+    MD5_x64(&(m_State.m_nState[0]), input, nBlocks);
+    input += nBlocks * m_State.blockSize;
+    nLength %= m_State.blockSize;
+    // Buffer remaining input
+    if (nLength)
+        std::memcpy(m_State.m_oBuffer, input, nLength);
+#else
MD5_Add_p5(&m_State, pData, nLength);
+#endif
}
#else // HASHLIB_USE_ASM
@@ -221,9 +259,9 @@ void CMD5::Transform(const uint32* data)
void CMD5::Add(const void* pData, std::size_t nLength)
{
// Update number of bytes
- const char* input = static_cast< const char* >(pData);
+ const char* input = static_cast(pData);
{
- uint32 index = static_cast< uint32 >(m_State.m_nCount % m_State.blockSize);
+ uint32 index = static_cast(m_State.m_nCount % m_State.blockSize);
m_State.m_nCount += nLength;
if (index)
{
@@ -237,19 +275,22 @@ void CMD5::Add(const void* pData, std::size_t nLength)
std::memcpy(m_State.m_oBuffer + index, input, m_State.blockSize - index);
nLength -= m_State.blockSize - index;
input += m_State.blockSize - index;
- Transform(reinterpret_cast< const uint32* >(m_State.m_oBuffer));
+ Transform(reinterpret_cast(m_State.m_oBuffer));
}
}
// Transform as many times as possible using the original data stream
const char* const end = input + nLength - nLength % m_State.blockSize;
nLength %= m_State.blockSize;
for (; input != end; input += m_State.blockSize)
- Transform(reinterpret_cast< const uint32* >(input));
+ Transform(reinterpret_cast(input));
// Buffer remaining input
if (nLength)
std::memcpy(m_State.m_oBuffer, input, nLength);
}
+
+
+
#endif // HASHLIB_USE_ASM
// MD5.CPP - RSA Data Security, Inc., MD5 message-digest algorithm
diff --git a/hasher/MD5x64.asm b/hasher/MD5x64.asm
new file mode 100644
index 000000000..eaa1f69c7
--- /dev/null
+++ b/hasher/MD5x64.asm
@@ -0,0 +1,825 @@
+
+; To assemble this file the original author used
+; ml64.exe /Flm5n64 /c /Zi m5n64.asm (the command still shows the file name m5n64.asm)
+; with Microsoft Macro Assembler (x64) for AMD64
+;
+; ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK
+;
+; (you can get Windows 2003 server DDK with ml64 and cl for AMD64 from
+; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price)
+;
+.code
+
+
+MD5_x64 PROC ; MD5_x64(state, data, nBlocks): runs the MD5 block transform over nBlocks consecutive 64-byte blocks
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ push rsi
+ push rdi
+; parameter 1 in rcx, param 2 in rdx , param 3 in r8
+
+; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
+; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
+;
+; All registers must be preserved across the call, except for
+; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
+
+ ;# rcx = arg #1 (ctx, pointer to the four 32-bit state words A, B, C, D)
+ ;# rdx = arg #2 (ptr, data pointer), moved to rsi below
+ ;# r8 = arg #3 (nbr, number of 16-word blocks to process), moved to rdx below
+
+ mov rsi,rdx
+ mov rdx,r8 ;# full 64-bit block count (edx,r8d would drop the upper 32 bits)
+
+ mov r12,rcx ;# r12 = ctx
+ shl rdx,6 ;# rdx = nbr in bytes
+ push r12 ;# save ctx, r12d is reused as a temporary in round 2
+ lea rdi,[rsi+rdx]; # rdi = end
+
+ mov eax,DWORD PTR 0[r12] ;# eax = ctx->A
+ mov ebx,DWORD PTR 4[r12] ;# ebx = ctx->B
+ mov ecx,DWORD PTR 8[r12] ;# ecx = ctx->C
+ mov edx,DWORD PTR 12[r12] ;# edx = ctx->D
+ ;push rbp ;# save ctx
+ ;# end is 'rdi'
+ ;# ptr is 'rsi'
+ ;# A is 'eax'
+ ;# B is 'ebx'
+ ;# C is 'ecx'
+ ;# D is 'edx'
+
+; it is better with align 16 here, I don't know why
+align 16
+ cmp rsi,rdi ;# cmp end with ptr
+ mov r13d,0ffffffffh ;# all-ones mask, round 4 uses xor with it as NOT
+ je lab1 ;# jmp if ptr == end
+
+ ;# BEGIN of loop over 16-word blocks
+lab2: ;# save old values of A, B, C, D
+ mov r8d,eax
+ mov r9d,ebx
+ mov r14d,ecx
+ mov r15d,edx
+; BEGIN of the round series -- round 1: dst += z ^ (x & (y ^ z)), two X words are fetched per 64-bit load
+ mov r10 , QWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ mov r11d , edx ;/* (NEXT STEP) z' = %edx */
+
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax,DWORD PTR 0d76aa478h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (1*4)[rsi] ;/* (NEXT STEP) X[1] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 7 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ add eax , ebx ;/* dst += x */
+
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx,DWORD PTR 0e8c7b756h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (2*4)[rsi] ;/* (NEXT STEP) X[2] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 12 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ add edx , eax ;/* dst += x */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx,DWORD PTR 0242070dbh [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (3*4)[rsi] ;/* (NEXT STEP) X[3] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 17 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ add ecx , edx ;/* dst += x */
+ xor r11d,edx ;/* y ^ ... */
+
+ lea ebx,DWORD PTR 0c1bdceeeh [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (4*4)[rsi] ;/* (NEXT STEP) X[4] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 22 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ add ebx , ecx ;/* dst += x */
+ xor r11d,ecx ;/* y ^ ... */
+
+ lea eax,DWORD PTR 0f57c0fafh [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (5*4)[rsi] ;/* (NEXT STEP) X[5] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 7 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ add eax , ebx ;/* dst += x */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx,DWORD PTR 04787c62ah [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (6*4)[rsi] ;/* (NEXT STEP) X[6] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 12 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ add edx , eax ;/* dst += x */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx,DWORD PTR 0a8304613h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (7*4)[rsi] ;/* (NEXT STEP) X[7] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 17 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ add ecx , edx ;/* dst += x */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx,DWORD PTR 0fd469501h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (8*4)[rsi] ;/* (NEXT STEP) X[8] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 22 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ add ebx , ecx ;/* dst += x */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax,DWORD PTR 0698098d8h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (9*4)[rsi] ;/* (NEXT STEP) X[9] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 7 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ add eax , ebx ;/* dst += x */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx,DWORD PTR 08b44f7afh [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (10*4)[rsi] ;/* (NEXT STEP) X[10] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 12 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ add edx , eax ;/* dst += x */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx,DWORD PTR 0ffff5bb1h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (11*4)[rsi] ;/* (NEXT STEP) X[11] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 17 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ add ecx , edx ;/* dst += x */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx,DWORD PTR 0895cd7beh [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (12*4)[rsi] ;/* (NEXT STEP) X[12] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 22 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ add ebx , ecx ;/* dst += x */
+ xor r11d,ecx ;/* y ^ ... */
+ lea eax,DWORD PTR 06b901122h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ebx ;/* x & ... */
+ xor r11d,edx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (13*4)[rsi] ;/* (NEXT STEP) X[13] */
+ shr r10,32
+ add eax,r11d ;/* dst += ... */
+ rol eax, 7 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) z' = ecx */
+ add eax , ebx ;/* dst += x */
+ xor r11d,ebx ;/* y ^ ... */
+ lea edx,DWORD PTR 0fd987193h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,eax ;/* x & ... */
+ xor r11d,ecx ;/* z ^ ... */
+ mov r10,QWORD PTR (14*4)[rsi] ;/* (NEXT STEP) X[14] */
+ add edx,r11d ;/* dst += ... */
+ rol edx, 12 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) z' = ebx */
+ add edx , eax ;/* dst += x */
+ xor r11d,eax ;/* y ^ ... */
+ lea ecx,DWORD PTR 0a679438eh [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,edx ;/* x & ... */
+ xor r11d,ebx ;/* z ^ ... */
+ ;mov r10d,DWORD PTR (15*4)[rsi] ;/* (NEXT STEP) X[15] */
+ shr r10,32
+ add ecx,r11d ;/* dst += ... */
+ rol ecx, 17 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) z' = eax */
+ add ecx , edx ;/* dst += x */
+ xor r11d,edx ;/* y ^ ... */
+ lea ebx,DWORD PTR 049b40821h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ and r11d,ecx ;/* x & ... */
+ xor r11d,eax ;/* z ^ ... */
+ mov r10,QWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ add ebx,r11d ;/* dst += ... */
+ rol ebx, 22 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) z' = edx */
+ add ebx , ecx ;/* dst += x */
+ mov r10d , 4 [rsi] ;/* round 2 starts here: dst += (x & z) | (y & (not z)); X[1] */
+ mov r11d, edx ;/* (NEXT STEP) z' = %edx */
+ mov r12d, edx ;/* (NEXT STEP) z' = %edx */
+ not r11d
+ lea eax,DWORD PTR 0f61e2562h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ebx ;/* x & z */
+ and r11d,ecx ;/* y & (not z) */
+
+ mov r10d , (6*4) [rsi] ;/* (NEXT STEP) X[6] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ecx ;/* (NEXT STEP) z' = ecx */
+ add eax, r12d ; /* dst += ... */
+ mov r12d,ecx ;/* (NEXT STEP) z' = ecx */
+
+
+ rol eax , 5 ;/* dst <<< s */
+ add eax , ebx ;/* dst += x */
+ not r11d
+ lea edx,DWORD PTR 0c040b340h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,eax ;/* x & z */
+ and r11d,ebx ;/* y & (not z) */
+
+ mov r10d , (11*4) [rsi] ;/* (NEXT STEP) X[11] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ebx ;/* (NEXT STEP) z' = ebx */
+ add edx, r12d ; /* dst += ... */
+ mov r12d,ebx ;/* (NEXT STEP) z' = ebx */
+
+
+ rol edx , 9 ;/* dst <<< s */
+ add edx , eax ;/* dst += x */
+ not r11d
+ lea ecx,DWORD PTR 0265e5a51h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,edx ;/* x & z */
+ and r11d,eax ;/* y & (not z) */
+
+ mov r10d , (0*4) [rsi] ;/* (NEXT STEP) X[0] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,eax ;/* (NEXT STEP) z' = eax */
+ add ecx, r12d ; /* dst += ... */
+ mov r12d,eax ;/* (NEXT STEP) z' = eax */
+
+
+ rol ecx , 14 ;/* dst <<< s */
+ add ecx , edx ;/* dst += x */
+ not r11d
+ lea ebx,DWORD PTR 0e9b6c7aah [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ecx ;/* x & z */
+ and r11d,edx ;/* y & (not z) */
+
+ mov r10d , (5*4) [rsi] ;/* (NEXT STEP) X[5] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,edx ;/* (NEXT STEP) z' = edx */
+ add ebx, r12d ; /* dst += ... */
+ mov r12d,edx ;/* (NEXT STEP) z' = edx */
+
+
+ rol ebx , 20 ;/* dst <<< s */
+ add ebx , ecx ;/* dst += x */
+ not r11d
+ lea eax,DWORD PTR 0d62f105dh [ eax * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ebx ;/* x & z */
+ and r11d,ecx ;/* y & (not z) */
+
+ mov r10d , (10*4) [rsi] ;/* (NEXT STEP) X[10] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ecx ;/* (NEXT STEP) z' = ecx */
+ add eax, r12d ; /* dst += ... */
+ mov r12d,ecx ;/* (NEXT STEP) z' = ecx */
+
+
+ rol eax , 5 ;/* dst <<< s */
+ add eax , ebx ;/* dst += x */
+ not r11d
+ lea edx,DWORD PTR 02441453h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,eax ;/* x & z */
+ and r11d,ebx ;/* y & (not z) */
+
+ mov r10d , (15*4) [rsi] ;/* (NEXT STEP) X[15] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ebx ;/* (NEXT STEP) z' = ebx */
+ add edx, r12d ; /* dst += ... */
+ mov r12d,ebx ;/* (NEXT STEP) z' = ebx */
+
+
+ rol edx , 9 ;/* dst <<< s */
+ add edx , eax ;/* dst += x */
+ not r11d
+ lea ecx,DWORD PTR 0d8a1e681h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,edx ;/* x & z */
+ and r11d,eax ;/* y & (not z) */
+
+ mov r10d , (4*4) [rsi] ;/* (NEXT STEP) X[4] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,eax ;/* (NEXT STEP) z' = eax */
+ add ecx, r12d ; /* dst += ... */
+ mov r12d,eax ;/* (NEXT STEP) z' = eax */
+
+
+ rol ecx , 14 ;/* dst <<< s */
+ add ecx , edx ;/* dst += x */
+ not r11d
+ lea ebx,DWORD PTR 0e7d3fbc8h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ecx ;/* x & z */
+ and r11d,edx ;/* y & (not z) */
+
+ mov r10d , (9*4) [rsi] ;/* (NEXT STEP) X[9] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,edx ;/* (NEXT STEP) z' = edx */
+ add ebx, r12d ; /* dst += ... */
+ mov r12d,edx ;/* (NEXT STEP) z' = edx */
+
+
+ rol ebx , 20 ;/* dst <<< s */
+ add ebx , ecx ;/* dst += x */
+ not r11d
+ lea eax,DWORD PTR 021e1cde6h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ebx ;/* x & z */
+ and r11d,ecx ;/* y & (not z) */
+
+ mov r10d , (14*4) [rsi] ;/* (NEXT STEP) X[14] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ecx ;/* (NEXT STEP) z' = ecx */
+ add eax, r12d ; /* dst += ... */
+ mov r12d,ecx ;/* (NEXT STEP) z' = ecx */
+
+
+ rol eax , 5 ;/* dst <<< s */
+ add eax , ebx ;/* dst += x */
+ not r11d
+ lea edx,DWORD PTR 0c33707d6h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,eax ;/* x & z */
+ and r11d,ebx ;/* y & (not z) */
+
+ mov r10d , (3*4) [rsi] ;/* (NEXT STEP) X[3] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ebx ;/* (NEXT STEP) z' = ebx */
+ add edx, r12d ; /* dst += ... */
+ mov r12d,ebx ;/* (NEXT STEP) z' = ebx */
+
+
+ rol edx , 9 ;/* dst <<< s */
+ add edx , eax ;/* dst += x */
+ not r11d
+ lea ecx,DWORD PTR 0f4d50d87h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,edx ;/* x & z */
+ and r11d,eax ;/* y & (not z) */
+
+ mov r10d , (8*4) [rsi] ;/* (NEXT STEP) X[8] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,eax ;/* (NEXT STEP) z' = eax */
+ add ecx, r12d ; /* dst += ... */
+ mov r12d,eax ;/* (NEXT STEP) z' = eax */
+
+
+ rol ecx , 14 ;/* dst <<< s */
+ add ecx , edx ;/* dst += x */
+ not r11d
+ lea ebx,DWORD PTR 0455a14edh [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ecx ;/* x & z */
+ and r11d,edx ;/* y & (not z) */
+
+ mov r10d , (13*4) [rsi] ;/* (NEXT STEP) X[13] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,edx ;/* (NEXT STEP) z' = edx */
+ add ebx, r12d ; /* dst += ... */
+ mov r12d,edx ;/* (NEXT STEP) z' = edx */
+
+
+ rol ebx , 20 ;/* dst <<< s */
+ add ebx , ecx ;/* dst += x */
+ not r11d
+ lea eax,DWORD PTR 0a9e3e905h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ebx ;/* x & z */
+ and r11d,ecx ;/* y & (not z) */
+
+ mov r10d , (2*4) [rsi] ;/* (NEXT STEP) X[2] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ecx ;/* (NEXT STEP) z' = ecx */
+ add eax, r12d ; /* dst += ... */
+ mov r12d,ecx ;/* (NEXT STEP) z' = ecx */
+
+
+ rol eax , 5 ;/* dst <<< s */
+ add eax , ebx ;/* dst += x */
+ not r11d
+ lea edx,DWORD PTR 0fcefa3f8h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,eax ;/* x & z */
+ and r11d,ebx ;/* y & (not z) */
+
+ mov r10d , (7*4) [rsi] ;/* (NEXT STEP) X[7] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,ebx ;/* (NEXT STEP) z' = ebx */
+ add edx, r12d ; /* dst += ... */
+ mov r12d,ebx ;/* (NEXT STEP) z' = ebx */
+
+
+ rol edx , 9 ;/* dst <<< s */
+ add edx , eax ;/* dst += x */
+ not r11d
+ lea ecx,DWORD PTR 0676f02d9h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,edx ;/* x & z */
+ and r11d,eax ;/* y & (not z) */
+
+ mov r10d , (12*4) [rsi] ;/* (NEXT STEP) X[12] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,eax ;/* (NEXT STEP) z' = eax */
+ add ecx, r12d ; /* dst += ... */
+ mov r12d,eax ;/* (NEXT STEP) z' = eax */
+
+
+ rol ecx , 14 ;/* dst <<< s */
+ add ecx , edx ;/* dst += x */
+ not r11d
+ lea ebx,DWORD PTR 08d2a4c8ah [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+
+ and r12d,ecx ;/* x & z */
+ and r11d,edx ;/* y & (not z) */
+
+ mov r10d , (0*4) [rsi] ;/* (NEXT STEP) X[0] */
+
+
+
+ or r12d,r11d ;/* (y & (not z)) | (x & z) */
+ mov r11d,edx ;/* (NEXT STEP) z' = edx */
+ add ebx, r12d ; /* dst += ... */
+ mov r12d,edx ;/* (NEXT STEP) z' = edx */
+
+
+ rol ebx , 20 ;/* dst <<< s */
+ add ebx , ecx ;/* dst += x */
+ mov r10d , (5*4)[rsi] ;/* round 3 starts here: dst += x ^ y ^ z; X[5] */
+ mov r11d , ecx ;/* (NEXT STEP) y' = %ecx */
+ lea eax,DWORD PTR 0fffa3942h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (8*4)[rsi] ;/* (NEXT STEP) X[8] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 4 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 08771f681h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (11*4)[rsi] ;/* (NEXT STEP) X[11] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 06d9d6122h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (14*4)[rsi] ;/* (NEXT STEP) X[14] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 16 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 0fde5380ch [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (1*4)[rsi] ;/* (NEXT STEP) X[1] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 23 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 0a4beea44h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (4*4)[rsi] ;/* (NEXT STEP) X[4] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 4 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 04bdecfa9h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (7*4)[rsi] ;/* (NEXT STEP) X[7] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 0f6bb4b60h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (10*4)[rsi] ;/* (NEXT STEP) X[10] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 16 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 0bebfbc70h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (13*4)[rsi] ;/* (NEXT STEP) X[13] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 23 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 0289b7ec6h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 4 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 0eaa127fah [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (3*4)[rsi] ;/* (NEXT STEP) X[3] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 0d4ef3085h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (6*4)[rsi] ;/* (NEXT STEP) X[6] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 16 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 04881d05h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (9*4)[rsi] ;/* (NEXT STEP) X[9] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 23 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 0d9d4d039h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (12*4)[rsi] ;/* (NEXT STEP) X[12] */
+ xor r11d,edx ;/* z ^ ... */
+ xor r11d,ebx ;/* x ^ ... */
+ add eax , r11d ;/* dst += ... */
+ rol eax , 4 ;/* dst <<< s */
+ mov r11d , ebx ;/* (NEXT STEP) y' = ebx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 0e6db99e5h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (15*4)[rsi] ;/* (NEXT STEP) X[15] */
+ xor r11d,ecx ;/* z ^ ... */
+ xor r11d,eax ;/* x ^ ... */
+ add edx , r11d ;/* dst += ... */
+ rol edx , 11 ;/* dst <<< s */
+ mov r11d , eax ;/* (NEXT STEP) y' = eax */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 01fa27cf8h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (2*4)[rsi] ;/* (NEXT STEP) X[2] */
+ xor r11d,ebx ;/* z ^ ... */
+ xor r11d,edx ;/* x ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ rol ecx , 16 ;/* dst <<< s */
+ mov r11d , edx ;/* (NEXT STEP) y' = edx */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 0c4ac5665h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ mov r10d,DWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ xor r11d,eax ;/* z ^ ... */
+ xor r11d,ecx ;/* x ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ rol ebx , 23 ;/* dst <<< s */
+ mov r11d , ecx ;/* (NEXT STEP) y' = ecx */
+ add ebx , ecx ;/* dst += x */
+ mov r10d , (0*4)[rsi] ;/* round 4 starts here: dst += y ^ (x | (not z)); X[0] */
+ mov r11d , r13d ;0ffffffffh ;%r11d
+ xor r11d , edx ;/* (NEXT STEP) not z' = not %edx*/
+ lea eax,DWORD PTR 0f4292244h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ebx ;/* x | ... */
+ xor r11d , ecx ;/* y ^ ... */
+ add eax , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (7*4)[rsi] ;/* (NEXT STEP) X[7] */
+ mov r11d , r13d ; 0ffffffffh
+ rol eax , 6 ;/* dst <<< s */
+ xor r11d , ecx ;/* (NEXT STEP) not z' = not ecx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 0432aff97h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , eax ;/* x | ... */
+ xor r11d , ebx ;/* y ^ ... */
+ add edx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (14*4)[rsi] ;/* (NEXT STEP) X[14] */
+ mov r11d , r13d ; 0ffffffffh
+ rol edx , 10 ;/* dst <<< s */
+ xor r11d , ebx ;/* (NEXT STEP) not z' = not ebx */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 0ab9423a7h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , edx ;/* x | ... */
+ xor r11d , eax ;/* y ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (5*4)[rsi] ;/* (NEXT STEP) X[5] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ecx , 15 ;/* dst <<< s */
+ xor r11d , eax ;/* (NEXT STEP) not z' = not eax */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 0fc93a039h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ecx ;/* x | ... */
+ xor r11d , edx ;/* y ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (12*4)[rsi] ;/* (NEXT STEP) X[12] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ebx , 21 ;/* dst <<< s */
+ xor r11d , edx ;/* (NEXT STEP) not z' = not edx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 0655b59c3h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ebx ;/* x | ... */
+ xor r11d , ecx ;/* y ^ ... */
+ add eax , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (3*4)[rsi] ;/* (NEXT STEP) X[3] */
+ mov r11d , r13d ; 0ffffffffh
+ rol eax , 6 ;/* dst <<< s */
+ xor r11d , ecx ;/* (NEXT STEP) not z' = not ecx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 08f0ccc92h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , eax ;/* x | ... */
+ xor r11d , ebx ;/* y ^ ... */
+ add edx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (10*4)[rsi] ;/* (NEXT STEP) X[10] */
+ mov r11d , r13d ; 0ffffffffh
+ rol edx , 10 ;/* dst <<< s */
+ xor r11d , ebx ;/* (NEXT STEP) not z' = not ebx */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 0ffeff47dh [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , edx ;/* x | ... */
+ xor r11d , eax ;/* y ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (1*4)[rsi] ;/* (NEXT STEP) X[1] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ecx , 15 ;/* dst <<< s */
+ xor r11d , eax ;/* (NEXT STEP) not z' = not eax */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 085845dd1h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ecx ;/* x | ... */
+ xor r11d , edx ;/* y ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (8*4)[rsi] ;/* (NEXT STEP) X[8] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ebx , 21 ;/* dst <<< s */
+ xor r11d , edx ;/* (NEXT STEP) not z' = not edx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 06fa87e4fh [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ebx ;/* x | ... */
+ xor r11d , ecx ;/* y ^ ... */
+ add eax , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (15*4)[rsi] ;/* (NEXT STEP) X[15] */
+ mov r11d , r13d ; 0ffffffffh
+ rol eax , 6 ;/* dst <<< s */
+ xor r11d , ecx ;/* (NEXT STEP) not z' = not ecx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 0fe2ce6e0h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , eax ;/* x | ... */
+ xor r11d , ebx ;/* y ^ ... */
+ add edx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (6*4)[rsi] ;/* (NEXT STEP) X[6] */
+ mov r11d , r13d ; 0ffffffffh
+ rol edx , 10 ;/* dst <<< s */
+ xor r11d , ebx ;/* (NEXT STEP) not z' = not ebx */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 0a3014314h [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , edx ;/* x | ... */
+ xor r11d , eax ;/* y ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (13*4)[rsi] ;/* (NEXT STEP) X[13] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ecx , 15 ;/* dst <<< s */
+ xor r11d , eax ;/* (NEXT STEP) not z' = not eax */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 04e0811a1h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ecx ;/* x | ... */
+ xor r11d , edx ;/* y ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (4*4)[rsi] ;/* (NEXT STEP) X[4] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ebx , 21 ;/* dst <<< s */
+ xor r11d , edx ;/* (NEXT STEP) not z' = not edx */
+ add ebx , ecx ;/* dst += x */
+ lea eax,DWORD PTR 0f7537e82h [ eax * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ebx ;/* x | ... */
+ xor r11d , ecx ;/* y ^ ... */
+ add eax , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (11*4)[rsi] ;/* (NEXT STEP) X[11] */
+ mov r11d , r13d ; 0ffffffffh
+ rol eax , 6 ;/* dst <<< s */
+ xor r11d , ecx ;/* (NEXT STEP) not z' = not ecx */
+ add eax , ebx ;/* dst += x */
+ lea edx,DWORD PTR 0bd3af235h [ edx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , eax ;/* x | ... */
+ xor r11d , ebx ;/* y ^ ... */
+ add edx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (2*4)[rsi] ;/* (NEXT STEP) X[2] */
+ mov r11d , r13d ; 0ffffffffh
+ rol edx , 10 ;/* dst <<< s */
+ xor r11d , ebx ;/* (NEXT STEP) not z' = not ebx */
+ add edx , eax ;/* dst += x */
+ lea ecx,DWORD PTR 02ad7d2bbh [ ecx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , edx ;/* x | ... */
+ xor r11d , eax ;/* y ^ ... */
+ add ecx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (9*4)[rsi] ;/* (NEXT STEP) X[9] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ecx , 15 ;/* dst <<< s */
+ xor r11d , eax ;/* (NEXT STEP) not z' = not eax */
+ add ecx , edx ;/* dst += x */
+ lea ebx,DWORD PTR 0eb86d391h [ ebx * 1 +r10d ] ;/* Const + dst + ... */
+ or r11d , ecx ;/* x | ... */
+ xor r11d , edx ;/* y ^ ... */
+ add ebx , r11d ;/* dst += ... */
+ mov r10d , DWORD PTR (0*4)[rsi] ;/* (NEXT STEP) X[0] */
+ mov r11d , r13d ; 0ffffffffh
+ rol ebx , 21 ;/* dst <<< s */
+ xor r11d , edx ;/* (NEXT STEP) not z' = not edx */
+ add ebx , ecx ;/* dst += x */
+; # add old values of A, B, C, D
+ add eax,r8d
+ add ebx,r9d
+ add ecx,r14d
+ add edx,r15d
+
+; # loop control
+ add rsi,64 ;# ptr += 64
+ cmp rsi,rdi ;# cmp end with ptr
+ jb lab2 ;# jmp if ptr < end
+; # END of loop over 16-word blocks
+
+lab1: ;pop rbp ;# restore ctx
+pop r12 ;# restore ctx (pushed right after the argument shuffle)
+ mov DWORD PTR 0[r12],eax ;# ctx->A = A
+ mov DWORD PTR 4[r12],ebx ;# ctx->B = B
+ mov DWORD PTR 8[r12],ecx ;# ctx->C = C
+ mov DWORD PTR 12[r12],edx ;# ctx->D = D
+
+ pop rdi
+ pop rsi
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+ pop rbp
+ ret
+MD5_x64 ENDP
+END;
+
diff --git a/hasher/MD5_asm.asm b/hasher/MD5x86.asm
similarity index 99%
rename from hasher/MD5_asm.asm
rename to hasher/MD5x86.asm
index 24152d9a6..42de94df0 100644
--- a/hasher/MD5_asm.asm
+++ b/hasher/MD5x86.asm
@@ -57,6 +57,7 @@ MD5_S31 equ 4
MD5_S32 equ 11
MD5_S33 equ 16
MD5_S34 equ 23
+
MD5_S41 equ 6
MD5_S42 equ 10
MD5_S43 equ 15
diff --git a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.15.tlog b/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.15.tlog
deleted file mode 100644
index 2cfa8620e..000000000
Binary files a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.15.tlog and /dev/null differ
diff --git a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.39.tlog b/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.39.tlog
deleted file mode 100644
index c584e130d..000000000
Binary files a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.39.tlog and /dev/null differ
diff --git a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.79.tlog b/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.79.tlog
deleted file mode 100644
index 19358d3c3..000000000
Binary files a/hasher/Release_x64/Hasher.tlog/link-VCTIP.delete.79.tlog and /dev/null differ
diff --git a/hasher/SHA.cpp b/hasher/SHA1.cpp
similarity index 92%
rename from hasher/SHA.cpp
rename to hasher/SHA1.cpp
index a1237eb96..129183f30 100644
--- a/hasher/SHA.cpp
+++ b/hasher/SHA1.cpp
@@ -25,9 +25,7 @@
#include "SHA.h"
-#ifdef HASHLIB_USE_ASM
-extern "C" void __stdcall SHA1_Add_p5(CSHA::SHA1State*, const void* pData, std::size_t nLength);
-#endif
+
const unsigned char hashPadding[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -70,11 +68,50 @@ void CSHA::Finish()
#ifdef HASHLIB_USE_ASM
+#if defined(_WIN64) || defined(__x86_64__)
+// Implemented in SHA1x64.asm: hashes nLength consecutive 64-byte blocks starting at pData into
+// the five 32-bit state words addressed by the first argument. Its loop checks the end pointer
+// only after a block has been processed, so it must never be called with nLength == 0.
+extern "C" void __fastcall sha1_block_asm_data_order(const void *, const void* pData, std::size_t nLength);
+#else
+extern "C" void __stdcall SHA1_Add_p5(CSHA::SHA1State*, const void* pData, std::size_t nLength);
+#endif
// Feeds nLength bytes at pData into the running hash.
// x64: whole 64-byte blocks go to the assembly routine; a trailing partial block is kept in
// m_State.m_oBuffer until later calls complete it. Otherwise the call is forwarded to SHA1_Add_p5.
void CSHA::Add(const void* pData, std::size_t nLength)
{
+#if defined(_WIN64) || defined(__x86_64__)
+    const char* input = static_cast< const char* >(pData);
+    {
+        // Bytes already waiting in the block buffer from earlier calls
+        const uint32 index = static_cast< uint32 >(m_State.m_nCount % m_State.blockSize);
+        // Update number of bytes
+        m_State.m_nCount += nLength;
+        if (index)
+        {
+            // buffer has some data already - lets fill it
+            // before doing the rest of the transformation on the original data
+            if (index + nLength < m_State.blockSize)
+            {
+                std::memcpy(m_State.m_oBuffer + index, input, nLength);
+                return;
+            }
+            const std::size_t fill = m_State.blockSize - index;
+            std::memcpy(m_State.m_oBuffer + index, input, fill);
+            nLength -= fill;
+            input += fill;
+            sha1_block_asm_data_order(&(m_State.m_nState[0]), m_State.m_oBuffer, 1);
+        }
+    }
+    // Transform as many times as possible using the original data stream.
+    // Skip the call when no whole block is left: the assembly routine always hashes at least
+    // one block, so a zero count would read past the input and corrupt the state.
+    const std::size_t blocks = nLength / m_State.blockSize;
+    if (blocks)
+    {
+        sha1_block_asm_data_order(&(m_State.m_nState[0]), input, blocks);
+        input += blocks * m_State.blockSize;
+    }
+    nLength %= m_State.blockSize;
+    // Buffer remaining input
+    if (nLength)
+        std::memcpy(m_State.m_oBuffer, input, nLength);
+#else
SHA1_Add_p5(&m_State, pData, nLength);
-}
+#endif
+
+}
#else // HASHLIB_USE_ASM
CSHA::TransformArray::TransformArray(const uint32* const buffer)
@@ -301,6 +338,7 @@ void CSHA::Transform(TransformArray w)
m_State.m_nState[4] += e;
}
+
void CSHA::Add(const void* pData, std::size_t nLength)
{
// Update number of bytes
diff --git a/hasher/SHA1x64.asm b/hasher/SHA1x64.asm
new file mode 100644
index 000000000..accdacd76
--- /dev/null
+++ b/hasher/SHA1x64.asm
@@ -0,0 +1,1580 @@
+ ; Don't even think of reading this code
+ ; It was automatically generated by sha1-586.pl
+ ; Which is a perl program used to generate the x86 assembler for
+ ; any of ELF, a.out, COFF, Win32, ...
+ ; eric
+ ;
+; TITLE sha1-586.asm
+; .486
+;.model FLAT
+;_TEXT$ SEGMENT PAGE 'CODE'
+;PUBLIC _sha1_block_asm_data_order
+
+
+; void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num);
+; void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
+; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx
+; All registers must be preserved across the call, except for
+; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
+; param on rcx, rdx, r8 and r9
+
+; 32 bits : num=[esp+12], p=[esp+8], c=[esp+4]
+.code
+sha1_block_asm_data_order PROC ; hash num 64-byte blocks at p into the five-dword state at c
+; The block loop below is bottom-tested: one block is always processed, even when num is 0.
+push rdi ; save the non-scratch registers this routine overwrites
+push rsi
+push rbx
+push rbp
+
+
+push r12
+push r13
+push r14
+push r15
+mov r9,rcx ; r9 = c for the rest of the routine
+
+; c = rcx (copied to r9; five dwords at offsets 0..16 are read and written)
+; p = rdx (copied to r11; advanced by 64 per block)
+; num = r8 (turned into an end pointer below)
+
+ shl r8, 6 ; num * 64 bytes
+ mov r11,rdx ; r11 = current block pointer
+
+ add r8, r11 ; r8 = end pointer = p + num*64
+ mov rbp, rcx ; rbp = c, used only for the three state loads below
+
+ mov edx, DWORD PTR 12[rbp] ; edx = c[3]
+ sub rsp, 120 ; local frame: [rsp+24..60] hold X[6..15], [rsp+112] holds the end pointer
+ mov edi, DWORD PTR 16[rbp] ; edi = c[4]
+ mov ebx, DWORD PTR 8[rbp] ; ebx = c[2]
+ mov QWORD PTR 112[rsp],r8 ; spill the end pointer; r8d is reused for X[5]
+ ; First we need to setup the X array
+$L000start:
+ ; First, load the words onto the stack in network byte order
+ mov r12d, DWORD PTR [r11] ; X[0]
+ mov r13d, DWORD PTR 4[r11] ; X[1]
+ bswap r12d
+ bswap r13d
+
+ mov r14d, DWORD PTR 8[r11] ; X[2]
+ mov r15d, DWORD PTR 12[r11] ; X[3]
+ bswap r14d
+ bswap r15d
+
+ mov r10d, DWORD PTR 16[r11] ; X[4]
+ mov r8d, DWORD PTR 20[r11] ; X[5]
+ bswap r10d
+ bswap r8d
+
+ mov eax, DWORD PTR 24[r11] ; X[6] and up go to the stack frame at the same byte offset
+ mov ecx, DWORD PTR 28[r11]
+ bswap eax
+ bswap ecx
+ mov DWORD PTR 24[rsp],eax
+ mov DWORD PTR 28[rsp],ecx
+ mov eax, DWORD PTR 32[r11]
+ mov ecx, DWORD PTR 36[r11]
+ bswap eax
+ bswap ecx
+ mov DWORD PTR 32[rsp],eax
+ mov DWORD PTR 36[rsp],ecx
+ mov eax, DWORD PTR 40[r11]
+ mov ecx, DWORD PTR 44[r11]
+ bswap eax
+ bswap ecx
+ mov DWORD PTR 40[rsp],eax
+ mov DWORD PTR 44[rsp],ecx
+ mov eax, DWORD PTR 48[r11]
+ mov ecx, DWORD PTR 52[r11]
+ bswap eax
+ bswap ecx
+ mov DWORD PTR 48[rsp],eax
+ mov DWORD PTR 52[rsp],ecx
+ mov eax, DWORD PTR 56[r11]
+ mov ecx, DWORD PTR 60[r11]
+ bswap eax
+ bswap ecx
+ mov DWORD PTR 56[rsp],eax
+ mov DWORD PTR 60[rsp],ecx
+ ; X[0..5] are now in r12d, r13d, r14d, r15d, r10d, r8d
+ ; and X[6..15] are on the stack at [rsp+24]..[rsp+60]
+ ;;;;;mov DWORD PTR 132[rsp],esi
+
+$L001shortcut:: ; global label: sha1_block_asm_host_order jumps here after its own load
+ ;
+ ; Start processing
+ mov eax, DWORD PTR [r9] ; eax = c[0]
+ mov ecx, DWORD PTR 4[r9] ; ecx = c[1]; c[2], c[3], c[4] are already in ebx, edx, edi
+ ; 00_15 0 (rounds 0-15: f = ((c ^ d) & b) ^ d, K = 1518500249 = 5A827999h, X[i] used as loaded)
+ mov esi, ebx
+ mov ebp, eax
+ rol ebp, 5
+ xor esi, edx
+ and esi, ecx
+ add ebp, edi
+; mov edi, r12d
+ xor esi, edx
+ ror ecx, 2
+ lea ebp, DWORD PTR 1518500249[r12d*1+ebp]
+ add ebp, esi
+ ; 00_15 1
+ mov edi, ecx
+ mov esi, ebp
+ rol ebp, 5
+ xor edi, ebx
+ and edi, eax
+ add ebp, edx
+; mov edx, r13d
+ xor edi, ebx
+ ror eax, 2
+ lea ebp, DWORD PTR 1518500249[r13d*1+ebp]
+ add ebp, edi
+ ; 00_15 2
+ mov edx, eax
+ mov edi, ebp
+ rol ebp, 5
+ xor edx, ecx
+ and edx, esi
+ add ebp, ebx
+; mov ebx, r14d
+ xor edx, ecx
+ ror esi, 2
+ lea ebp, DWORD PTR 1518500249[r14d*1+ebp]
+ add ebp, edx
+ ; 00_15 3
+ mov ebx, esi
+ mov edx, ebp
+ rol ebp, 5
+ xor ebx, eax
+ and ebx, edi
+ add ebp, ecx
+ mov ecx, r15d
+ xor ebx, eax
+ ror edi, 2
+ lea ebp, DWORD PTR 1518500249[ecx*1+ebp]
+ add ebp, ebx
+ ; 00_15 4
+ mov ecx, edi
+ mov ebx, ebp
+ rol ebp, 5
+ xor ecx, esi
+ and ecx, edx
+ add ebp, eax
+ mov eax, r10d
+ xor ecx, esi
+ ror edx, 2
+ lea ebp, DWORD PTR 1518500249[eax*1+ebp]
+ add ebp, ecx
+ ; 00_15 5
+ mov eax, edx
+ mov ecx, ebp
+ rol ebp, 5
+ xor eax, edi
+ and eax, ebx
+ add ebp, esi
+ mov esi, r8d
+ xor eax, edi
+ ror ebx, 2
+ lea ebp, DWORD PTR 1518500249[esi*1+ebp]
+ add ebp, eax
+ ; 00_15 6
+ mov esi, ebx
+ mov eax, ebp
+ rol ebp, 5
+ xor esi, edx
+ and esi, ecx
+ add ebp, edi
+ mov edi, DWORD PTR 24[rsp]
+ xor esi, edx
+ ror ecx, 2
+ lea ebp, DWORD PTR 1518500249[edi*1+ebp]
+ add ebp, esi
+ ; 00_15 7
+ mov edi, ecx
+ mov esi, ebp
+ rol ebp, 5
+ xor edi, ebx
+ and edi, eax
+ add ebp, edx
+ mov edx, DWORD PTR 28[rsp]
+ xor edi, ebx
+ ror eax, 2
+ lea ebp, DWORD PTR 1518500249[edx*1+ebp]
+ add ebp, edi
+ ; 00_15 8
+ mov edx, eax
+ mov edi, ebp
+ rol ebp, 5
+ xor edx, ecx
+ and edx, esi
+ add ebp, ebx
+ mov ebx, DWORD PTR 32[rsp]
+ xor edx, ecx
+ ror esi, 2
+ lea ebp, DWORD PTR 1518500249[ebx*1+ebp]
+ add ebp, edx
+ ; 00_15 9
+ mov ebx, esi
+ mov edx, ebp
+ rol ebp, 5
+ xor ebx, eax
+ and ebx, edi
+ add ebp, ecx
+ mov ecx, DWORD PTR 36[rsp]
+ xor ebx, eax
+ ror edi, 2
+ lea ebp, DWORD PTR 1518500249[ecx*1+ebp]
+ add ebp, ebx
+ ; 00_15 10
+ mov ecx, edi
+ mov ebx, ebp
+ rol ebp, 5
+ xor ecx, esi
+ and ecx, edx
+ add ebp, eax
+ mov eax, DWORD PTR 40[rsp]
+ xor ecx, esi
+ ror edx, 2
+ lea ebp, DWORD PTR 1518500249[eax*1+ebp]
+ add ebp, ecx
+ ; 00_15 11
+ mov eax, edx
+ mov ecx, ebp
+ rol ebp, 5
+ xor eax, edi
+ and eax, ebx
+ add ebp, esi
+ mov esi, DWORD PTR 44[rsp]
+ xor eax, edi
+ ror ebx, 2
+ lea ebp, DWORD PTR 1518500249[esi*1+ebp]
+ add ebp, eax
+ ; 00_15 12
+ mov esi, ebx
+ mov eax, ebp
+ rol ebp, 5
+ xor esi, edx
+ and esi, ecx
+ add ebp, edi
+ mov edi, DWORD PTR 48[rsp]
+ xor esi, edx
+ ror ecx, 2
+ lea ebp, DWORD PTR 1518500249[edi*1+ebp]
+ add ebp, esi
+ ; 00_15 13
+ mov edi, ecx
+ mov esi, ebp
+ rol ebp, 5
+ xor edi, ebx
+ and edi, eax
+ add ebp, edx
+ mov edx, DWORD PTR 52[rsp]
+ xor edi, ebx
+ ror eax, 2
+ lea ebp, DWORD PTR 1518500249[edx*1+ebp]
+ add ebp, edi
+ ; 00_15 14
+ mov edx, eax
+ mov edi, ebp
+ rol ebp, 5
+ xor edx, ecx
+ and edx, esi
+ add ebp, ebx
+ mov ebx, DWORD PTR 56[rsp]
+ xor edx, ecx
+ ror esi, 2
+ lea ebp, DWORD PTR 1518500249[ebx*1+ebp]
+ add ebp, edx
+ ; 00_15 15
+ mov ebx, esi
+ mov edx, ebp
+ rol ebp, 5
+ xor ebx, eax
+ and ebx, edi
+ add ebp, ecx
+ mov ecx, DWORD PTR 60[rsp]
+ xor ebx, eax
+ ror edi, 2
+ lea ebp, DWORD PTR 1518500249[ecx*1+ebp]
+ add ebx, ebp
+ ; 16_19 16 (from here each X[i mod 16] slot is replaced by rol(X[i] ^ X[i+2] ^ X[i+8] ^ X[i+13], 1) before use)
+ mov ecx, r14d
+ mov ebp, edi
+ xor ecx, r12d
+ xor ebp, esi
+ xor ecx, DWORD PTR 32[rsp]
+ and ebp, edx
+ ror edx, 2
+ xor ecx, DWORD PTR 52[rsp]
+ rol ecx, 1
+ xor ebp, esi
+ mov r12d,ecx
+ lea ecx, DWORD PTR 1518500249[eax*1+ecx]
+ mov eax, ebx
+ rol eax, 5
+ add ecx, ebp
+ add ecx, eax
+ ; 16_19 17
+ mov eax, r15d
+ mov ebp, edx
+ xor eax, r13d
+ xor ebp, edi
+ xor eax, DWORD PTR 36[rsp]
+ and ebp, ebx
+ ror ebx, 2
+ xor eax, DWORD PTR 56[rsp]
+ rol eax, 1
+ xor ebp, edi
+ mov r13d,eax
+ lea eax, DWORD PTR 1518500249[esi*1+eax]
+ mov esi, ecx
+ rol esi, 5
+ add eax, ebp
+ add eax, esi
+ ; 16_19 18
+ mov esi, r10d
+ mov ebp, ebx
+ xor esi, r14d
+ xor ebp, edx
+ xor esi, DWORD PTR 40[rsp]
+ and ebp, ecx
+ ror ecx, 2
+ xor esi, DWORD PTR 60[rsp]
+ rol esi, 1
+ xor ebp, edx
+ mov r14d,esi
+ lea esi, DWORD PTR 1518500249[edi*1+esi]
+ mov edi, eax
+ rol edi, 5
+ add esi, ebp
+ add esi, edi
+ ; 16_19 19
+ mov edi, r8d
+ mov ebp, ecx
+ xor edi, r15d
+ xor ebp, ebx
+ xor edi, DWORD PTR 44[rsp]
+ and ebp, eax
+ ror eax, 2
+ xor edi, r12d
+ rol edi, 1
+ xor ebp, ebx
+ mov r15d,edi
+ lea edi, DWORD PTR 1518500249[edx*1+edi]
+ mov edx, esi
+ rol edx, 5
+ add edi, ebp
+ add edi, edx
+ ; 20_39 20 (rounds 20-39: f = b ^ c ^ d, K = 1859775393 = 6ED9EBA1h)
+ mov ebp, esi
+ mov edx, r10d
+ ror esi, 2
+ xor edx, DWORD PTR 24[rsp]
+ xor ebp, eax
+ xor edx, DWORD PTR 48[rsp]
+ xor ebp, ecx
+ xor edx, r13d
+ rol edx, 1
+ add ebp, ebx
+ mov r10d,edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 1859775393[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 21
+ mov ebp, edi
+ mov ebx, r8d
+ ror edi, 2
+ xor ebx, DWORD PTR 28[rsp]
+ xor ebp, esi
+ xor ebx, DWORD PTR 52[rsp]
+ xor ebp, eax
+ xor ebx, r14d
+ rol ebx, 1
+ add ebp, ecx
+ mov r8d,ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 1859775393[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 22
+ mov ebp, edx
+ mov ecx, DWORD PTR 24[rsp]
+ ror edx, 2
+ xor ecx, DWORD PTR 32[rsp]
+ xor ebp, edi
+ xor ecx, DWORD PTR 56[rsp]
+ xor ebp, esi
+ xor ecx, r15d
+ rol ecx, 1
+ add ebp, eax
+ mov DWORD PTR 24[rsp],ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 1859775393[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 23
+ mov ebp, ebx
+ mov eax, DWORD PTR 28[rsp]
+ ror ebx, 2
+ xor eax, DWORD PTR 36[rsp]
+ xor ebp, edx
+ xor eax, DWORD PTR 60[rsp]
+ xor ebp, edi
+ xor eax, r10d
+ rol eax, 1
+ add ebp, esi
+ mov DWORD PTR 28[rsp],eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 1859775393[ebp*1+eax]
+ add eax, esi
+ ; 20_39 24
+ mov ebp, ecx
+ mov esi, DWORD PTR 32[rsp]
+ ror ecx, 2
+ xor esi, DWORD PTR 40[rsp]
+ xor ebp, ebx
+ xor esi, r12d
+ xor ebp, edx
+ xor esi, r8d
+ rol esi, 1
+ add ebp, edi
+ mov DWORD PTR 32[rsp],esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 1859775393[ebp*1+esi]
+ add esi, edi
+ ; 20_39 25
+ mov ebp, eax
+ mov edi, DWORD PTR 36[rsp]
+ ror eax, 2
+ xor edi, DWORD PTR 44[rsp]
+ xor ebp, ecx
+ xor edi, r13d
+ xor ebp, ebx
+ xor edi, DWORD PTR 24[rsp]
+ rol edi, 1
+ add ebp, edx
+ mov DWORD PTR 36[rsp],edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 1859775393[ebp*1+edi]
+ add edi, edx
+ ; 20_39 26
+ mov ebp, esi
+ mov edx, DWORD PTR 40[rsp]
+ ror esi, 2
+ xor edx, DWORD PTR 48[rsp]
+ xor ebp, eax
+ xor edx, r14d
+ xor ebp, ecx
+ xor edx, DWORD PTR 28[rsp]
+ rol edx, 1
+ add ebp, ebx
+ mov DWORD PTR 40[rsp],edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 1859775393[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 27
+ mov ebp, edi
+ mov ebx, DWORD PTR 44[rsp]
+ ror edi, 2
+ xor ebx, DWORD PTR 52[rsp]
+ xor ebp, esi
+ xor ebx, r15d
+ xor ebp, eax
+ xor ebx, DWORD PTR 32[rsp]
+ rol ebx, 1
+ add ebp, ecx
+ mov DWORD PTR 44[rsp],ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 1859775393[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 28
+ mov ebp, edx
+ mov ecx, DWORD PTR 48[rsp]
+ ror edx, 2
+ xor ecx, DWORD PTR 56[rsp]
+ xor ebp, edi
+ xor ecx, r10d
+ xor ebp, esi
+ xor ecx, DWORD PTR 36[rsp]
+ rol ecx, 1
+ add ebp, eax
+ mov DWORD PTR 48[rsp],ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 1859775393[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 29
+ mov ebp, ebx
+ mov eax, DWORD PTR 52[rsp]
+ ror ebx, 2
+ xor eax, DWORD PTR 60[rsp]
+ xor ebp, edx
+ xor eax, r8d
+ xor ebp, edi
+ xor eax, DWORD PTR 40[rsp]
+ rol eax, 1
+ add ebp, esi
+ mov DWORD PTR 52[rsp],eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 1859775393[ebp*1+eax]
+ add eax, esi
+ ; 20_39 30
+ mov ebp, ecx
+ mov esi, DWORD PTR 56[rsp]
+ ror ecx, 2
+ xor esi, r12d
+ xor ebp, ebx
+ xor esi, DWORD PTR 24[rsp]
+ xor ebp, edx
+ xor esi, DWORD PTR 44[rsp]
+ rol esi, 1
+ add ebp, edi
+ mov DWORD PTR 56[rsp],esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 1859775393[ebp*1+esi]
+ add esi, edi
+ ; 20_39 31
+ mov ebp, eax
+ mov edi, DWORD PTR 60[rsp]
+ ror eax, 2
+ xor edi, r13d
+ xor ebp, ecx
+ xor edi, DWORD PTR 28[rsp]
+ xor ebp, ebx
+ xor edi, DWORD PTR 48[rsp]
+ rol edi, 1
+ add ebp, edx
+ mov DWORD PTR 60[rsp],edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 1859775393[ebp*1+edi]
+ add edi, edx
+ ; 20_39 32
+ mov ebp, esi
+; mov edx, r12d
+ ror esi, 2
+ xor r12d, r14d
+ xor ebp, eax
+ xor r12d, DWORD PTR 32[rsp]
+ xor ebp, ecx
+ xor r12d, DWORD PTR 52[rsp]
+ rol r12d, 1
+ add ebp, ebx
+; mov r12d,edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 1859775393[ebp*1+r12d]
+ add edx, ebx
+ ; 20_39 33
+ mov ebp, edi
+ mov ebx, r13d
+ ror edi, 2
+ xor ebx, r15d
+ xor ebp, esi
+ xor ebx, DWORD PTR 36[rsp]
+ xor ebp, eax
+ xor ebx, DWORD PTR 56[rsp]
+ rol ebx, 1
+ add ebp, ecx
+ mov r13d,ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 1859775393[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 34
+ mov ebp, edx
+ mov ecx, r14d
+ ror edx, 2
+ xor ecx, r10d
+ xor ebp, edi
+ xor ecx, DWORD PTR 40[rsp]
+ xor ebp, esi
+ xor ecx, DWORD PTR 60[rsp]
+ rol ecx, 1
+ add ebp, eax
+ mov r14d,ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 1859775393[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 35
+ mov ebp, ebx
+ mov eax, r15d
+ ror ebx, 2
+ xor eax, r8d
+ xor ebp, edx
+ xor eax, DWORD PTR 44[rsp]
+ xor ebp, edi
+ xor eax, r12d
+ rol eax, 1
+ add ebp, esi
+ mov r15d,eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 1859775393[ebp*1+eax]
+ add eax, esi
+ ; 20_39 36
+ mov ebp, ecx
+ mov esi, r10d
+ ror ecx, 2
+ xor esi, DWORD PTR 24[rsp]
+ xor ebp, ebx
+ xor esi, DWORD PTR 48[rsp]
+ xor ebp, edx
+ xor esi, r13d
+ rol esi, 1
+ add ebp, edi
+ mov r10d,esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 1859775393[ebp*1+esi]
+ add esi, edi
+ ; 20_39 37
+ mov ebp, eax
+ mov edi, r8d
+ ror eax, 2
+ xor edi, DWORD PTR 28[rsp]
+ xor ebp, ecx
+ xor edi, DWORD PTR 52[rsp]
+ xor ebp, ebx
+ xor edi, r14d
+ rol edi, 1
+ add ebp, edx
+ mov r8d,edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 1859775393[ebp*1+edi]
+ add edi, edx
+ ; 20_39 38
+ mov ebp, esi
+ mov edx, DWORD PTR 24[rsp]
+ ror esi, 2
+ xor edx, DWORD PTR 32[rsp]
+ xor ebp, eax
+ xor edx, DWORD PTR 56[rsp]
+ xor ebp, ecx
+ xor edx, r15d
+ rol edx, 1
+ add ebp, ebx
+ mov DWORD PTR 24[rsp],edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 1859775393[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 39
+ mov ebp, edi
+ mov ebx, DWORD PTR 28[rsp]
+ ror edi, 2
+ xor ebx, DWORD PTR 36[rsp]
+ xor ebp, esi
+ xor ebx, DWORD PTR 60[rsp]
+ xor ebp, eax
+ xor ebx, r10d
+ rol ebx, 1
+ add ebp, ecx
+ mov DWORD PTR 28[rsp],ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 1859775393[ebp*1+ebx]
+ add ebx, ecx
+ ; 40_59 40 (rounds 40-59: f = ((b | c) & d) | (b & c), K = 2400959708 = 8F1BBCDCh)
+ mov ecx, DWORD PTR 32[rsp]
+ mov ebp, DWORD PTR 40[rsp]
+ xor ecx, ebp
+; mov ebp, r12d
+ xor ecx, r12d
+; mov ebp, r8d
+ xor ecx, r8d
+ mov ebp, edx
+ rol ecx, 1
+ or ebp, edi
+ mov DWORD PTR 32[rsp],ecx
+ and ebp, esi
+ lea ecx, DWORD PTR 2400959708[eax*1+ecx]
+ mov eax, edx
+ ror edx, 2
+ and eax, edi
+ or ebp, eax
+ mov eax, ebx
+ rol eax, 5
+ add ecx, ebp
+ add ecx, eax
+ ; 40_59 41
+ mov eax, DWORD PTR 36[rsp]
+ mov ebp, DWORD PTR 44[rsp]
+ xor eax, ebp
+; mov ebp, r13d
+ xor eax, r13d
+ mov ebp, DWORD PTR 24[rsp]
+ xor eax, ebp
+ mov ebp, ebx
+ rol eax, 1
+ or ebp, edx
+ mov DWORD PTR 36[rsp],eax
+ and ebp, edi
+ lea eax, DWORD PTR 2400959708[esi*1+eax]
+ mov esi, ebx
+ ror ebx, 2
+ and esi, edx
+ or ebp, esi
+ mov esi, ecx
+ rol esi, 5
+ add eax, ebp
+ add eax, esi
+ ; 40_59 42
+ mov esi, DWORD PTR 40[rsp]
+ mov ebp, DWORD PTR 48[rsp]
+ xor esi, ebp
+; mov ebp, r14d
+ xor esi, r14d
+ mov ebp, DWORD PTR 28[rsp]
+ xor esi, ebp
+ mov ebp, ecx
+ rol esi, 1
+ or ebp, ebx
+ mov DWORD PTR 40[rsp],esi
+ and ebp, edx
+ lea esi, DWORD PTR 2400959708[edi*1+esi]
+ mov edi, ecx
+ ror ecx, 2
+ and edi, ebx
+ or ebp, edi
+ mov edi, eax
+ rol edi, 5
+ add esi, ebp
+ add esi, edi
+ ; 40_59 43
+ mov edi, DWORD PTR 44[rsp]
+ mov ebp, DWORD PTR 52[rsp]
+ xor edi, ebp
+; mov ebp, r15d
+ xor edi, r15d
+ mov ebp, DWORD PTR 32[rsp]
+ xor edi, ebp
+ mov ebp, eax
+ rol edi, 1
+ or ebp, ecx
+ mov DWORD PTR 44[rsp],edi
+ and ebp, ebx
+ lea edi, DWORD PTR 2400959708[edx*1+edi]
+ mov edx, eax
+ ror eax, 2
+ and edx, ecx
+ or ebp, edx
+ mov edx, esi
+ rol edx, 5
+ add edi, ebp
+ add edi, edx
+ ; 40_59 44
+ mov edx, DWORD PTR 48[rsp]
+ mov ebp, DWORD PTR 56[rsp]
+ xor edx, ebp
+; mov ebp, r10d
+ xor edx, r10d
+ mov ebp, DWORD PTR 36[rsp]
+ xor edx, ebp
+ mov ebp, esi
+ rol edx, 1
+ or ebp, eax
+ mov DWORD PTR 48[rsp],edx
+ and ebp, ecx
+ lea edx, DWORD PTR 2400959708[ebx*1+edx]
+ mov ebx, esi
+ ror esi, 2
+ and ebx, eax
+ or ebp, ebx
+ mov ebx, edi
+ rol ebx, 5
+ add edx, ebp
+ add edx, ebx
+ ; 40_59 45
+ mov ebx, DWORD PTR 52[rsp]
+ mov ebp, DWORD PTR 60[rsp]
+ xor ebx, ebp
+; mov ebp, r8d
+ xor ebx, r8d
+ mov ebp, DWORD PTR 40[rsp]
+ xor ebx, ebp
+ mov ebp, edi
+ rol ebx, 1
+ or ebp, esi
+ mov DWORD PTR 52[rsp],ebx
+ and ebp, eax
+ lea ebx, DWORD PTR 2400959708[ecx*1+ebx]
+ mov ecx, edi
+ ror edi, 2
+ and ecx, esi
+ or ebp, ecx
+ mov ecx, edx
+ rol ecx, 5
+ add ebx, ebp
+ add ebx, ecx
+ ; 40_59 46
+ mov ecx, DWORD PTR 56[rsp]
+; mov ebp, r12d
+ xor ecx, r12d
+ mov ebp, DWORD PTR 24[rsp]
+ xor ecx, ebp
+ mov ebp, DWORD PTR 44[rsp]
+ xor ecx, ebp
+ mov ebp, edx
+ rol ecx, 1
+ or ebp, edi
+ mov DWORD PTR 56[rsp],ecx
+ and ebp, esi
+ lea ecx, DWORD PTR 2400959708[eax*1+ecx]
+ mov eax, edx
+ ror edx, 2
+ and eax, edi
+ or ebp, eax
+ mov eax, ebx
+ rol eax, 5
+ add ecx, ebp
+ add ecx, eax
+ ; 40_59 47
+ mov eax, DWORD PTR 60[rsp]
+; mov ebp, r13d
+ xor eax, r13d
+ mov ebp, DWORD PTR 28[rsp]
+ xor eax, ebp
+ mov ebp, DWORD PTR 48[rsp]
+ xor eax, ebp
+ mov ebp, ebx
+ rol eax, 1
+ or ebp, edx
+ mov DWORD PTR 60[rsp],eax
+ and ebp, edi
+ lea eax, DWORD PTR 2400959708[esi*1+eax]
+ mov esi, ebx
+ ror ebx, 2
+ and esi, edx
+ or ebp, esi
+ mov esi, ecx
+ rol esi, 5
+ add eax, ebp
+ add eax, esi
+ ; 40_59 48
+ mov esi, r12d
+; mov ebp, r14d
+ xor esi, r14d
+ mov ebp, DWORD PTR 32[rsp]
+ xor esi, ebp
+ mov ebp, DWORD PTR 52[rsp]
+ xor esi, ebp
+ mov ebp, ecx
+ rol esi, 1
+ or ebp, ebx
+ mov r12d,esi
+ and ebp, edx
+ lea esi, DWORD PTR 2400959708[edi*1+esi]
+ mov edi, ecx
+ ror ecx, 2
+ and edi, ebx
+ or ebp, edi
+ mov edi, eax
+ rol edi, 5
+ add esi, ebp
+ add esi, edi
+ ; 40_59 49
+ mov edi, r13d
+; mov ebp, r15d
+ xor edi, r15d
+ mov ebp, DWORD PTR 36[rsp]
+ xor edi, ebp
+ mov ebp, DWORD PTR 56[rsp]
+ xor edi, ebp
+ mov ebp, eax
+ rol edi, 1
+ or ebp, ecx
+ mov r13d,edi
+ and ebp, ebx
+ lea edi, DWORD PTR 2400959708[edx*1+edi]
+ mov edx, eax
+ ror eax, 2
+ and edx, ecx
+ or ebp, edx
+ mov edx, esi
+ rol edx, 5
+ add edi, ebp
+ add edi, edx
+ ; 40_59 50
+ mov edx, r14d
+; mov ebp, r10d
+ xor edx, r10d
+ mov ebp, DWORD PTR 40[rsp]
+ xor edx, ebp
+ mov ebp, DWORD PTR 60[rsp]
+ xor edx, ebp
+ mov ebp, esi
+ rol edx, 1
+ or ebp, eax
+ mov r14d,edx
+ and ebp, ecx
+ lea edx, DWORD PTR 2400959708[ebx*1+edx]
+ mov ebx, esi
+ ror esi, 2
+ and ebx, eax
+ or ebp, ebx
+ mov ebx, edi
+ rol ebx, 5
+ add edx, ebp
+ add edx, ebx
+ ; 40_59 51
+ mov ebx, r15d
+; mov ebp, r8d
+ xor ebx, r8d
+ mov ebp, DWORD PTR 44[rsp]
+ xor ebx, ebp
+; mov ebp, r12d
+ xor ebx, r12d
+ mov ebp, edi
+ rol ebx, 1
+ or ebp, esi
+ mov r15d,ebx
+ and ebp, eax
+ lea ebx, DWORD PTR 2400959708[ecx*1+ebx]
+ mov ecx, edi
+ ror edi, 2
+ and ecx, esi
+ or ebp, ecx
+ mov ecx, edx
+ rol ecx, 5
+ add ebx, ebp
+ add ebx, ecx
+ ; 40_59 52
+ mov ecx, r10d
+ mov ebp, DWORD PTR 24[rsp]
+ xor ecx, ebp
+ mov ebp, DWORD PTR 48[rsp]
+ xor ecx, ebp
+; mov ebp, r13d
+ xor ecx, r13d
+ mov ebp, edx
+ rol ecx, 1
+ or ebp, edi
+ mov r10d,ecx
+ and ebp, esi
+ lea ecx, DWORD PTR 2400959708[eax*1+ecx]
+ mov eax, edx
+ ror edx, 2
+ and eax, edi
+ or ebp, eax
+ mov eax, ebx
+ rol eax, 5
+ add ecx, ebp
+ add ecx, eax
+ ; 40_59 53
+ mov eax, r8d
+ mov ebp, DWORD PTR 28[rsp]
+ xor eax, ebp
+ mov ebp, DWORD PTR 52[rsp]
+ xor eax, ebp
+; mov ebp, r14d
+ xor eax, r14d
+ mov ebp, ebx
+ rol eax, 1
+ or ebp, edx
+ mov r8d,eax
+ and ebp, edi
+ lea eax, DWORD PTR 2400959708[esi*1+eax]
+ mov esi, ebx
+ ror ebx, 2
+ and esi, edx
+ or ebp, esi
+ mov esi, ecx
+ rol esi, 5
+ add eax, ebp
+ add eax, esi
+ ; 40_59 54
+ mov esi, DWORD PTR 24[rsp]
+ mov ebp, DWORD PTR 32[rsp]
+ xor esi, ebp
+ mov ebp, DWORD PTR 56[rsp]
+ xor esi, ebp
+; mov ebp, r15d
+ xor esi, r15d
+ mov ebp, ecx
+ rol esi, 1
+ or ebp, ebx
+ mov DWORD PTR 24[rsp],esi
+ and ebp, edx
+ lea esi, DWORD PTR 2400959708[edi*1+esi]
+ mov edi, ecx
+ ror ecx, 2
+ and edi, ebx
+ or ebp, edi
+ mov edi, eax
+ rol edi, 5
+ add esi, ebp
+ add esi, edi
+ ; 40_59 55
+ mov edi, DWORD PTR 28[rsp]
+ mov ebp, DWORD PTR 36[rsp]
+ xor edi, ebp
+ mov ebp, DWORD PTR 60[rsp]
+ xor edi, ebp
+; mov ebp, r10d
+ xor edi, r10d
+ mov ebp, eax
+ rol edi, 1
+ or ebp, ecx
+ mov DWORD PTR 28[rsp],edi
+ and ebp, ebx
+ lea edi, DWORD PTR 2400959708[edx*1+edi]
+ mov edx, eax
+ ror eax, 2
+ and edx, ecx
+ or ebp, edx
+ mov edx, esi
+ rol edx, 5
+ add edi, ebp
+ add edi, edx
+ ; 40_59 56
+ mov edx, DWORD PTR 32[rsp]
+ mov ebp, DWORD PTR 40[rsp]
+ xor edx, ebp
+; mov ebp, r12d
+ xor edx, r12d
+; mov ebp, r8d
+ xor edx, r8d
+ mov ebp, esi
+ rol edx, 1
+ or ebp, eax
+ mov DWORD PTR 32[rsp],edx
+ and ebp, ecx
+ lea edx, DWORD PTR 2400959708[ebx*1+edx]
+ mov ebx, esi
+ ror esi, 2
+ and ebx, eax
+ or ebp, ebx
+ mov ebx, edi
+ rol ebx, 5
+ add edx, ebp
+ add edx, ebx
+ ; 40_59 57
+ mov ebx, DWORD PTR 36[rsp]
+ mov ebp, DWORD PTR 44[rsp]
+ xor ebx, ebp
+; mov ebp, r13d
+ xor ebx, r13d
+ mov ebp, DWORD PTR 24[rsp]
+ xor ebx, ebp
+ mov ebp, edi
+ rol ebx, 1
+ or ebp, esi
+ mov DWORD PTR 36[rsp],ebx
+ and ebp, eax
+ lea ebx, DWORD PTR 2400959708[ecx*1+ebx]
+ mov ecx, edi
+ ror edi, 2
+ and ecx, esi
+ or ebp, ecx
+ mov ecx, edx
+ rol ecx, 5
+ add ebx, ebp
+ add ebx, ecx
+ ; 40_59 58
+ mov ecx, DWORD PTR 40[rsp]
+ mov ebp, DWORD PTR 48[rsp]
+ xor ecx, ebp
+; mov ebp, r14d
+ xor ecx, r14d
+ mov ebp, DWORD PTR 28[rsp]
+ xor ecx, ebp
+ mov ebp, edx
+ rol ecx, 1
+ or ebp, edi
+ mov DWORD PTR 40[rsp],ecx
+ and ebp, esi
+ lea ecx, DWORD PTR 2400959708[eax*1+ecx]
+ mov eax, edx
+ ror edx, 2
+ and eax, edi
+ or ebp, eax
+ mov eax, ebx
+ rol eax, 5
+ add ecx, ebp
+ add ecx, eax
+ ; 40_59 59
+ mov eax, DWORD PTR 44[rsp]
+ mov ebp, DWORD PTR 52[rsp]
+ xor eax, ebp
+; mov ebp, r15d
+ xor eax, r15d
+ mov ebp, DWORD PTR 32[rsp]
+ xor eax, ebp
+ mov ebp, ebx
+ rol eax, 1
+ or ebp, edx
+ mov DWORD PTR 44[rsp],eax
+ and ebp, edi
+ lea eax, DWORD PTR 2400959708[esi*1+eax]
+ mov esi, ebx
+ ror ebx, 2
+ and esi, edx
+ or ebp, esi
+ mov esi, ecx
+ rol esi, 5
+ add eax, ebp
+ add eax, esi
+ ; 20_39 60 (rounds 60-79 reuse the 20_39 round shape, f = b ^ c ^ d, with K = 3395469782 = CA62C1D6h)
+ mov ebp, ecx
+ mov esi, DWORD PTR 48[rsp]
+ ror ecx, 2
+ xor esi, DWORD PTR 56[rsp]
+ xor ebp, ebx
+ xor esi, r10d
+ xor ebp, edx
+ xor esi, DWORD PTR 36[rsp]
+ rol esi, 1
+ add ebp, edi
+ mov DWORD PTR 48[rsp],esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 3395469782[ebp*1+esi]
+ add esi, edi
+ ; 20_39 61
+ mov ebp, eax
+ mov edi, DWORD PTR 52[rsp]
+ ror eax, 2
+ xor edi, DWORD PTR 60[rsp]
+ xor ebp, ecx
+ xor edi, r8d
+ xor ebp, ebx
+ xor edi, DWORD PTR 40[rsp]
+ rol edi, 1
+ add ebp, edx
+ mov DWORD PTR 52[rsp],edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 3395469782[ebp*1+edi]
+ add edi, edx
+ ; 20_39 62
+ mov ebp, esi
+ mov edx, DWORD PTR 56[rsp]
+ ror esi, 2
+ xor edx, r12d
+ xor ebp, eax
+ xor edx, DWORD PTR 24[rsp]
+ xor ebp, ecx
+ xor edx, DWORD PTR 44[rsp]
+ rol edx, 1
+ add ebp, ebx
+ mov DWORD PTR 56[rsp],edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 3395469782[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 63
+ mov ebp, edi
+ mov ebx, DWORD PTR 60[rsp]
+ ror edi, 2
+ xor ebx, r13d
+ xor ebp, esi
+ xor ebx, DWORD PTR 28[rsp]
+ xor ebp, eax
+ xor ebx, DWORD PTR 48[rsp]
+ rol ebx, 1
+ add ebp, ecx
+ mov DWORD PTR 60[rsp],ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 3395469782[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 64
+ mov ebp, edx
+ mov ecx, r12d
+ ror edx, 2
+ xor ecx, r14d
+ xor ebp, edi
+ xor ecx, DWORD PTR 32[rsp]
+ xor ebp, esi
+ xor ecx, DWORD PTR 52[rsp]
+ rol ecx, 1
+ add ebp, eax
+ mov r12d,ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 3395469782[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 65
+ mov ebp, ebx
+ mov eax, r13d
+ ror ebx, 2
+ xor eax, r15d
+ xor ebp, edx
+ xor eax, DWORD PTR 36[rsp]
+ xor ebp, edi
+ xor eax, DWORD PTR 56[rsp]
+ rol eax, 1
+ add ebp, esi
+ mov r13d,eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 3395469782[ebp*1+eax]
+ add eax, esi
+ ; 20_39 66
+ mov ebp, ecx
+ mov esi, r14d
+ ror ecx, 2
+ xor esi, r10d
+ xor ebp, ebx
+ xor esi, DWORD PTR 40[rsp]
+ xor ebp, edx
+ xor esi, DWORD PTR 60[rsp]
+ rol esi, 1
+ add ebp, edi
+ mov r14d,esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 3395469782[ebp*1+esi]
+ add esi, edi
+ ; 20_39 67
+ mov ebp, eax
+ mov edi, r15d
+ ror eax, 2
+ xor edi, r8d
+ xor ebp, ecx
+ xor edi, DWORD PTR 44[rsp]
+ xor ebp, ebx
+ xor edi, r12d
+ rol edi, 1
+ add ebp, edx
+ mov r15d,edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 3395469782[ebp*1+edi]
+ add edi, edx
+ ; 20_39 68
+ mov ebp, esi
+ mov edx, r10d
+ ror esi, 2
+ xor edx, DWORD PTR 24[rsp]
+ xor ebp, eax
+ xor edx, DWORD PTR 48[rsp]
+ xor ebp, ecx
+ xor edx, r13d
+ rol edx, 1
+ add ebp, ebx
+ mov r10d,edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 3395469782[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 69
+ mov ebp, edi
+ mov ebx, r8d
+ ror edi, 2
+ xor ebx, DWORD PTR 28[rsp]
+ xor ebp, esi
+ xor ebx, DWORD PTR 52[rsp]
+ xor ebp, eax
+ xor ebx, r14d
+ rol ebx, 1
+ add ebp, ecx
+ mov r8d,ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 3395469782[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 70
+ mov ebp, edx
+ mov ecx, DWORD PTR 24[rsp]
+ ror edx, 2
+ xor ecx, DWORD PTR 32[rsp]
+ xor ebp, edi
+ xor ecx, DWORD PTR 56[rsp]
+ xor ebp, esi
+ xor ecx, r15d
+ rol ecx, 1
+ add ebp, eax
+ mov DWORD PTR 24[rsp],ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 3395469782[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 71
+ mov ebp, ebx
+ mov eax, DWORD PTR 28[rsp]
+ ror ebx, 2
+ xor eax, DWORD PTR 36[rsp]
+ xor ebp, edx
+ xor eax, DWORD PTR 60[rsp]
+ xor ebp, edi
+ xor eax, r10d
+ rol eax, 1
+ add ebp, esi
+ mov DWORD PTR 28[rsp],eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 3395469782[ebp*1+eax]
+ add eax, esi
+ ; 20_39 72
+ mov ebp, ecx
+ mov esi, DWORD PTR 32[rsp]
+ ror ecx, 2
+ xor esi, DWORD PTR 40[rsp]
+ xor ebp, ebx
+ xor esi, r12d
+ xor ebp, edx
+ xor esi, r8d
+ rol esi, 1
+ add ebp, edi
+ mov DWORD PTR 32[rsp],esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 3395469782[ebp*1+esi]
+ add esi, edi
+ ; 20_39 73
+ mov ebp, eax
+ mov edi, DWORD PTR 36[rsp]
+ ror eax, 2
+ xor edi, DWORD PTR 44[rsp]
+ xor ebp, ecx
+ xor edi, r13d
+ xor ebp, ebx
+ xor edi, DWORD PTR 24[rsp]
+ rol edi, 1
+ add ebp, edx
+ mov DWORD PTR 36[rsp],edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 3395469782[ebp*1+edi]
+ add edi, edx
+ ; 20_39 74
+ mov ebp, esi
+ mov edx, DWORD PTR 40[rsp]
+ ror esi, 2
+ xor edx, DWORD PTR 48[rsp]
+ xor ebp, eax
+ xor edx, r14d
+ xor ebp, ecx
+ xor edx, DWORD PTR 28[rsp]
+ rol edx, 1
+ add ebp, ebx
+ mov DWORD PTR 40[rsp],edx
+ mov ebx, edi
+ rol ebx, 5
+ lea edx, DWORD PTR 3395469782[ebp*1+edx]
+ add edx, ebx
+ ; 20_39 75
+ mov ebp, edi
+ mov ebx, DWORD PTR 44[rsp]
+ ror edi, 2
+ xor ebx, DWORD PTR 52[rsp]
+ xor ebp, esi
+ xor ebx, r15d
+ xor ebp, eax
+ xor ebx, DWORD PTR 32[rsp]
+ rol ebx, 1
+ add ebp, ecx
+ mov DWORD PTR 44[rsp],ebx
+ mov ecx, edx
+ rol ecx, 5
+ lea ebx, DWORD PTR 3395469782[ebp*1+ebx]
+ add ebx, ecx
+ ; 20_39 76
+ mov ebp, edx
+ mov ecx, DWORD PTR 48[rsp]
+ ror edx, 2
+ xor ecx, DWORD PTR 56[rsp]
+ xor ebp, edi
+ xor ecx, r10d
+ xor ebp, esi
+ xor ecx, DWORD PTR 36[rsp]
+ rol ecx, 1
+ add ebp, eax
+ mov DWORD PTR 48[rsp],ecx
+ mov eax, ebx
+ rol eax, 5
+ lea ecx, DWORD PTR 3395469782[ebp*1+ecx]
+ add ecx, eax
+ ; 20_39 77
+ mov ebp, ebx
+ mov eax, DWORD PTR 52[rsp]
+ ror ebx, 2
+ xor eax, DWORD PTR 60[rsp]
+ xor ebp, edx
+ xor eax, r8d
+ xor ebp, edi
+ xor eax, DWORD PTR 40[rsp]
+ rol eax, 1
+ add ebp, esi
+ mov DWORD PTR 52[rsp],eax
+ mov esi, ecx
+ rol esi, 5
+ lea eax, DWORD PTR 3395469782[ebp*1+eax]
+ add eax, esi
+ ; 20_39 78
+ mov ebp, ecx
+ mov esi, DWORD PTR 56[rsp]
+ ror ecx, 2
+ xor esi, r12d
+ xor ebp, ebx
+ xor esi, DWORD PTR 24[rsp]
+ xor ebp, edx
+ xor esi, DWORD PTR 44[rsp]
+ rol esi, 1
+ add ebp, edi
+ mov DWORD PTR 56[rsp],esi
+ mov edi, eax
+ rol edi, 5
+ lea esi, DWORD PTR 3395469782[ebp*1+esi]
+ add esi, edi
+ ; 20_39 79
+prefetcht1 [r9] ; prefetch the state that is read back right after this round
+ mov ebp, eax
+ mov edi, DWORD PTR 60[rsp]
+ ror eax, 2
+ xor edi, r13d
+ xor ebp, ecx
+ xor edi, DWORD PTR 28[rsp]
+ xor ebp, ebx
+ xor edi, DWORD PTR 48[rsp]
+ rol edi, 1
+ add ebp, edx
+ mov DWORD PTR 60[rsp],edi
+ mov edx, esi
+ rol edx, 5
+ lea edi, DWORD PTR 3395469782[ebp*1+edi]
+ add edi, edx
+ ; End processing
+ ; Working values after round 79 sit in edi, esi, eax, ebx, ecx; they are added into c[0]..c[4] below
+prefetcht1 [r11+64] ; prefetch the next input block
+; mov ebp, DWORD PTR 128[rsp]
+;mov rbp,r9
+
+
+ mov edx, DWORD PTR 12[r9]
+ add edx, ecx ; edx = new c[3]
+ mov ecx, DWORD PTR 4[r9]
+ add ecx, esi ; ecx = new c[1]
+ mov esi, eax ; free eax for the c[0] update
+ mov eax, DWORD PTR [r9]
+ mov DWORD PTR 12[r9],edx
+ add eax, edi ; eax = new c[0]
+ mov edi, DWORD PTR 16[r9]
+ add edi, ebx ; edi = new c[4]
+ mov ebx, DWORD PTR 8[r9]
+ add ebx, esi ; ebx = new c[2]
+ mov DWORD PTR [r9],eax
+ add r11, 64 ; advance to the next 64-byte block
+
+ mov DWORD PTR 8[r9],ebx
+
+
+ mov DWORD PTR 16[r9],edi
+ cmp r11,QWORD PTR 112[rsp] ; compare with the end pointer saved in the prologue (mov does not alter the flags)
+ mov DWORD PTR 4[r9],ecx
+ jb $L000start ; more blocks: ebx, edx, edi already hold c[2], c[3], c[4] for the next pass
+; NOTE(review): the six stores below write into the frame released by the following add and are never read here
+mov DWORD PTR [rsp],r12d
+mov DWORD PTR 4[rsp],r13d
+mov DWORD PTR 8[rsp],r14d
+mov DWORD PTR 12[rsp],r15d
+mov DWORD PTR 16[rsp],r10d
+mov DWORD PTR 20[rsp],r8d
+ add rsp, 120 ; release the local frame
+
+pop r15 ; restore saved registers in reverse push order
+pop r14
+pop r13
+pop r12
+
+
+
+
+pop rbp
+pop rbx
+pop rsi
+pop rdi
+
+ ret
+sha1_block_asm_data_order ENDP
+;_TEXT$ ENDS
+;_TEXT$ SEGMENT PAGE 'CODE'
+;PUBLIC _sha1_block_asm_host_order
+
+sha1_block_asm_host_order PROC ; same arguments as sha1_block_asm_data_order, but the first block is loaded without bswap
+; This routine has no epilogue of its own: it jumps into sha1_block_asm_data_order at $L001shortcut.
+push rdi ; same push order as sha1_block_asm_data_order, whose pops run at the shared exit
+push rsi
+push rbx
+push rbp
+
+
+push r12
+push r13
+push r14
+push r15
+
+; c = rcx
+; p = rdx
+; num = r8
+
+
+mov r9,rcx ; r9 = c, as the shared code expects
+
+ shl r8, 6 ; num * 64 bytes
+ mov r11, rdx ; r11 = current block pointer
+ add r8, r11 ; r8 = end pointer = p + num*64
+
+ mov rbp,rcx
+ mov edx, DWORD PTR 12[rbp] ; edx = c[3]
+ sub rsp, 120 ; same frame layout as sha1_block_asm_data_order
+ mov edi, DWORD PTR 16[rbp] ; edi = c[4]
+ mov ebx, DWORD PTR 8[rbp] ; ebx = c[2]
+
+ mov QWORD PTR 112[rsp],r8 ; end pointer for the shared loop test
+ ; First we need to setup the X array
+ mov eax, DWORD PTR [r11]
+ mov ecx, DWORD PTR 4[r11]
+ mov r12d,eax ; X[0]
+ mov r13d,ecx ; X[1]
+ mov eax, DWORD PTR 8[r11]
+ mov ecx, DWORD PTR 12[r11]
+ mov r14d,eax ; X[2]
+ mov r15d,ecx ; X[3]
+ mov eax, DWORD PTR 16[r11]
+ mov ecx, DWORD PTR 20[r11]
+
+ mov r10d,eax ; X[4]
+ mov r8d,ecx ; X[5]
+
+ mov eax, DWORD PTR 24[r11] ; X[6..15] are copied to [rsp+24]..[rsp+60] unchanged
+ mov ecx, DWORD PTR 28[r11]
+ mov DWORD PTR 24[rsp],eax
+ mov DWORD PTR 28[rsp],ecx
+ mov eax, DWORD PTR 32[r11]
+ mov ecx, DWORD PTR 36[r11]
+ mov DWORD PTR 32[rsp],eax
+ mov DWORD PTR 36[rsp],ecx
+ mov eax, DWORD PTR 40[r11]
+ mov ecx, DWORD PTR 44[r11]
+ mov DWORD PTR 40[rsp],eax
+ mov DWORD PTR 44[rsp],ecx
+ mov eax, DWORD PTR 48[r11]
+ mov ecx, DWORD PTR 52[r11]
+ mov DWORD PTR 48[rsp],eax
+ mov DWORD PTR 52[rsp],ecx
+ mov eax, DWORD PTR 56[r11]
+ mov ecx, DWORD PTR 60[r11]
+ mov DWORD PTR 56[rsp],eax
+ mov DWORD PTR 60[rsp],ecx
+
+ jmp $L001shortcut ; continue in sha1_block_asm_data_order: rounds, state update, loop test and epilogue
+; NOTE(review): the shared loop branches back to $L000start, which byte-swaps its input,
+; so with num > 1 only the first block is taken in host order - confirm this is intended.
+
+sha1_block_asm_host_order ENDP
+;_TEXT$ ENDS
+END
diff --git a/hasher/SHA_asm.asm b/hasher/SHA1x86.asm
similarity index 100%
rename from hasher/SHA_asm.asm
rename to hasher/SHA1x86.asm
diff --git a/hasher/hash_crc.cpp b/hasher/hash_crc.cpp
index d857d6e4a..81b909298 100644
--- a/hasher/hash_crc.cpp
+++ b/hasher/hash_crc.cpp
@@ -20,7 +20,7 @@
#include
#include
-#ifdef _WIN64
+#if defined(_WIN64) || defined(__x86_64__) // #ifdef tests one identifier only, so an OR needs #if defined(...)
extern "C" void __fastcall crcCalc(DWORD *pdwCrc32, DWORD *ptrCrc32Table, BYTE *bufferAsm, DWORD dwBytesReadAsm);
#else
extern "C" void crcCalc(DWORD *pdwCrc32, DWORD *ptrCrc32Table, BYTE *bufferAsm, DWORD dwBytesReadAsm);
diff --git a/hasher/hasher.cpp b/hasher/hasher.cpp
index 4fe4851a7..44ea95573 100644
--- a/hasher/hasher.cpp
+++ b/hasher/hasher.cpp
@@ -299,6 +299,7 @@ extern "C" __declspec(dllexport) int __cdecl CalculateHashes_AsyncIO(LPCWSTR psz
iReaderPos++;
iPos += dwBytesRead;
+
//report progress
int nProgress = (int)(iPos / (float)statFile.st_size * 100);
if (pHashProgress && nLastProgress != nProgress)