Skip to content

Commit

Permalink
Enable building with MSVC and clang-cl on Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
mmozeiko authored and fwojcik committed Nov 28, 2023
1 parent 40f8909 commit ada3017
Show file tree
Hide file tree
Showing 41 changed files with 264,367 additions and 264,299 deletions.
35 changes: 25 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,32 @@ if(NOT CMAKE_BUILD_TYPE)
CACHE STRING "Choose the type of build, options are: Release Debug Asan" FORCE)
endif(NOT CMAKE_BUILD_TYPE)

set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -g")
set(CMAKE_CXX_FLAGS_DEBUG "-O1 -DDEBUG -g")
set(CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_DEBUG}")

if(MSVC)
# using Visual Studio C++, already the default with VS17
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL GNU
# Flags shared by every MSVC-style front end:
#   -Zc:__cplusplus                 passed so that __cplusplus reports the real language level
#   -D_CRT_*_NO_DEPRECATE, NOMINMAX CRT / Windows header configuration macros
#   -bigobj                         required because of the wordlist in array.h
string(APPEND CMAKE_CXX_FLAGS
  " -Zc:__cplusplus -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -DNOMINMAX -bigobj")
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  # -MP lets MSVC build the project with multiple processes, i.e. several
  # cl.exe instances running in parallel.
  # The flag is added only when the compiler ID matches MSVC. For any other
  # compiler that reaches this point (e.g. clang-cl) it is not passed, and
  # the build system (make/ninja) controls how the compiler is launched.
  string(APPEND CMAKE_CXX_FLAGS " -MP")
endif()
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
  # Pointer size of 4 bytes: explicitly request SSE2 code generation when
  # using Visual Studio C++. This is already the default with VS17.
  string(APPEND CMAKE_CXX_FLAGS " /arch:SSE2")
endif()
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL GNU
OR (CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
OR (CMAKE_CXX_COMPILER_ID STREQUAL Clang)
OR (CMAKE_CXX_COMPILER_ID STREQUAL Intel))

set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ggdb3")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3")
set(CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_ASAN} -Og -ggdb3 \
if(NOT MSVC)
  # GCC-style debug-info switches; they are only added when the MSVC
  # variable is not set.
  string(APPEND CMAKE_CXX_FLAGS " -g -ggdb3")
endif()

# Release: full optimization, assertions compiled out via NDEBUG.
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
# Debug: light optimization with DEBUG defined. NDEBUG must not be defined
# here, otherwise assert() is disabled in Debug builds and in Asan builds,
# which inherit these flags below.
set(CMAKE_CXX_FLAGS_DEBUG "-O1 -DDEBUG")
# Asan: the Debug flags plus address/undefined-behavior sanitizer
# instrumentation. Sibling-call optimization and frame-pointer omission are
# switched off by the flags below.
set(CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_DEBUG} -Og -ggdb3 \
-fsanitize=address,undefined -fno-optimize-sibling-calls \
-fsanitize-address-use-after-scope -fno-omit-frame-pointer")

Expand Down Expand Up @@ -181,6 +192,10 @@ else()
set_source_files_properties(lib/Hashinfo.cpp PROPERTIES INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/util")
endif()

# When targeting Windows with a compiler whose ID is exactly "Clang", link
# SMHasher3Hashlib (and, through PUBLIC, its consumers) against
# clang_rt.builtins-x86_64.lib.
# NOTE(review): presumably this supplies compiler-rt helper routines that the
# default Windows runtime libraries lack - confirm. The library name
# hard-codes the x86_64 architecture.
# "Clang" is quoted so that it is always compared as a literal string and is
# never dereferenced as a variable name.
if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  target_link_libraries(SMHasher3Hashlib PUBLIC clang_rt.builtins-x86_64.lib)
endif()

########################################
# Build all the tests
########################################
Expand Down
9 changes: 5 additions & 4 deletions hashes/aesnihash-majek.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ static void aesnihash( const void * inv, const size_t len, const seed_t seed, vo
uint64_t src_sz = len;

uint8_t tmp_buf[16] = { 0 };
__m128i rk0 = { UINT64_C(0x736f6d6570736575), UINT64_C(0x646f72616e646f6d) };
__m128i rk1 = { UINT64_C(0x1231236570743245), UINT64_C(0x126f12321321456d) };
__m128i rk0 = _mm_set_epi64x(UINT64_C(0x646f72616e646f6d), UINT64_C(0x736f6d6570736575));
__m128i rk1 = _mm_set_epi64x(UINT64_C(0x126f12321321456d), UINT64_C(0x1231236570743245));
// Homegrown seeding for SMHasher3
__m128i seed128 = { (int64_t)seed, 0 };
__m128i seed128 = _mm_set_epi64x(0, (int64_t)seed);
__m128i hash = _mm_xor_si128(rk0, seed128);

while (src_sz >= 16) {
Expand Down Expand Up @@ -75,7 +75,8 @@ static void aesnihash( const void * inv, const size_t len, const seed_t seed, vo
// Of course the xor below will cancel out _any_ value...
hash = _mm_aesenc_si128(hash, _mm_set_epi64x(src_sz, src_sz));

uint64_t result = hash[0] ^ hash[1];
// _mm_extract_epi64 assumes SSE4.1 is available
uint64_t result = _mm_cvtsi128_si64(hash) ^ _mm_extract_epi64(hash, 1);
memcpy(out, &result, 8);
}

Expand Down
2 changes: 1 addition & 1 deletion hashes/blake3/compress-sse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ static FORCE_INLINE void transpose_msg_vecs( const uint8_t * const * inputs, siz
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const char *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[ 0]);
transpose_vecs(&out[ 4]);
Expand Down
2 changes: 1 addition & 1 deletion hashes/blake3/compress-sse41.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ static FORCE_INLINE void transpose_msg_vecs( const uint8_t * const * inputs, siz
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const char *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[ 0]);
transpose_vecs(&out[ 4]);
Expand Down
8 changes: 6 additions & 2 deletions hashes/halftimehash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ namespace halftime_hash {
return Minus(zero, a);
}

inline uint64_t Sum( u128 a ) { return (uint64_t)a[0] + (uint64_t)a[1]; }
// Returns the sum of the two 64-bit lanes of `a`, with unsigned wraparound.
// The intrinsics yield signed 64-bit integers, so each lane is converted to
// uint64_t before adding; adding the signed values directly would be
// undefined behavior on overflow.
// The high lane is brought down with _mm_unpackhi_epi64 (SSE2), so the
// SSE4.1-only _mm_extract_epi64 is not needed here.
inline uint64_t Sum( u128 a ) {
    const uint64_t lo = (uint64_t)_mm_cvtsi128_si64(a);
    const uint64_t hi = (uint64_t)_mm_cvtsi128_si64(_mm_unpackhi_epi64(a, a));

    return lo + hi;
}

template <bool bswap>
struct BlockWrapper128 {
Expand Down Expand Up @@ -218,9 +219,12 @@ namespace halftime_hash {
auto d = _mm256_extracti128_si256(a, 1);

c = _mm_add_epi64(c, d);
#ifndef _MSC_VER
static_assert(sizeof(c[0]) == sizeof(uint64_t) , "u256 too granular");
static_assert(sizeof(c) == 2 * sizeof(uint64_t), "u256 too granular");
return (uint64_t)c[0] + (uint64_t)c[1];
#endif
// _mm_extract_epi64 assumes SSE4.1 is also available (should always be present when AVX2 is enabled).
// The two 64-bit lanes are added, not XORed, so this path returns the same
// value as the indexed c[0] + c[1] form used when _MSC_VER is not defined.
// Each lane is cast to uint64_t first because the intrinsics return signed
// values and signed overflow is undefined behavior.
return (uint64_t)_mm_cvtsi128_si64(c) + (uint64_t)_mm_extract_epi64(c, 1);
}

template <bool bswap>
Expand Down
6 changes: 3 additions & 3 deletions hashes/khashv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ static KHASH_FINLINE int khashv_is_little_endian() {

struct khashv_block_s {
union {
uint8_t bytes[16];
uint32_t words[4];
uint8_t bytes[16];
#if defined(HAVE_SSSE_3)
__m128i vec;
#endif
Expand All @@ -53,15 +53,15 @@ typedef struct khashv_block_s khashvBlock;
typedef struct khashv_block_s khashvSeed;

static const khashvBlock khash_v_init = {
.words = {
{ {
// Really this could basically be almost anything
// So just using some bytes of the SHA-256 hashes
// of 1, 2, 3, and 4
0x7785459a, // SHA256 of the byte 0x01, using the last 4 bytes
0x6457d986, // SHA256 of the byte 0x02, using the last 4 bytes
0xadff29c5, // SHA256 of the byte 0x03, using the last 4 bytes
0x81c89e71, // SHA256 of the byte 0x04, using the last 4 bytes
}
} }
};

//------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions hashes/nmhash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@
#endif

const char * nmh_impl_str[] = {
[NMH_SCALAR] = "scalar",
[NMH_SSE2] = "sse2",
[NMH_AVX2] = "avx2",
[NMH_AVX512] = "avx512",
"scalar", // NMH_SCALAR
"sse2", // NMH_SSE2
"avx2", // NMH_AVX2
"avx512", // NMH_AVX512
};

#if NMH_VECTOR > NMH_SCALAR
Expand Down
29 changes: 16 additions & 13 deletions hashes/t1ha.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@
#endif

#if !defined(__has_builtin)
#define __has_builtin(x) (0)
#define __has_builtin(x) (0)
#endif
#if !defined(__has_attribute)
#define __has_attribute(x) (0)
#endif

#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned)
Expand Down Expand Up @@ -1606,18 +1609,18 @@ static bool t1ha0_aes_selftest( void ) {
#endif

const char * t1ha_impl_str[] = {
[0] = "1N+a0",
[1] = "1N+a1",
[2] = "1N+a2",
[3] = "1Y+a0",
[4] = "1Y+a1",
[5] = "1Y+a2",
[6] = "1N+a0+aes",
[7] = "1N+a1+aes",
[8] = "1N+a2+aes",
[9] = "1Y+a0+aes",
[10] = "1Y+a1+aes",
[11] = "1Y+a2+aes",
"1N+a0",
"1N+a1",
"1N+a2",
"1Y+a0",
"1Y+a1",
"1Y+a2",
"1N+a0+aes",
"1N+a1+aes",
"1N+a2+aes",
"1Y+a0+aes",
"1Y+a1+aes",
"1Y+a2+aes",
};

REGISTER_FAMILY(t1ha,
Expand Down
Loading

0 comments on commit ada3017

Please sign in to comment.