summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Kryczka <andrewkr@fb.com>2022-03-09 21:07:31 -0800
committerAndrew Kryczka <andrewkr@fb.com>2022-03-09 22:51:01 -0800
commitbbae679ce7bd53d267ac8310f60fa86491716666 (patch)
tree84260cdfbbf3871be09d993ed5d4c8a34d13c99c
parentfe9e363ba1774f1627a933b5115b3c05840712c1 (diff)
Avoid popcnt on Windows when unavailable and in portable builds (#9680)v7.0.1
Summary: Fixes https://github.com/facebook/rocksdb/issues/9560. Only use popcnt intrinsic when HAVE_SSE42 is set. Also avoid setting it based on compiler test in portable builds because such test will pass on MSVC even without proper arch flags (ref: https://devblogs.microsoft.com/oldnewthing/20201026-00/?p=104397). Pull Request resolved: https://github.com/facebook/rocksdb/pull/9680 Test Plan: verified the combinations of -DPORTABLE and -DFORCE_SSE42 produce expected compiler flags on Linux. Verified MSVC build using PORTABLE=1 (in CircleCI) does not set HAVE_SSE42. Reviewed By: pdillinger Differential Revision: D34739033 Pulled By: ajkr fbshipit-source-id: d10456f3392945fc3e59430a1777840f7b60b276
-rw-r--r--CMakeLists.txt14
-rw-r--r--util/math.h22
2 files changed, 23 insertions, 13 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ac7b5628..774c7a188 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -321,7 +321,8 @@ if(NOT MSVC)
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
endif()
-CHECK_CXX_SOURCE_COMPILES("
+if (NOT PORTABLE OR FORCE_SSE42)
+ CHECK_CXX_SOURCE_COMPILES("
#include <cstdint>
#include <nmmintrin.h>
#include <wmmintrin.h>
@@ -333,11 +334,12 @@ int main() {
auto d = _mm_cvtsi128_si64(c);
}
" HAVE_SSE42)
-if(HAVE_SSE42)
- add_definitions(-DHAVE_SSE42)
- add_definitions(-DHAVE_PCLMUL)
-elseif(FORCE_SSE42)
- message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
+ if(HAVE_SSE42)
+ add_definitions(-DHAVE_SSE42)
+ add_definitions(-DHAVE_PCLMUL)
+ elseif(FORCE_SSE42)
+ message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
+ endif()
endif()
# Check if -latomic is required or not
diff --git a/util/math.h b/util/math.h
index c6dc2a29f..2254b4aab 100644
--- a/util/math.h
+++ b/util/math.h
@@ -92,18 +92,25 @@ inline int CountTrailingZeroBits(T v) {
#endif
}
-#if defined(_MSC_VER) && !defined(_M_X64)
+// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
+// the following code at coarse granularity for simpler macros. It's important
+// to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
+#ifdef _MSC_VER
+
namespace detail {
+
template <typename T>
int BitsSetToOneFallback(T v) {
const int kBits = static_cast<int>(sizeof(T)) * 8;
static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
// we static_cast these bit patterns in order to truncate them to the correct
- // size
+ // size. Warning C4309 dislikes this technique, so disable it here.
+#pragma warning(disable : 4309)
v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
((v >> 2) & static_cast<T>(0x3333333333333333ull)));
v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
+#pragma warning(default : 4309)
for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
v += static_cast<T>(v >> shift_bits);
}
@@ -113,7 +120,8 @@ int BitsSetToOneFallback(T v) {
}
} // namespace detail
-#endif
+
+#endif // _MSC_VER
// Number of bits set to 1. Also known as "population count".
template <typename T>
@@ -126,21 +134,21 @@ inline int BitsSetToOne(T v) {
constexpr auto mm = 8 * sizeof(uint32_t) - 1;
// The bit mask is to neutralize sign extension on small signed types
constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
-#if defined(_M_X64) || defined(_M_IX86)
+#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
#else
return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
#endif
} else if (sizeof(T) == sizeof(uint32_t)) {
-#if defined(_M_X64) || defined(_M_IX86)
+#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86))
return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
#else
return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
#endif
} else {
-#ifdef _M_X64
+#if defined(HAVE_SSE42) && defined(_M_X64)
return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
-#elif defined(_M_IX86)
+#elif defined(HAVE_SSE42) && defined(_M_IX86)
return static_cast<int>(
__popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
__popcnt(static_cast<uint32_t>(v))));