// Copyright 2022 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_ #define ABSL_BASE_INTERNAL_PREFETCH_H_ #include "absl/base/config.h" #ifdef __SSE__ #include #endif #if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) #include #pragma intrinsic(_mm_prefetch) #endif // Compatibility wrappers around __builtin_prefetch, to prefetch data // for read if supported by the toolchain. // Move data into the cache before it is read, or "prefetch" it. // // The value of `addr` is the address of the memory to prefetch. If // the target and compiler support it, data prefetch instructions are // generated. If the prefetch is done some time before the memory is // read, it may be in the cache by the time the read occurs. // // The function names specify the temporal locality heuristic applied, // using the names of Intel prefetch instructions: // // T0 - high degree of temporal locality; data should be left in as // many levels of the cache possible // T1 - moderate degree of temporal locality // T2 - low degree of temporal locality // Nta - no temporal locality, data need not be left in the cache // after the read // // Incorrect or gratuitous use of these functions can degrade // performance, so use them only when representative benchmarks show // an improvement. // // Example usage: // // absl::base_internal::PrefetchT0(addr); // // Currently, the different prefetch calls behave on some Intel // architectures as follows: // // SNB..SKL SKX // PrefetchT0() L1/L2/L3 L1/L2 // PrefetchT1() L2/L3 L2 // PrefetchT2() L2/L3 L2 // PrefetchNta() L1/--/L3 L1* // // * On SKX PrefetchNta() will bring the line into L1 but will evict // from L3 cache. This might result in surprising behavior. // // SNB = Sandy Bridge, SKL = Skylake, SKX = Skylake Xeon. // namespace absl { ABSL_NAMESPACE_BEGIN namespace base_internal { void PrefetchT0(const void* addr); void PrefetchT1(const void* addr); void PrefetchT2(const void* addr); void PrefetchNta(const void* addr); // Implementation details follow. #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) #define ABSL_INTERNAL_HAVE_PREFETCH 1 // See __builtin_prefetch: // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. // // These functions speculatively load for read only. This is // safe for all currently supported platforms. However, prefetch for // store may have problems depending on the target platform. // inline void PrefetchT0(const void* addr) { // Note: this uses prefetcht0 on Intel. __builtin_prefetch(addr, 0, 3); } inline void PrefetchT1(const void* addr) { // Note: this uses prefetcht1 on Intel. __builtin_prefetch(addr, 0, 2); } inline void PrefetchT2(const void* addr) { // Note: this uses prefetcht2 on Intel. __builtin_prefetch(addr, 0, 1); } inline void PrefetchNta(const void* addr) { // Note: this uses prefetchtnta on Intel. __builtin_prefetch(addr, 0, 0); } #elif defined(ABSL_INTERNAL_HAVE_SSE) #define ABSL_INTERNAL_HAVE_PREFETCH 1 inline void PrefetchT0(const void* addr) { _mm_prefetch(reinterpret_cast(addr), _MM_HINT_T0); } inline void PrefetchT1(const void* addr) { _mm_prefetch(reinterpret_cast(addr), _MM_HINT_T1); } inline void PrefetchT2(const void* addr) { _mm_prefetch(reinterpret_cast(addr), _MM_HINT_T2); } inline void PrefetchNta(const void* addr) { _mm_prefetch(reinterpret_cast(addr), _MM_HINT_NTA); } #else inline void PrefetchT0(const void*) {} inline void PrefetchT1(const void*) {} inline void PrefetchT2(const void*) {} inline void PrefetchNta(const void*) {} #endif } // namespace base_internal ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_BASE_INTERNAL_PREFETCH_H_