Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-09 02:22:21

0001 #ifndef HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
0002 #define HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h
0003 
0004 #include <alpaka/alpaka.hpp>
0005 
0006 #include "FWCore/Utilities/interface/bit_cast.h"
0007 
0008 // FIXME: this should be rewritten using the correct template specialisation for the different accelerator types
0009 
0010 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0011 ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static float atomicMaxF(const TAcc& acc, float* address, float val) {
0012 #if defined(__CUDA_ARCH__) or defined(__HIP_DEVICE_COMPILE__)
0013   // GPU implementation uses __float_as_int / __int_as_float
0014   int ret = __float_as_int(*address);
0015   while (val > __int_as_float(ret)) {
0016     int old = ret;
0017     if ((ret = atomicCAS((int*)address, old, __float_as_int(val))) == old)
0018       break;
0019   }
0020   return __int_as_float(ret);
0021 #else
0022   // CPU implementation uses edm::bit_cast
0023   int ret = edm::bit_cast<int>(*address);
0024   while (val > edm::bit_cast<float>(ret)) {
0025     int old = ret;
0026     if ((ret = alpaka::atomicCas(acc, (int*)address, old, edm::bit_cast<int>(val))) == old)
0027       break;
0028   }
0029   return edm::bit_cast<float>(ret);
0030 #endif  // __CUDA_ARCH__ or __HIP_DEVICE_COMPILE__
0031 }
0032 
0033 #endif  // HeterogeneousCore_AlpakaInterface_interface_atomicMaxF_h