Warning, /DataFormats/Math/test/cudaAtan2Test.cu is written in an unsupported language. File is not indexed.
0001 /**
0002 * Derived from the nVIDIA CUDA 8.0 samples by
0003 *
0004 * Eyal Rozenberg <E.Rozenberg@cwi.nl>
0005 *
0006 * The derivation is specifically permitted in the nVIDIA CUDA Samples EULA
0007 * and the deriver is the owner of this code according to the EULA.
0008 *
0009 * Use this reasonably. If you want to discuss licensing formalities, please
0010 * contact the author.
0011 *
0012 * Modified by VinInn for testing math funcs
0013 */
0014
0015 /* to run test
0016 foreach f ( $CMSSW_BASE/test/$SCRAM_ARCH/DFM_Vector* )
0017 echo $f; $f
0018 end
0019 */
0020
0021 #include <algorithm>
0022 #include <cassert>
0023 #include <chrono>
0024 #include <iomanip>
0025 #include <iostream>
0026 #include <memory>
0027 #include <random>
0028 #include <stdexcept>
0029
0030 #include "DataFormats/Math/interface/approx_atan2.h"
0031 #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
0032 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0033 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0034 #include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
0035
0036 constexpr float xmin = -100.001; // avoid 0
0037 constexpr float incr = 0.04;
0038 constexpr int Nsteps = 2. * std::abs(xmin) / incr;
0039
0040 template <int DEGREE>
0041 __global__ void diffAtan(int *diffs) {
0042 auto mdiff = &diffs[0];
0043 auto idiff = &diffs[1];
0044 auto sdiff = &diffs[2];
0045
0046 int i = blockDim.x * blockIdx.x + threadIdx.x;
0047 int j = blockDim.y * blockIdx.y + threadIdx.y;
0048
0049 auto x = xmin + incr * i;
0050 auto y = xmin + incr * j;
0051
0052 auto approx = unsafe_atan2f<DEGREE>(y, x);
0053 auto iapprox = unsafe_atan2i<DEGREE>(y, x);
0054 auto sapprox = unsafe_atan2s<DEGREE>(y, x);
0055 auto std = std::atan2(y, x);
0056 auto fd = std::abs(std - approx);
0057 atomicMax(mdiff, int(fd * 1.e7));
0058 atomicMax(idiff, std::abs(phi2int(std) - iapprox));
0059 short dd = std::abs(phi2short(std) - sapprox);
0060 atomicMax(sdiff, int(dd));
0061 }
0062
0063 template <int DEGREE>
0064 void go() {
0065 auto start = std::chrono::high_resolution_clock::now();
0066 auto delta = start - start;
0067
0068 // atan2
0069 delta -= (std::chrono::high_resolution_clock::now() - start);
0070
0071 auto diff_d = cms::cuda::make_device_unique<int[]>(3, nullptr);
0072
0073 int diffs[3];
0074 cudaCheck(cudaMemset(diff_d.get(), 0, 3 * 4));
0075
0076 // Launch the diff CUDA Kernel
0077 dim3 threadsPerBlock(32, 32, 1);
0078 dim3 blocksPerGrid(
0079 (Nsteps + threadsPerBlock.x - 1) / threadsPerBlock.x, (Nsteps + threadsPerBlock.y - 1) / threadsPerBlock.y, 1);
0080 std::cout << "CUDA kernel 'diff' launch with " << blocksPerGrid.x << " blocks of " << threadsPerBlock.y
0081 << " threads\n";
0082
0083 cms::cuda::launch(diffAtan<DEGREE>, {blocksPerGrid, threadsPerBlock}, diff_d.get());
0084
0085 cudaCheck(cudaMemcpy(diffs, diff_d.get(), 3 * 4, cudaMemcpyDeviceToHost));
0086 delta += (std::chrono::high_resolution_clock::now() - start);
0087
0088 float mdiff = diffs[0] * 1.e-7;
0089 int idiff = diffs[1];
0090 int sdiff = diffs[2];
0091
0092 std::cout << "for degree " << DEGREE << " max diff is " << mdiff << ' ' << idiff << ' ' << int2phi(idiff) << ' '
0093 << sdiff << ' ' << short2phi(sdiff) << std::endl;
0094 std::cout << "cuda computation took " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() << " ms"
0095 << std::endl;
0096 }
0097
0098 int main() {
0099 cms::cudatest::requireDevices();
0100
0101 try {
0102 go<3>();
0103 go<5>();
0104 go<7>();
0105 go<9>();
0106 } catch (std::runtime_error &ex) {
0107 std::cerr << "CUDA or std runtime error: " << ex.what() << std::endl;
0108 exit(EXIT_FAILURE);
0109 } catch (...) {
0110 std::cerr << "A non-CUDA error occurred" << std::endl;
0111 exit(EXIT_FAILURE);
0112 }
0113
0114 return EXIT_SUCCESS;
0115 }