#ifndef RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h
#define RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"

#include "gpuVertexFinder.h"

namespace gpuVertexFinder {

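  // Refit the proto-vertices produced by the clusterizer: set each vertex z
  // to the inverse-variance-weighted mean of the z of its tracks, accumulate
  // the chi2, and flag as outliers the tracks whose individual chi2
  // contribution exceeds chi2Max.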
  __device__ __forceinline__ void fitVertices(ZVertices* pdata,
                                              WorkSpace* pws,
                                              float chi2Max  // for outlier rejection
  ) {
    constexpr bool verbose = false;  // in principle the compiler should optimize this out when false

    auto& __restrict__ data = *pdata;
    auto& __restrict__ ws = *pws;
    auto nt = ws.ntrks;
    float const* __restrict__ zt = ws.zt;
    float const* __restrict__ ezt2 = ws.ezt2;
    float* __restrict__ zv = data.zv;
    float* __restrict__ wv = data.wv;
    float* __restrict__ chi2 = data.chi2;
    uint32_t& nvFinal = data.nvFinal;
    uint32_t& nvIntermediate = ws.nvIntermediate;

    int32_t* __restrict__ nn = data.ndof;
    int32_t* __restrict__ iv = ws.iv;

    assert(pdata);
    assert(zt);

    assert(nvFinal <= nvIntermediate);
    nvFinal = nvIntermediate;
    auto foundClusters = nvFinal;

    // zero the accumulators for every proto-vertex
    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
      zv[i] = 0;
      wv[i] = 0;
      chi2[i] = 0;
    }

    // noise counter, for debugging only
    __shared__ int noise;
    if (verbose && 0 == threadIdx.x)
      noise = 0;

    __syncthreads();

    // compute cluster location
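    // Accumulate, per proto-vertex, the terms of the weighted mean
    //   zv = sum_i(zt[i] / ezt2[i]) / sum_i(1 / ezt2[i]),
    // where ezt2 is the squared error on the track z; tracks with iv > 9990
    // are not associated to any vertex and are counted as noise.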
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] > 9990) {
        if (verbose)
          atomicAdd(&noise, 1);
        continue;
      }
      assert(iv[i] >= 0);
      assert(iv[i] < int(foundClusters));
      auto w = 1.f / ezt2[i];
      atomicAdd_block(&zv[iv[i]], zt[i] * w);
      atomicAdd_block(&wv[iv[i]], w);
    }

    __syncthreads();
    // finalize the weighted mean and reset nn (the ndof array), reused below
    // as a per-vertex track counter; starting at -1 makes ndof equal to the
    // number of accepted tracks minus one
    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
      assert(wv[i] > 0.f);
      zv[i] /= wv[i];
      nn[i] = -1;  // ndof
    }
    __syncthreads();

    // compute chi2
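    // Each track contributes c2 = (zv - zt)^2 / ezt2 to its vertex chi2;
    // tracks above chi2Max are flagged as outliers (iv = 9999) and dropped,
    // the others also increment the vertex ndof counter.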
    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
      if (iv[i] > 9990)
        continue;

      auto c2 = zv[iv[i]] - zt[i];
      c2 *= c2 / ezt2[i];
      if (c2 > chi2Max) {
        iv[i] = 9999;
        continue;
      }
      atomicAdd_block(&chi2[iv[i]], c2);
      atomicAdd_block(&nn[iv[i]], 1);
    }
    __syncthreads();
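    // Rescale the vertex weight: wv accumulated the sum of track weights,
    // i.e. the inverse variance of the fitted z; multiplying by ndof/chi2
    // deweights vertices with a poor chi2 per degree of freedom.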
    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x)
      if (nn[i] > 0)
        wv[i] *= float(nn[i]) / chi2[i];

    if (verbose && 0 == threadIdx.x)
      printf("found %u proto clusters ", foundClusters);
    if (verbose && 0 == threadIdx.x)
      printf("and %d noise\n", noise);
  }

  __global__ void fitVerticesKernel(ZVertices* pdata,
                                    WorkSpace* pws,
                                    float chi2Max  // for outlier rejection
  ) {
    fitVertices(pdata, pws, chi2Max);
  }
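
  // Note: fitVertices strides only over threadIdx.x and relies on
  // __syncthreads() and block-scoped atomics (atomicAdd_block), so
  // fitVerticesKernel must be launched as a single block; e.g., with
  // illustrative (not necessarily production) launch parameters:
  //
  //   fitVerticesKernel<<<1, 1024, 0, stream>>>(pdata, pws, 50.f);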

}  // namespace gpuVertexFinder

#endif  // RecoPixelVertexing_PixelVertexFinding_plugins_gpuFitVertices_h