Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:38:06

0001 #include <cmath>
0002 #include <cstdint>
0003 #include <iostream>
0004 #include <random>
0005 #include <vector>
0006 
0007 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
0011 #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
0012 // PixelTrackUtilities only included in order to compile SoALayout with Eigen columns
0013 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0014 #include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
0015 #include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
0016 #include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
0017 
0018 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h"
0019 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h"
0020 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h"
0021 #ifdef USE_DBSCAN
0022 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h"
0023 #define CLUSTERIZE gpuVertexFinder::clusterTracksDBSCAN
0024 #elif USE_ITERATIVE
0025 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h"
0026 #define CLUSTERIZE gpuVertexFinder::clusterTracksIterative
0027 #else
0028 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h"
0029 #define CLUSTERIZE gpuVertexFinder::clusterTracksByDensityKernel
0030 #endif
0031 #include "RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h"
0032 #include "RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h"
0033 #include "RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h"
0034 
0035 #ifdef ONE_KERNEL
0036 #ifdef __CUDACC__
0037 __global__ void vertexFinderOneKernel(gpuVertexFinder::VtxSoAView pdata,
0038                                       gpuVertexFinder::WsSoAView pws,
0039                                       int minT,      // min number of neighbours to be "seed"
0040                                       float eps,     // max absolute distance to cluster
0041                                       float errmax,  // max error to be "seed"
0042                                       float chi2max  // max normalized distance to cluster,
0043 ) {
0044   gpuVertexFinder::clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0045   __syncthreads();
0046   gpuVertexFinder::fitVertices(pdata, pws, 50.);
0047   __syncthreads();
0048   gpuVertexFinder::splitVertices(pdata, pws, 9.f);
0049   __syncthreads();
0050   gpuVertexFinder::fitVertices(pdata, pws, 5000.);
0051   __syncthreads();
0052   gpuVertexFinder::sortByPt2(pdata, pws);
0053 }
0054 #endif
0055 #endif
0056 
0057 struct Event {
0058   std::vector<float> zvert;
0059   std::vector<uint16_t> itrack;
0060   std::vector<float> ztrack;
0061   std::vector<float> eztrack;
0062   std::vector<float> pttrack;
0063   std::vector<uint16_t> ivert;
0064 };
0065 
0066 struct ClusterGenerator {
0067   explicit ClusterGenerator(float nvert, float ntrack)
0068       : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
0069 
0070   void operator()(Event& ev) {
0071     int nclus = clusGen(reng);
0072     ev.zvert.resize(nclus);
0073     ev.itrack.resize(nclus);
0074     for (auto& z : ev.zvert) {
0075       z = 3.5f * gauss(reng);
0076     }
0077 
0078     ev.ztrack.clear();
0079     ev.eztrack.clear();
0080     ev.ivert.clear();
0081     ev.pttrack.clear();
0082     for (int iv = 0; iv < nclus; ++iv) {
0083       auto nt = trackGen(reng);
0084       ev.itrack[iv] = nt;
0085       for (int it = 0; it < nt; ++it) {
0086         auto err = errgen(reng);  // reality is not flat....
0087         ev.ztrack.push_back(ev.zvert[iv] + err * gauss(reng));
0088         ev.eztrack.push_back(err * err);
0089         ev.ivert.push_back(iv);
0090         ev.pttrack.push_back((iv == 5 ? 1.f : 0.5f) + ptGen(reng));
0091         ev.pttrack.back() *= ev.pttrack.back();
0092       }
0093     }
0094     // add noise
0095     auto nt = 2 * trackGen(reng);
0096     for (int it = 0; it < nt; ++it) {
0097       auto err = 0.03f;
0098       ev.ztrack.push_back(rgen(reng));
0099       ev.eztrack.push_back(err * err);
0100       ev.ivert.push_back(9999);
0101       ev.pttrack.push_back(0.5f + ptGen(reng));
0102       ev.pttrack.back() *= ev.pttrack.back();
0103     }
0104   }
0105 
0106   std::mt19937 reng;
0107   std::uniform_real_distribution<float> rgen;
0108   std::uniform_real_distribution<float> errgen;
0109   std::poisson_distribution<int> clusGen;
0110   std::poisson_distribution<int> trackGen;
0111   std::normal_distribution<float> gauss;
0112   std::exponential_distribution<float> ptGen;
0113 };
0114 
0115 __global__ void print(gpuVertexFinder::VtxSoAView pdata, gpuVertexFinder::WsSoAView pws) {
0116   auto& __restrict__ ws = pws;
0117   printf("nt,nv %d %d,%d\n", ws.ntrks(), pdata.nvFinal(), ws.nvIntermediate());
0118 }
0119 
0120 int main() {
0121 #ifdef __CUDACC__
0122   cudaStream_t stream;
0123   cms::cudatest::requireDevices();
0124   cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
0125 
0126   ZVertexSoADevice onGPU_d(stream);
0127   gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(stream);
0128 #else
0129 
0130   ZVertexSoAHost onGPU_d;
0131   gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_d;
0132 #endif
0133 
0134   Event ev;
0135 
0136   float eps = 0.1f;
0137   std::array<float, 3> par{{eps, 0.01f, 9.0f}};
0138   for (int nav = 30; nav < 80; nav += 20) {
0139     ClusterGenerator gen(nav, 10);
0140 
0141     for (int i = 8; i < 20; ++i) {
0142       auto kk = i / 4;  // M param
0143 
0144       gen(ev);
0145 
0146 #ifdef __CUDACC__
0147       gpuVertexFinder::init<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0148 #else
0149       gpuVertexFinder::init(onGPU_d.view(), ws_d.view());
0150 #endif
0151 
0152       std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl;
0153       auto nt = ev.ztrack.size();
0154 #ifdef __CUDACC__
0155       cudaCheck(cudaMemcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t), cudaMemcpyHostToDevice));
0156       cudaCheck(
0157           cudaMemcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size(), cudaMemcpyHostToDevice));
0158       cudaCheck(
0159           cudaMemcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
0160       cudaCheck(
0161           cudaMemcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
0162 #else
0163       ::memcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t));
0164       ::memcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size());
0165       ::memcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size());
0166       ::memcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size());
0167 #endif
0168 
0169       std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
0170 
0171       if ((i % 4) == 0)
0172         par = {{eps, 0.02f, 12.0f}};
0173       if ((i % 4) == 1)
0174         par = {{eps, 0.02f, 9.0f}};
0175       if ((i % 4) == 2)
0176         par = {{eps, 0.01f, 9.0f}};
0177       if ((i % 4) == 3)
0178         par = {{0.7f * eps, 0.01f, 9.0f}};
0179 
0180       uint32_t nv = 0;
0181 #ifdef __CUDACC__
0182       print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0183       cudaCheck(cudaGetLastError());
0184       cudaDeviceSynchronize();
0185 
0186 #ifdef ONE_KERNEL
0187       cms::cuda::launch(vertexFinderOneKernel, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0188 #else
0189       cms::cuda::launch(CLUSTERIZE, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0190 #endif
0191       print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0192 
0193       cudaCheck(cudaGetLastError());
0194       cudaDeviceSynchronize();
0195 
0196       cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
0197       cudaCheck(cudaGetLastError());
0198       cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0199 
0200 #else
0201       print(onGPU_d.view(), ws_d.view());
0202       CLUSTERIZE(onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0203       print(onGPU_d.view(), ws_d.view());
0204       gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
0205       nv = onGPU_d.view().nvFinal();
0206 #endif
0207 
0208       if (nv == 0) {
0209         std::cout << "NO VERTICES???" << std::endl;
0210         continue;
0211       }
0212 
0213       float* zv = nullptr;
0214       float* wv = nullptr;
0215       float* ptv2 = nullptr;
0216       int32_t* nn = nullptr;
0217       uint16_t* ind = nullptr;
0218 
0219       // keep chi2 separated...
0220       float chi2[2 * nv];  // make space for splitting...
0221 
0222 #ifdef __CUDACC__
0223       float hzv[2 * nv];
0224       float hwv[2 * nv];
0225       float hptv2[2 * nv];
0226       int32_t hnn[2 * nv];
0227       uint16_t hind[2 * nv];
0228 
0229       zv = hzv;
0230       wv = hwv;
0231       ptv2 = hptv2;
0232       nn = hnn;
0233       ind = hind;
0234 #else
0235       zv = onGPU_d.view().zv();
0236       wv = onGPU_d.view().wv();
0237       ptv2 = onGPU_d.view().ptv2();
0238       nn = onGPU_d.view().ndof();
0239       ind = onGPU_d.view().sortInd();
0240 #endif
0241 
0242 #ifdef __CUDACC__
0243       cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0244       cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0245 #else
0246       memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0247 #endif
0248 
0249       for (auto j = 0U; j < nv; ++j)
0250         if (nn[j] > 0)
0251           chi2[j] /= float(nn[j]);
0252       {
0253         auto mx = std::minmax_element(chi2, chi2 + nv);
0254         std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0255       }
0256 
0257 #ifdef __CUDACC__
0258       cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
0259       cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0260       cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0261       cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0262 #else
0263       gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
0264       nv = onGPU_d.view().nvFinal();
0265       memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0266 #endif
0267 
0268       for (auto j = 0U; j < nv; ++j)
0269         if (nn[j] > 0)
0270           chi2[j] /= float(nn[j]);
0271       {
0272         auto mx = std::minmax_element(chi2, chi2 + nv);
0273         std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0274       }
0275 
0276 #ifdef __CUDACC__
0277       // one vertex per block!!!
0278       cms::cuda::launch(gpuVertexFinder::splitVerticesKernel, {1024, 64}, onGPU_d.view(), ws_d.view(), 9.f);
0279       cudaCheck(cudaMemcpy(&nv, &ws_d.view().nvIntermediate(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0280 #else
0281       gpuVertexFinder::splitVertices(onGPU_d.view(), ws_d.view(), 9.f);
0282       nv = ws_d.view().nvIntermediate();
0283 #endif
0284       std::cout << "after split " << nv << std::endl;
0285 
0286 #ifdef __CUDACC__
0287       cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 5000.f);
0288       cudaCheck(cudaGetLastError());
0289 
0290       cms::cuda::launch(gpuVertexFinder::sortByPt2Kernel, {1, 256}, onGPU_d.view(), ws_d.view());
0291       cudaCheck(cudaGetLastError());
0292       cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0293 #else
0294       gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 5000.f);
0295       gpuVertexFinder::sortByPt2(onGPU_d.view(), ws_d.view());
0296       nv = onGPU_d.view().nvFinal();
0297       memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0298 #endif
0299 
0300       if (nv == 0) {
0301         std::cout << "NO VERTICES???" << std::endl;
0302         continue;
0303       }
0304 
0305 #ifdef __CUDACC__
0306       cudaCheck(cudaMemcpy(zv, onGPU_d.view().zv(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0307       cudaCheck(cudaMemcpy(wv, onGPU_d.view().wv(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0308       cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0309       cudaCheck(cudaMemcpy(ptv2, onGPU_d.view().ptv2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0310       cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0311       cudaCheck(cudaMemcpy(ind, onGPU_d.view().sortInd(), nv * sizeof(uint16_t), cudaMemcpyDeviceToHost));
0312 #endif
0313       for (auto j = 0U; j < nv; ++j)
0314         if (nn[j] > 0)
0315           chi2[j] /= float(nn[j]);
0316       {
0317         auto mx = std::minmax_element(chi2, chi2 + nv);
0318         std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0319       }
0320 
0321       {
0322         auto mx = std::minmax_element(wv, wv + nv);
0323         std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) << std::endl;
0324       }
0325 
0326       {
0327         auto mx = std::minmax_element(ptv2, ptv2 + nv);
0328         std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
0329         std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv - 1]] << " at " << ind[0] << ' '
0330                   << ind[nv - 1] << std::endl;
0331       }
0332 
0333       float dd[nv];
0334       for (auto kv = 0U; kv < nv; ++kv) {
0335         auto zr = zv[kv];
0336         auto md = 500.0f;
0337         for (auto zs : ev.ztrack) {
0338           auto d = std::abs(zr - zs);
0339           md = std::min(d, md);
0340         }
0341         dd[kv] = md;
0342       }
0343       if (i == 6) {
0344         for (auto d : dd)
0345           std::cout << d << ' ';
0346         std::cout << std::endl;
0347       }
0348       auto mx = std::minmax_element(dd, dd + nv);
0349       float rms = 0;
0350       for (auto d : dd)
0351         rms += d * d;
0352       rms = std::sqrt(rms) / (nv - 1);
0353       std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
0354 
0355     }  // loop on events
0356   }    // lopp on ave vert
0357 
0358   return 0;
0359 }