File indexing completed on 2024-09-07 04:38:06
0001 #include <cmath>
0002 #include <cstdint>
0003 #include <iostream>
0004 #include <random>
0005 #include <vector>
0006
0007 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0008 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0009 #include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
0010 #include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
0011 #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
0012
0013 #include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
0014 #include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
0015 #include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
0016 #include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
0017
0018 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h"
0019 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h"
0020 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h"
0021 #ifdef USE_DBSCAN
0022 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h"
0023 #define CLUSTERIZE gpuVertexFinder::clusterTracksDBSCAN
0024 #elif USE_ITERATIVE
0025 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h"
0026 #define CLUSTERIZE gpuVertexFinder::clusterTracksIterative
0027 #else
0028 #include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h"
0029 #define CLUSTERIZE gpuVertexFinder::clusterTracksByDensityKernel
0030 #endif
0031 #include "RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h"
0032 #include "RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h"
0033 #include "RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h"
0034
0035 #ifdef ONE_KERNEL
0036 #ifdef __CUDACC__
0037 __global__ void vertexFinderOneKernel(gpuVertexFinder::VtxSoAView pdata,
0038 gpuVertexFinder::WsSoAView pws,
0039 int minT,
0040 float eps,
0041 float errmax,
0042 float chi2max
0043 ) {
0044 gpuVertexFinder::clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
0045 __syncthreads();
0046 gpuVertexFinder::fitVertices(pdata, pws, 50.);
0047 __syncthreads();
0048 gpuVertexFinder::splitVertices(pdata, pws, 9.f);
0049 __syncthreads();
0050 gpuVertexFinder::fitVertices(pdata, pws, 5000.);
0051 __syncthreads();
0052 gpuVertexFinder::sortByPt2(pdata, pws);
0053 }
0054 #endif
0055 #endif
0056
0057 struct Event {
0058 std::vector<float> zvert;
0059 std::vector<uint16_t> itrack;
0060 std::vector<float> ztrack;
0061 std::vector<float> eztrack;
0062 std::vector<float> pttrack;
0063 std::vector<uint16_t> ivert;
0064 };
0065
0066 struct ClusterGenerator {
0067 explicit ClusterGenerator(float nvert, float ntrack)
0068 : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
0069
0070 void operator()(Event& ev) {
0071 int nclus = clusGen(reng);
0072 ev.zvert.resize(nclus);
0073 ev.itrack.resize(nclus);
0074 for (auto& z : ev.zvert) {
0075 z = 3.5f * gauss(reng);
0076 }
0077
0078 ev.ztrack.clear();
0079 ev.eztrack.clear();
0080 ev.ivert.clear();
0081 ev.pttrack.clear();
0082 for (int iv = 0; iv < nclus; ++iv) {
0083 auto nt = trackGen(reng);
0084 ev.itrack[iv] = nt;
0085 for (int it = 0; it < nt; ++it) {
0086 auto err = errgen(reng);
0087 ev.ztrack.push_back(ev.zvert[iv] + err * gauss(reng));
0088 ev.eztrack.push_back(err * err);
0089 ev.ivert.push_back(iv);
0090 ev.pttrack.push_back((iv == 5 ? 1.f : 0.5f) + ptGen(reng));
0091 ev.pttrack.back() *= ev.pttrack.back();
0092 }
0093 }
0094
0095 auto nt = 2 * trackGen(reng);
0096 for (int it = 0; it < nt; ++it) {
0097 auto err = 0.03f;
0098 ev.ztrack.push_back(rgen(reng));
0099 ev.eztrack.push_back(err * err);
0100 ev.ivert.push_back(9999);
0101 ev.pttrack.push_back(0.5f + ptGen(reng));
0102 ev.pttrack.back() *= ev.pttrack.back();
0103 }
0104 }
0105
0106 std::mt19937 reng;
0107 std::uniform_real_distribution<float> rgen;
0108 std::uniform_real_distribution<float> errgen;
0109 std::poisson_distribution<int> clusGen;
0110 std::poisson_distribution<int> trackGen;
0111 std::normal_distribution<float> gauss;
0112 std::exponential_distribution<float> ptGen;
0113 };
0114
0115 __global__ void print(gpuVertexFinder::VtxSoAView pdata, gpuVertexFinder::WsSoAView pws) {
0116 auto& __restrict__ ws = pws;
0117 printf("nt,nv %d %d,%d\n", ws.ntrks(), pdata.nvFinal(), ws.nvIntermediate());
0118 }
0119
0120 int main() {
0121 #ifdef __CUDACC__
0122 cudaStream_t stream;
0123 cms::cudatest::requireDevices();
0124 cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
0125
0126 ZVertexSoADevice onGPU_d(stream);
0127 gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(stream);
0128 #else
0129
0130 ZVertexSoAHost onGPU_d;
0131 gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_d;
0132 #endif
0133
0134 Event ev;
0135
0136 float eps = 0.1f;
0137 std::array<float, 3> par{{eps, 0.01f, 9.0f}};
0138 for (int nav = 30; nav < 80; nav += 20) {
0139 ClusterGenerator gen(nav, 10);
0140
0141 for (int i = 8; i < 20; ++i) {
0142 auto kk = i / 4;
0143
0144 gen(ev);
0145
0146 #ifdef __CUDACC__
0147 gpuVertexFinder::init<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0148 #else
0149 gpuVertexFinder::init(onGPU_d.view(), ws_d.view());
0150 #endif
0151
0152 std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl;
0153 auto nt = ev.ztrack.size();
0154 #ifdef __CUDACC__
0155 cudaCheck(cudaMemcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t), cudaMemcpyHostToDevice));
0156 cudaCheck(
0157 cudaMemcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size(), cudaMemcpyHostToDevice));
0158 cudaCheck(
0159 cudaMemcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
0160 cudaCheck(
0161 cudaMemcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
0162 #else
0163 ::memcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t));
0164 ::memcpy(ws_d.view().zt(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size());
0165 ::memcpy(ws_d.view().ezt2(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size());
0166 ::memcpy(ws_d.view().ptt2(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size());
0167 #endif
0168
0169 std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
0170
0171 if ((i % 4) == 0)
0172 par = {{eps, 0.02f, 12.0f}};
0173 if ((i % 4) == 1)
0174 par = {{eps, 0.02f, 9.0f}};
0175 if ((i % 4) == 2)
0176 par = {{eps, 0.01f, 9.0f}};
0177 if ((i % 4) == 3)
0178 par = {{0.7f * eps, 0.01f, 9.0f}};
0179
0180 uint32_t nv = 0;
0181 #ifdef __CUDACC__
0182 print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0183 cudaCheck(cudaGetLastError());
0184 cudaDeviceSynchronize();
0185
0186 #ifdef ONE_KERNEL
0187 cms::cuda::launch(vertexFinderOneKernel, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0188 #else
0189 cms::cuda::launch(CLUSTERIZE, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0190 #endif
0191 print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
0192
0193 cudaCheck(cudaGetLastError());
0194 cudaDeviceSynchronize();
0195
0196 cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
0197 cudaCheck(cudaGetLastError());
0198 cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0199
0200 #else
0201 print(onGPU_d.view(), ws_d.view());
0202 CLUSTERIZE(onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0203 print(onGPU_d.view(), ws_d.view());
0204 gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
0205 nv = onGPU_d.view().nvFinal();
0206 #endif
0207
0208 if (nv == 0) {
0209 std::cout << "NO VERTICES???" << std::endl;
0210 continue;
0211 }
0212
0213 float* zv = nullptr;
0214 float* wv = nullptr;
0215 float* ptv2 = nullptr;
0216 int32_t* nn = nullptr;
0217 uint16_t* ind = nullptr;
0218
0219
0220 float chi2[2 * nv];
0221
0222 #ifdef __CUDACC__
0223 float hzv[2 * nv];
0224 float hwv[2 * nv];
0225 float hptv2[2 * nv];
0226 int32_t hnn[2 * nv];
0227 uint16_t hind[2 * nv];
0228
0229 zv = hzv;
0230 wv = hwv;
0231 ptv2 = hptv2;
0232 nn = hnn;
0233 ind = hind;
0234 #else
0235 zv = onGPU_d.view().zv();
0236 wv = onGPU_d.view().wv();
0237 ptv2 = onGPU_d.view().ptv2();
0238 nn = onGPU_d.view().ndof();
0239 ind = onGPU_d.view().sortInd();
0240 #endif
0241
0242 #ifdef __CUDACC__
0243 cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0244 cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0245 #else
0246 memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0247 #endif
0248
0249 for (auto j = 0U; j < nv; ++j)
0250 if (nn[j] > 0)
0251 chi2[j] /= float(nn[j]);
0252 {
0253 auto mx = std::minmax_element(chi2, chi2 + nv);
0254 std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0255 }
0256
0257 #ifdef __CUDACC__
0258 cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
0259 cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0260 cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0261 cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0262 #else
0263 gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
0264 nv = onGPU_d.view().nvFinal();
0265 memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0266 #endif
0267
0268 for (auto j = 0U; j < nv; ++j)
0269 if (nn[j] > 0)
0270 chi2[j] /= float(nn[j]);
0271 {
0272 auto mx = std::minmax_element(chi2, chi2 + nv);
0273 std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0274 }
0275
0276 #ifdef __CUDACC__
0277
0278 cms::cuda::launch(gpuVertexFinder::splitVerticesKernel, {1024, 64}, onGPU_d.view(), ws_d.view(), 9.f);
0279 cudaCheck(cudaMemcpy(&nv, &ws_d.view().nvIntermediate(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0280 #else
0281 gpuVertexFinder::splitVertices(onGPU_d.view(), ws_d.view(), 9.f);
0282 nv = ws_d.view().nvIntermediate();
0283 #endif
0284 std::cout << "after split " << nv << std::endl;
0285
0286 #ifdef __CUDACC__
0287 cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 5000.f);
0288 cudaCheck(cudaGetLastError());
0289
0290 cms::cuda::launch(gpuVertexFinder::sortByPt2Kernel, {1, 256}, onGPU_d.view(), ws_d.view());
0291 cudaCheck(cudaGetLastError());
0292 cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
0293 #else
0294 gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 5000.f);
0295 gpuVertexFinder::sortByPt2(onGPU_d.view(), ws_d.view());
0296 nv = onGPU_d.view().nvFinal();
0297 memcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float));
0298 #endif
0299
0300 if (nv == 0) {
0301 std::cout << "NO VERTICES???" << std::endl;
0302 continue;
0303 }
0304
0305 #ifdef __CUDACC__
0306 cudaCheck(cudaMemcpy(zv, onGPU_d.view().zv(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0307 cudaCheck(cudaMemcpy(wv, onGPU_d.view().wv(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0308 cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0309 cudaCheck(cudaMemcpy(ptv2, onGPU_d.view().ptv2(), nv * sizeof(float), cudaMemcpyDeviceToHost));
0310 cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
0311 cudaCheck(cudaMemcpy(ind, onGPU_d.view().sortInd(), nv * sizeof(uint16_t), cudaMemcpyDeviceToHost));
0312 #endif
0313 for (auto j = 0U; j < nv; ++j)
0314 if (nn[j] > 0)
0315 chi2[j] /= float(nn[j]);
0316 {
0317 auto mx = std::minmax_element(chi2, chi2 + nv);
0318 std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
0319 }
0320
0321 {
0322 auto mx = std::minmax_element(wv, wv + nv);
0323 std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) << std::endl;
0324 }
0325
0326 {
0327 auto mx = std::minmax_element(ptv2, ptv2 + nv);
0328 std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
0329 std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv - 1]] << " at " << ind[0] << ' '
0330 << ind[nv - 1] << std::endl;
0331 }
0332
0333 float dd[nv];
0334 for (auto kv = 0U; kv < nv; ++kv) {
0335 auto zr = zv[kv];
0336 auto md = 500.0f;
0337 for (auto zs : ev.ztrack) {
0338 auto d = std::abs(zr - zs);
0339 md = std::min(d, md);
0340 }
0341 dd[kv] = md;
0342 }
0343 if (i == 6) {
0344 for (auto d : dd)
0345 std::cout << d << ' ';
0346 std::cout << std::endl;
0347 }
0348 auto mx = std::minmax_element(dd, dd + nv);
0349 float rms = 0;
0350 for (auto d : dd)
0351 rms += d * d;
0352 rms = std::sqrt(rms) / (nv - 1);
0353 std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
0354
0355 }
0356 }
0357
0358 return 0;
0359 }