File indexing completed on 2024-09-13 22:52:47
0001 #include <cmath>
0002 #include <cstdint>
0003 #include <iostream>
0004 #include <random>
0005 #include <vector>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009
0010 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #ifdef USE_DBSCAN
0015 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h"
0016 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN
0017 #elif USE_ITERATIVE
0018 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h"
0019 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative
0020 #else
0021 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h"
0022 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksByDensityKernel
0023 #endif
0024 #include "RecoVertex/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0025 #include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h"
0026 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h"
0027 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/fitVertices.h"
0028 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/sortByPt2.h"
0029 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/splitVertices.h"
0030 #include "RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h"
0031
0032 #include "VertexFinder_t.h"
0033
0034 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0035 using namespace cms::alpakatools;
0036
0037 struct ClusterGenerator {
0038 explicit ClusterGenerator(float nvert, float ntrack)
0039 : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
0040
0041 void operator()(vertexFinder::PixelVertexWorkSpaceSoAHost& pwsh, ZVertexHost& vtxh) {
0042 int nclus = clusGen(reng);
0043 for (int zint = 0; zint < vtxh.view().metadata().size(); ++zint) {
0044 vtxh.view().zv()[zint] = 3.5f * gauss(reng);
0045 }
0046
0047 int aux = 0;
0048 for (int iv = 0; iv < nclus; ++iv) {
0049 auto nt = trackGen(reng);
0050 pwsh.view().itrk()[iv] = nt;
0051 for (int it = 0; it < nt; ++it) {
0052 auto err = errgen(reng);
0053 pwsh.view().zt()[aux] = vtxh.view().zv()[iv] + err * gauss(reng);
0054 pwsh.view().ezt2()[aux] = err * err;
0055 pwsh.view().iv()[aux] = iv;
0056 pwsh.view().ptt2()[aux] = (iv == 5 ? 1.f : 0.5f) + ptGen(reng);
0057 pwsh.view().ptt2()[aux] *= pwsh.view().ptt2()[aux];
0058 ++aux;
0059 }
0060 }
0061 pwsh.view().ntrks() = aux;
0062
0063 auto nt = 2 * trackGen(reng);
0064 for (int it = 0; it < nt; ++it) {
0065 auto err = 0.03f;
0066 pwsh.view().zt()[it] = rgen(reng);
0067 pwsh.view().ezt2()[it] = err * err;
0068 pwsh.view().iv()[it] = 9999;
0069 pwsh.view().ptt2()[it] = 0.5f + ptGen(reng);
0070 pwsh.view().ptt2()[it] *= pwsh.view().ptt2()[it];
0071 }
0072 }
0073
0074 std::mt19937 reng;
0075 std::uniform_real_distribution<float> rgen;
0076 std::uniform_real_distribution<float> errgen;
0077 std::poisson_distribution<int> clusGen;
0078 std::poisson_distribution<int> trackGen;
0079 std::normal_distribution<float> gauss;
0080 std::exponential_distribution<float> ptGen;
0081 };
0082
0083 namespace vertexfinder_t {
0084 #ifdef ONE_KERNEL
0085 class VertexFinderOneKernel {
0086 public:
0087 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0088 vertexFinder::VtxSoAView pdata,
0089 vertexFinder::TrkSoAView ptrkdata,
0090 vertexFinder::WsSoAView pws,
0091 int minT,
0092 float eps,
0093 float errmax,
0094 float chi2max
0095 ) const {
0096 vertexFinder::clusterTracksByDensity(acc, pdata, ptrkdata, pws, minT, eps, errmax, chi2max);
0097 alpaka::syncBlockThreads(acc);
0098 vertexFinder::fitVertices(acc, pdata, ptrkdata, pws, 50.);
0099 alpaka::syncBlockThreads(acc);
0100 vertexFinder::splitVertices(acc, pdata, ptrkdata, pws, 9.f);
0101 alpaka::syncBlockThreads(acc);
0102 vertexFinder::fitVertices(acc, pdata, ptrkdata, pws, 5000.);
0103 alpaka::syncBlockThreads(acc);
0104 vertexFinder::sortByPt2(acc, pdata, ptrkdata, pws);
0105 alpaka::syncBlockThreads(acc);
0106 }
0107 };
0108 #endif
0109
0110 class Kernel_print {
0111 public:
0112 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0113 vertexFinder::VtxSoAView pdata,
0114 vertexFinder::WsSoAView pws) const {
0115 printf("nt,nv %d %d,%d\n", pws.ntrks(), pdata.nvFinal(), pws.nvIntermediate());
0116 }
0117 };
0118
0119 void runKernels(Queue& queue) {
0120
0121 constexpr uint32_t maxTracks = 32 * 1024;
0122 constexpr uint32_t maxVertices = 1024;
0123
0124 vertexFinder::PixelVertexWorkSpaceSoADevice ws_d(maxTracks, queue);
0125 vertexFinder::PixelVertexWorkSpaceSoAHost ws_h(maxTracks, queue);
0126 ZVertexHost vertices_h({{maxVertices, maxTracks}}, queue);
0127 ZVertexSoACollection vertices_d({{maxVertices, maxTracks}}, queue);
0128
0129 float eps = 0.1f;
0130 std::array<float, 3> par{{eps, 0.01f, 9.0f}};
0131 for (int nav = 30; nav < 80; nav += 20) {
0132 ClusterGenerator gen(nav, 10);
0133
0134 for (int i = 8; i < 20; ++i) {
0135 auto kk = i / 4;
0136
0137 gen(ws_h, vertices_h);
0138 auto workDiv1D = make_workdiv<Acc1D>(1, 1);
0139 alpaka::exec<Acc1D>(queue, workDiv1D, vertexFinder::Init{}, vertices_d.view(), ws_d.view());
0140
0141 alpaka::memcpy(queue, ws_d.buffer(), ws_h.buffer());
0142 alpaka::wait(queue);
0143
0144 std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
0145
0146 if ((i % 4) == 0)
0147 par = {{eps, 0.02f, 12.0f}};
0148 if ((i % 4) == 1)
0149 par = {{eps, 0.02f, 9.0f}};
0150 if ((i % 4) == 2)
0151 par = {{eps, 0.01f, 9.0f}};
0152 if ((i % 4) == 3)
0153 par = {{0.7f * eps, 0.01f, 9.0f}};
0154
0155 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0156
0157 auto workDivClusterizer = make_workdiv<Acc1D>(1, 512 + 256);
0158 #ifdef ONE_KERNEL
0159 alpaka::exec<Acc1D>(queue,
0160 workDivClusterizer,
0161 VertexFinderOneKernel{},
0162 vertices_d.view(),
0163 vertices_d.view<reco::ZVertexTracksSoA>(),
0164 ws_d.view(),
0165 kk,
0166 par[0],
0167 par[1],
0168 par[2]);
0169 #else
0170 alpaka::exec<Acc1D>(queue,
0171 workDivClusterizer,
0172 CLUSTERIZE{},
0173 vertices_d.view(),
0174 vertices_d.view<reco::ZVertexTracksSoA>(),
0175 ws_d.view(),
0176 kk,
0177 par[0],
0178 par[1],
0179 par[2]);
0180 #endif
0181 alpaka::wait(queue);
0182 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0183 alpaka::wait(queue);
0184
0185 auto workDivFitter = make_workdiv<Acc1D>(1, 1024 - 256);
0186
0187 alpaka::exec<Acc1D>(queue,
0188 workDivFitter,
0189 vertexFinder::FitVerticesKernel{},
0190 vertices_d.view(),
0191 vertices_d.view<reco::ZVertexTracksSoA>(),
0192 ws_d.view(),
0193 50.f);
0194
0195 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0196 alpaka::wait(queue);
0197
0198 if (vertices_h.view().nvFinal() == 0) {
0199 std::cout << "NO VERTICES???" << std::endl;
0200 continue;
0201 }
0202
0203 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0204 if (vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j] > 0)
0205 vertices_h.view().chi2()[j] /= float(vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j]);
0206 {
0207 auto mx =
0208 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0209 std::cout << "after fit nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0210 << *mx.second << std::endl;
0211 }
0212
0213 alpaka::exec<Acc1D>(queue,
0214 workDivFitter,
0215 vertexFinder::FitVerticesKernel{},
0216 vertices_d.view(),
0217 vertices_d.view<reco::ZVertexTracksSoA>(),
0218 ws_d.view(),
0219 50.f);
0220 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0221 alpaka::wait(queue);
0222
0223 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0224 if (vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j] > 0)
0225 vertices_h.view().chi2()[j] /= float(vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j]);
0226 {
0227 auto mx =
0228 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0229 std::cout << "before splitting nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0230 << *mx.second << std::endl;
0231 }
0232
0233 auto workDivSplitter = make_workdiv<Acc1D>(1024, 64);
0234
0235
0236 alpaka::exec<Acc1D>(queue,
0237 workDivSplitter,
0238 vertexFinder::SplitVerticesKernel{},
0239 vertices_d.view(),
0240 vertices_d.view<reco::ZVertexTracksSoA>(),
0241 ws_d.view(),
0242 9.f);
0243 alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer());
0244 alpaka::wait(queue);
0245 std::cout << "after split " << ws_h.view().nvIntermediate() << std::endl;
0246
0247 alpaka::exec<Acc1D>(queue,
0248 workDivFitter,
0249 vertexFinder::FitVerticesKernel{},
0250 vertices_d.view(),
0251 vertices_d.view<reco::ZVertexTracksSoA>(),
0252 ws_d.view(),
0253 5000.f);
0254
0255 auto workDivSorter = make_workdiv<Acc1D>(1, 256);
0256 alpaka::exec<Acc1D>(queue,
0257 workDivSorter,
0258 vertexFinder::SortByPt2Kernel{},
0259 vertices_d.view(),
0260 vertices_d.view<reco::ZVertexTracksSoA>(),
0261 ws_d.view());
0262 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0263 alpaka::wait(queue);
0264
0265 if (vertices_h.view().nvFinal() == 0) {
0266 std::cout << "NO VERTICES???" << std::endl;
0267 continue;
0268 }
0269
0270 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0271 if (vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j] > 0)
0272 vertices_h.view().chi2()[j] /= float(vertices_h.view<reco::ZVertexTracksSoA>().ndof()[j]);
0273 {
0274 auto mx =
0275 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0276 std::cout << "nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' << *mx.second
0277 << std::endl;
0278 }
0279
0280 {
0281 auto mx = std::minmax_element(vertices_h.view().wv(), vertices_h.view().wv() + vertices_h.view().nvFinal());
0282 std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second)
0283 << std::endl;
0284 }
0285
0286 {
0287 auto mx =
0288 std::minmax_element(vertices_h.view().ptv2(), vertices_h.view().ptv2() + vertices_h.view().nvFinal());
0289 std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
0290 std::cout << "min max ptv2 " << vertices_h.view().ptv2()[vertices_h.view().sortInd()[0]] << ' '
0291 << vertices_h.view().ptv2()[vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1]]
0292 << " at " << vertices_h.view().sortInd()[0] << ' '
0293 << vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1] << std::endl;
0294 }
0295
0296 float dd[vertices_h.view().nvFinal()];
0297 for (auto kv = 0U; kv < vertices_h.view().nvFinal(); ++kv) {
0298 auto zr = vertices_h.view().zv()[kv];
0299 auto md = 500.0f;
0300 for (int zint = 0; zint < ws_h.view().metadata().size(); ++zint) {
0301 auto d = std::abs(zr - ws_h.view().zt()[zint]);
0302 md = std::min(d, md);
0303 }
0304 dd[kv] = md;
0305 }
0306 if (i == 6) {
0307 for (auto d : dd)
0308 std::cout << d << ' ';
0309 std::cout << std::endl;
0310 }
0311 auto mx = std::minmax_element(dd, dd + vertices_h.view().nvFinal());
0312 float rms = 0;
0313 for (auto d : dd)
0314 rms += d * d;
0315 rms = std::sqrt(rms) / (vertices_h.view().nvFinal() - 1);
0316 std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
0317
0318 }
0319 }
0320 }
0321 }
0322 }