File indexing completed on 2024-04-06 12:28:42
0001 #include <cmath>
0002 #include <cstdint>
0003 #include <iostream>
0004 #include <random>
0005 #include <vector>
0006
0007 #include <alpaka/alpaka.hpp>
0008
0009
0010 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #ifdef USE_DBSCAN
0015 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h"
0016 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN
0017 #elif USE_ITERATIVE
0018 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h"
0019 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative
0020 #else
0021 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h"
0022 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksByDensityKernel
0023 #endif
0024 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0025 #include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h"
0026 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h"
0027 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h"
0028 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h"
0029 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h"
0030 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h"
0031
0032 #include "VertexFinder_t.h"
0033
0034 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0035 using namespace cms::alpakatools;
0036
0037 struct ClusterGenerator {
0038 explicit ClusterGenerator(float nvert, float ntrack)
0039 : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
0040
0041 void operator()(vertexFinder::PixelVertexWorkSpaceSoAHost& pwsh, ZVertexHost& vtxh) {
0042 int nclus = clusGen(reng);
0043 for (int zint = 0; zint < vtxh.view().metadata().size(); ++zint) {
0044 vtxh.view().zv()[zint] = 3.5f * gauss(reng);
0045 }
0046
0047 int aux = 0;
0048 for (int iv = 0; iv < nclus; ++iv) {
0049 auto nt = trackGen(reng);
0050 pwsh.view().itrk()[iv] = nt;
0051 for (int it = 0; it < nt; ++it) {
0052 auto err = errgen(reng);
0053 pwsh.view().zt()[aux] = vtxh.view().zv()[iv] + err * gauss(reng);
0054 pwsh.view().ezt2()[aux] = err * err;
0055 pwsh.view().iv()[aux] = iv;
0056 pwsh.view().ptt2()[aux] = (iv == 5 ? 1.f : 0.5f) + ptGen(reng);
0057 pwsh.view().ptt2()[aux] *= pwsh.view().ptt2()[aux];
0058 ++aux;
0059 }
0060 }
0061 pwsh.view().ntrks() = aux;
0062
0063 auto nt = 2 * trackGen(reng);
0064 for (int it = 0; it < nt; ++it) {
0065 auto err = 0.03f;
0066 pwsh.view().zt()[it] = rgen(reng);
0067 pwsh.view().ezt2()[it] = err * err;
0068 pwsh.view().iv()[it] = 9999;
0069 pwsh.view().ptt2()[it] = 0.5f + ptGen(reng);
0070 pwsh.view().ptt2()[it] *= pwsh.view().ptt2()[it];
0071 }
0072 }
0073
0074 std::mt19937 reng;
0075 std::uniform_real_distribution<float> rgen;
0076 std::uniform_real_distribution<float> errgen;
0077 std::poisson_distribution<int> clusGen;
0078 std::poisson_distribution<int> trackGen;
0079 std::normal_distribution<float> gauss;
0080 std::exponential_distribution<float> ptGen;
0081 };
0082
0083 namespace vertexfinder_t {
0084 #ifdef ONE_KERNEL
0085 class VertexFinderOneKernel {
0086 public:
0087 template <typename TAcc>
0088 ALPAKA_FN_ACC void operator()(const TAcc& acc,
0089 vertexFinder::VtxSoAView pdata,
0090 vertexFinder::WsSoAView pws,
0091 int minT,
0092 float eps,
0093 float errmax,
0094 float chi2max
0095 ) const {
0096 vertexFinder::clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
0097 alpaka::syncBlockThreads(acc);
0098 vertexFinder::fitVertices(acc, pdata, pws, 50.);
0099 alpaka::syncBlockThreads(acc);
0100 vertexFinder::splitVertices(acc, pdata, pws, 9.f);
0101 alpaka::syncBlockThreads(acc);
0102 vertexFinder::fitVertices(acc, pdata, pws, 5000.);
0103 alpaka::syncBlockThreads(acc);
0104 vertexFinder::sortByPt2(acc, pdata, pws);
0105 alpaka::syncBlockThreads(acc);
0106 }
0107 };
0108 #endif
0109
0110 class Kernel_print {
0111 public:
0112 template <typename TAcc>
0113 ALPAKA_FN_ACC void operator()(const TAcc& acc,
0114 vertexFinder::VtxSoAView pdata,
0115 vertexFinder::WsSoAView pws) const {
0116 printf("nt,nv %d %d,%d\n", pws.ntrks(), pdata.nvFinal(), pws.nvIntermediate());
0117 }
0118 };
0119
0120 void runKernels(Queue& queue) {
0121 vertexFinder::PixelVertexWorkSpaceSoADevice ws_d(zVertex::MAXTRACKS, queue);
0122 vertexFinder::PixelVertexWorkSpaceSoAHost ws_h(zVertex::MAXTRACKS, queue);
0123 ZVertexHost vertices_h(queue);
0124 ZVertexSoACollection vertices_d(queue);
0125
0126 float eps = 0.1f;
0127 std::array<float, 3> par{{eps, 0.01f, 9.0f}};
0128 for (int nav = 30; nav < 80; nav += 20) {
0129 ClusterGenerator gen(nav, 10);
0130
0131 for (int i = 8; i < 20; ++i) {
0132 auto kk = i / 4;
0133
0134 gen(ws_h, vertices_h);
0135 auto workDiv1D = make_workdiv<Acc1D>(1, 1);
0136 alpaka::exec<Acc1D>(queue, workDiv1D, vertexFinder::Init{}, vertices_d.view(), ws_d.view());
0137
0138 alpaka::memcpy(queue, ws_d.buffer(), ws_h.buffer());
0139 alpaka::wait(queue);
0140
0141 std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
0142
0143 if ((i % 4) == 0)
0144 par = {{eps, 0.02f, 12.0f}};
0145 if ((i % 4) == 1)
0146 par = {{eps, 0.02f, 9.0f}};
0147 if ((i % 4) == 2)
0148 par = {{eps, 0.01f, 9.0f}};
0149 if ((i % 4) == 3)
0150 par = {{0.7f * eps, 0.01f, 9.0f}};
0151
0152 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0153
0154 auto workDivClusterizer = make_workdiv<Acc1D>(1, 512 + 256);
0155 #ifdef ONE_KERNEL
0156 alpaka::exec<Acc1D>(queue,
0157 workDivClusterizer,
0158 VertexFinderOneKernel{},
0159 vertices_d.view(),
0160 ws_d.view(),
0161 kk,
0162 par[0],
0163 par[1],
0164 par[2]);
0165 #else
0166 alpaka::exec<Acc1D>(
0167 queue, workDivClusterizer, CLUSTERIZE{}, vertices_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0168 #endif
0169 alpaka::wait(queue);
0170 alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0171 alpaka::wait(queue);
0172
0173 auto workDivFitter = make_workdiv<Acc1D>(1, 1024 - 256);
0174
0175 alpaka::exec<Acc1D>(
0176 queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f);
0177
0178 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0179 alpaka::wait(queue);
0180
0181 if (vertices_h.view().nvFinal() == 0) {
0182 std::cout << "NO VERTICES???" << std::endl;
0183 continue;
0184 }
0185
0186 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0187 if (vertices_h.view().ndof()[j] > 0)
0188 vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0189 {
0190 auto mx =
0191 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0192 std::cout << "after fit nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0193 << *mx.second << std::endl;
0194 }
0195
0196 alpaka::exec<Acc1D>(
0197 queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f);
0198 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0199 alpaka::wait(queue);
0200
0201 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0202 if (vertices_h.view().ndof()[j] > 0)
0203 vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0204 {
0205 auto mx =
0206 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0207 std::cout << "before splitting nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0208 << *mx.second << std::endl;
0209 }
0210
0211 auto workDivSplitter = make_workdiv<Acc1D>(1024, 64);
0212
0213
0214 alpaka::exec<Acc1D>(
0215 queue, workDivSplitter, vertexFinder::SplitVerticesKernel{}, vertices_d.view(), ws_d.view(), 9.f);
0216 alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer());
0217 alpaka::wait(queue);
0218 std::cout << "after split " << ws_h.view().nvIntermediate() << std::endl;
0219
0220 alpaka::exec<Acc1D>(
0221 queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 5000.f);
0222
0223 auto workDivSorter = make_workdiv<Acc1D>(1, 256);
0224 alpaka::exec<Acc1D>(queue, workDivSorter, vertexFinder::SortByPt2Kernel{}, vertices_d.view(), ws_d.view());
0225 alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0226 alpaka::wait(queue);
0227
0228 if (vertices_h.view().nvFinal() == 0) {
0229 std::cout << "NO VERTICES???" << std::endl;
0230 continue;
0231 }
0232
0233 for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0234 if (vertices_h.view().ndof()[j] > 0)
0235 vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0236 {
0237 auto mx =
0238 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0239 std::cout << "nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' << *mx.second
0240 << std::endl;
0241 }
0242
0243 {
0244 auto mx = std::minmax_element(vertices_h.view().wv(), vertices_h.view().wv() + vertices_h.view().nvFinal());
0245 std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second)
0246 << std::endl;
0247 }
0248
0249 {
0250 auto mx =
0251 std::minmax_element(vertices_h.view().ptv2(), vertices_h.view().ptv2() + vertices_h.view().nvFinal());
0252 std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
0253 std::cout << "min max ptv2 " << vertices_h.view().ptv2()[vertices_h.view().sortInd()[0]] << ' '
0254 << vertices_h.view().ptv2()[vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1]]
0255 << " at " << vertices_h.view().sortInd()[0] << ' '
0256 << vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1] << std::endl;
0257 }
0258
0259 float dd[vertices_h.view().nvFinal()];
0260 for (auto kv = 0U; kv < vertices_h.view().nvFinal(); ++kv) {
0261 auto zr = vertices_h.view().zv()[kv];
0262 auto md = 500.0f;
0263 for (int zint = 0; zint < ws_h.view().metadata().size(); ++zint) {
0264 auto d = std::abs(zr - ws_h.view().zt()[zint]);
0265 md = std::min(d, md);
0266 }
0267 dd[kv] = md;
0268 }
0269 if (i == 6) {
0270 for (auto d : dd)
0271 std::cout << d << ' ';
0272 std::cout << std::endl;
0273 }
0274 auto mx = std::minmax_element(dd, dd + vertices_h.view().nvFinal());
0275 float rms = 0;
0276 for (auto d : dd)
0277 rms += d * d;
0278 rms = std::sqrt(rms) / (vertices_h.view().nvFinal() - 1);
0279 std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
0280
0281 }
0282 }
0283 }
0284 }
0285 }