Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:28:42

0001 #include <cmath>
0002 #include <cstdint>
0003 #include <iostream>
0004 #include <random>
0005 #include <vector>
0006 
0007 #include <alpaka/alpaka.hpp>
0008 
0009 // TrackUtilities only included in order to compile SoALayout with Eigen columns
0010 #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
0011 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
0012 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
0013 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0014 #ifdef USE_DBSCAN
0015 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h"
0016 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksDBSCAN
0017 #elif USE_ITERATIVE
0018 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h"
0019 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksIterative
0020 #else
0021 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h"
0022 #define CLUSTERIZE ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder::ClusterTracksByDensityKernel
0023 #endif
0024 #include "RecoTracker/PixelVertexFinding/interface/PixelVertexWorkSpaceLayout.h"
0025 #include "RecoTracker/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHostAlpaka.h"
0026 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/PixelVertexWorkSpaceSoADeviceAlpaka.h"
0027 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h"
0028 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h"
0029 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h"
0030 #include "RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h"
0031 
0032 #include "VertexFinder_t.h"
0033 
0034 namespace ALPAKA_ACCELERATOR_NAMESPACE {
0035   using namespace cms::alpakatools;
0036 
0037   struct ClusterGenerator {
0038     explicit ClusterGenerator(float nvert, float ntrack)
0039         : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
0040 
0041     void operator()(vertexFinder::PixelVertexWorkSpaceSoAHost& pwsh, ZVertexHost& vtxh) {
0042       int nclus = clusGen(reng);
0043       for (int zint = 0; zint < vtxh.view().metadata().size(); ++zint) {
0044         vtxh.view().zv()[zint] = 3.5f * gauss(reng);
0045       }
0046 
0047       int aux = 0;
0048       for (int iv = 0; iv < nclus; ++iv) {
0049         auto nt = trackGen(reng);
0050         pwsh.view().itrk()[iv] = nt;
0051         for (int it = 0; it < nt; ++it) {
0052           auto err = errgen(reng);  // reality is not flat....
0053           pwsh.view().zt()[aux] = vtxh.view().zv()[iv] + err * gauss(reng);
0054           pwsh.view().ezt2()[aux] = err * err;
0055           pwsh.view().iv()[aux] = iv;
0056           pwsh.view().ptt2()[aux] = (iv == 5 ? 1.f : 0.5f) + ptGen(reng);
0057           pwsh.view().ptt2()[aux] *= pwsh.view().ptt2()[aux];
0058           ++aux;
0059         }
0060       }
0061       pwsh.view().ntrks() = aux;
0062       // add noise
0063       auto nt = 2 * trackGen(reng);
0064       for (int it = 0; it < nt; ++it) {
0065         auto err = 0.03f;
0066         pwsh.view().zt()[it] = rgen(reng);
0067         pwsh.view().ezt2()[it] = err * err;
0068         pwsh.view().iv()[it] = 9999;
0069         pwsh.view().ptt2()[it] = 0.5f + ptGen(reng);
0070         pwsh.view().ptt2()[it] *= pwsh.view().ptt2()[it];
0071       }
0072     }
0073 
0074     std::mt19937 reng;
0075     std::uniform_real_distribution<float> rgen;
0076     std::uniform_real_distribution<float> errgen;
0077     std::poisson_distribution<int> clusGen;
0078     std::poisson_distribution<int> trackGen;
0079     std::normal_distribution<float> gauss;
0080     std::exponential_distribution<float> ptGen;
0081   };
0082 
0083   namespace vertexfinder_t {
0084 #ifdef ONE_KERNEL
0085     class VertexFinderOneKernel {
0086     public:
0087       template <typename TAcc>
0088       ALPAKA_FN_ACC void operator()(const TAcc& acc,
0089                                     vertexFinder::VtxSoAView pdata,
0090                                     vertexFinder::WsSoAView pws,
0091                                     int minT,      // min number of neighbours to be "seed"
0092                                     float eps,     // max absolute distance to cluster
0093                                     float errmax,  // max error to be "seed"
0094                                     float chi2max  // max normalized distance to cluster,
0095       ) const {
0096         vertexFinder::clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max);
0097         alpaka::syncBlockThreads(acc);
0098         vertexFinder::fitVertices(acc, pdata, pws, 50.);
0099         alpaka::syncBlockThreads(acc);
0100         vertexFinder::splitVertices(acc, pdata, pws, 9.f);
0101         alpaka::syncBlockThreads(acc);
0102         vertexFinder::fitVertices(acc, pdata, pws, 5000.);
0103         alpaka::syncBlockThreads(acc);
0104         vertexFinder::sortByPt2(acc, pdata, pws);
0105         alpaka::syncBlockThreads(acc);
0106       }
0107     };
0108 #endif
0109 
0110     class Kernel_print {
0111     public:
0112       template <typename TAcc>
0113       ALPAKA_FN_ACC void operator()(const TAcc& acc,
0114                                     vertexFinder::VtxSoAView pdata,
0115                                     vertexFinder::WsSoAView pws) const {
0116         printf("nt,nv %d %d,%d\n", pws.ntrks(), pdata.nvFinal(), pws.nvIntermediate());
0117       }
0118     };
0119 
0120     void runKernels(Queue& queue) {
0121       vertexFinder::PixelVertexWorkSpaceSoADevice ws_d(zVertex::MAXTRACKS, queue);
0122       vertexFinder::PixelVertexWorkSpaceSoAHost ws_h(zVertex::MAXTRACKS, queue);
0123       ZVertexHost vertices_h(queue);
0124       ZVertexSoACollection vertices_d(queue);
0125 
0126       float eps = 0.1f;
0127       std::array<float, 3> par{{eps, 0.01f, 9.0f}};
0128       for (int nav = 30; nav < 80; nav += 20) {
0129         ClusterGenerator gen(nav, 10);
0130 
0131         for (int i = 8; i < 20; ++i) {
0132           auto kk = i / 4;  // M param
0133 
0134           gen(ws_h, vertices_h);
0135           auto workDiv1D = make_workdiv<Acc1D>(1, 1);
0136           alpaka::exec<Acc1D>(queue, workDiv1D, vertexFinder::Init{}, vertices_d.view(), ws_d.view());
0137           // std::cout << "v,t size " << ws_h.view().zt()[0] << ' ' << vertices_h.view().zv()[0] << std::endl;
0138           alpaka::memcpy(queue, ws_d.buffer(), ws_h.buffer());
0139           alpaka::wait(queue);
0140 
0141           std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
0142 
0143           if ((i % 4) == 0)
0144             par = {{eps, 0.02f, 12.0f}};
0145           if ((i % 4) == 1)
0146             par = {{eps, 0.02f, 9.0f}};
0147           if ((i % 4) == 2)
0148             par = {{eps, 0.01f, 9.0f}};
0149           if ((i % 4) == 3)
0150             par = {{0.7f * eps, 0.01f, 9.0f}};
0151 
0152           alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0153 
0154           auto workDivClusterizer = make_workdiv<Acc1D>(1, 512 + 256);
0155 #ifdef ONE_KERNEL
0156           alpaka::exec<Acc1D>(queue,
0157                               workDivClusterizer,
0158                               VertexFinderOneKernel{},
0159                               vertices_d.view(),
0160                               ws_d.view(),
0161                               kk,
0162                               par[0],
0163                               par[1],
0164                               par[2]);
0165 #else
0166           alpaka::exec<Acc1D>(
0167               queue, workDivClusterizer, CLUSTERIZE{}, vertices_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
0168 #endif
0169           alpaka::wait(queue);
0170           alpaka::exec<Acc1D>(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view());
0171           alpaka::wait(queue);
0172 
0173           auto workDivFitter = make_workdiv<Acc1D>(1, 1024 - 256);
0174 
0175           alpaka::exec<Acc1D>(
0176               queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f);
0177 
0178           alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0179           alpaka::wait(queue);
0180 
0181           if (vertices_h.view().nvFinal() == 0) {
0182             std::cout << "NO VERTICES???" << std::endl;
0183             continue;
0184           }
0185 
0186           for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0187             if (vertices_h.view().ndof()[j] > 0)
0188               vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0189           {
0190             auto mx =
0191                 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0192             std::cout << "after fit nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0193                       << *mx.second << std::endl;
0194           }
0195 
0196           alpaka::exec<Acc1D>(
0197               queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f);
0198           alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0199           alpaka::wait(queue);
0200 
0201           for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0202             if (vertices_h.view().ndof()[j] > 0)
0203               vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0204           {
0205             auto mx =
0206                 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0207             std::cout << "before splitting nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' '
0208                       << *mx.second << std::endl;
0209           }
0210 
0211           auto workDivSplitter = make_workdiv<Acc1D>(1024, 64);
0212 
0213           // one vertex per block!!!
0214           alpaka::exec<Acc1D>(
0215               queue, workDivSplitter, vertexFinder::SplitVerticesKernel{}, vertices_d.view(), ws_d.view(), 9.f);
0216           alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer());
0217           alpaka::wait(queue);
0218           std::cout << "after split " << ws_h.view().nvIntermediate() << std::endl;
0219 
0220           alpaka::exec<Acc1D>(
0221               queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 5000.f);
0222 
0223           auto workDivSorter = make_workdiv<Acc1D>(1, 256);
0224           alpaka::exec<Acc1D>(queue, workDivSorter, vertexFinder::SortByPt2Kernel{}, vertices_d.view(), ws_d.view());
0225           alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer());
0226           alpaka::wait(queue);
0227 
0228           if (vertices_h.view().nvFinal() == 0) {
0229             std::cout << "NO VERTICES???" << std::endl;
0230             continue;
0231           }
0232 
0233           for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j)
0234             if (vertices_h.view().ndof()[j] > 0)
0235               vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]);
0236           {
0237             auto mx =
0238                 std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());
0239             std::cout << "nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' << *mx.second
0240                       << std::endl;
0241           }
0242 
0243           {
0244             auto mx = std::minmax_element(vertices_h.view().wv(), vertices_h.view().wv() + vertices_h.view().nvFinal());
0245             std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second)
0246                       << std::endl;
0247           }
0248 
0249           {
0250             auto mx =
0251                 std::minmax_element(vertices_h.view().ptv2(), vertices_h.view().ptv2() + vertices_h.view().nvFinal());
0252             std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
0253             std::cout << "min max ptv2 " << vertices_h.view().ptv2()[vertices_h.view().sortInd()[0]] << ' '
0254                       << vertices_h.view().ptv2()[vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1]]
0255                       << " at " << vertices_h.view().sortInd()[0] << ' '
0256                       << vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1] << std::endl;
0257           }
0258 
0259           float dd[vertices_h.view().nvFinal()];
0260           for (auto kv = 0U; kv < vertices_h.view().nvFinal(); ++kv) {
0261             auto zr = vertices_h.view().zv()[kv];
0262             auto md = 500.0f;
0263             for (int zint = 0; zint < ws_h.view().metadata().size(); ++zint) {
0264               auto d = std::abs(zr - ws_h.view().zt()[zint]);
0265               md = std::min(d, md);
0266             }
0267             dd[kv] = md;
0268           }
0269           if (i == 6) {
0270             for (auto d : dd)
0271               std::cout << d << ' ';
0272             std::cout << std::endl;
0273           }
0274           auto mx = std::minmax_element(dd, dd + vertices_h.view().nvFinal());
0275           float rms = 0;
0276           for (auto d : dd)
0277             rms += d * d;
0278           rms = std::sqrt(rms) / (vertices_h.view().nvFinal() - 1);
0279           std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
0280 
0281         }  // loop on events
0282       }    // lopp on ave vert
0283     }
0284   }  // namespace vertexfinder_t
0285 }  // namespace ALPAKA_ACCELERATOR_NAMESPACE