File indexing completed on 2024-04-20 02:32:07
0001 #include <cmath>
0002 #include <limits>
0003 #include <alpaka/alpaka.hpp>
0004
0005 #include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h"
0006 #include "DataFormats/CaloRecHit/interface/MultifitComputations.h"
0007 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0009
0010 #include "AmplitudeComputationKernels.h"
0011 #include "KernelHelpers.h"
0012 #include "EcalUncalibRecHitMultiFitAlgoPortable.h"
0013
0014 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0015
0016 using namespace ::ecal::multifit;
0017
0018 template <typename MatrixType>
0019 ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance(EcalPulseCovariance const& pulse_covariance,
0020 MatrixType& inverse_cov,
0021 SampleVector const& amplitudes) {
0022 constexpr auto nsamples = SampleVector::RowsAtCompileTime;
0023 constexpr auto npulses = BXVectorType::RowsAtCompileTime;
0024
0025 CMS_UNROLL_LOOP
0026 for (unsigned int ipulse = 0; ipulse < npulses; ++ipulse) {
0027 auto const amplitude = amplitudes.coeff(ipulse);
0028 if (amplitude == 0)
0029 continue;
0030
0031
0032 int bx = ipulse - 5;
0033 int first_sample_t = std::max(0, bx + 3);
0034 int offset = -3 - bx;
0035
0036 auto const value_sq = amplitude * amplitude;
0037
0038 for (int col = first_sample_t; col < nsamples; ++col) {
0039 for (int row = col; row < nsamples; ++row) {
0040 inverse_cov(row, col) += value_sq * pulse_covariance.covval[row + offset][col + offset];
0041 }
0042 }
0043 }
0044 }
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056 class Kernel_minimize {
0057 public:
0058 template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
0059 ALPAKA_FN_ACC void operator()(TAcc const& acc,
0060 InputProduct::ConstView const& digisDevEB,
0061 InputProduct::ConstView const& digisDevEE,
0062 OutputProduct::View uncalibRecHitsEB,
0063 OutputProduct::View uncalibRecHitsEE,
0064 EcalMultifitConditionsDevice::ConstView conditionsDev,
0065 ::ecal::multifit::SampleMatrix const* noisecov,
0066 ::ecal::multifit::PulseMatrixType const* pulse_matrix,
0067 ::ecal::multifit::BXVectorType* bxs,
0068 ::ecal::multifit::SampleVector const* samples,
0069 bool* hasSwitchToGain6,
0070 bool* hasSwitchToGain1,
0071 bool* isSaturated,
0072 char* acState,
0073 int max_iterations) const {
0074
0075
0076 constexpr auto NSAMPLES = SampleMatrix::RowsAtCompileTime;
0077 constexpr auto NPULSES = SampleMatrix::ColsAtCompileTime;
0078 static_assert(NSAMPLES == NPULSES);
0079
0080 using DataType = SampleVector::Scalar;
0081
0082 auto const elemsPerBlock(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
0083
0084 auto const nchannelsEB = digisDevEB.size();
0085 auto const nchannels = nchannelsEB + digisDevEE.size();
0086 auto const offsetForHashes = conditionsDev.offsetEE();
0087
0088 auto const* pulse_covariance = reinterpret_cast<const EcalPulseCovariance*>(conditionsDev.pulseCovariance());
0089
0090
0091 DataType* shrmem = alpaka::getDynSharedMem<DataType>(acc);
0092
0093
0094 for (auto idx : cms::alpakatools::uniform_elements(acc, nchannels)) {
0095 if (static_cast<MinimizationState>(acState[idx]) == MinimizationState::Precomputed)
0096 continue;
0097
0098 auto const elemIdx = idx % elemsPerBlock;
0099
0100
0101 DataType* shrMatrixLForFnnlsStorage = shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * elemIdx;
0102 DataType* shrAtAStorage =
0103 shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * (elemIdx + elemsPerBlock);
0104
0105 auto* amplitudes =
0106 reinterpret_cast<SampleVector*>(idx >= nchannelsEB ? uncalibRecHitsEE.outOfTimeAmplitudes()->data()
0107 : uncalibRecHitsEB.outOfTimeAmplitudes()->data());
0108 auto* energies = idx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
0109 auto* chi2s = idx >= nchannelsEB ? uncalibRecHitsEE.chi2() : uncalibRecHitsEB.chi2();
0110
0111
0112 int const inputCh = idx >= nchannelsEB ? idx - nchannelsEB : idx;
0113 auto const* dids = idx >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
0114 auto const did = DetId{dids[inputCh]};
0115 auto const isBarrel = did.subdetId() == EcalBarrel;
0116 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
0117 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
0118
0119
0120 int npassive = 0;
0121
0122 calo::multifit::ColumnVector<NPULSES, int> pulseOffsets;
0123 CMS_UNROLL_LOOP
0124 for (int i = 0; i < NPULSES; ++i)
0125 pulseOffsets(i) = i;
0126
0127 calo::multifit::ColumnVector<NPULSES, DataType> resultAmplitudes;
0128 CMS_UNROLL_LOOP
0129 for (int counter = 0; counter < NPULSES; ++counter)
0130 resultAmplitudes(counter) = 0;
0131
0132
0133
0134
0135
0136 float chi2 = 0, chi2_now = 0;
0137
0138
0139 for (int iter = 0; iter < max_iterations; ++iter) {
0140
0141
0142 DataType* covMatrixStorage = shrMatrixLForFnnlsStorage;
0143 calo::multifit::MapSymM<DataType, NSAMPLES> covMatrix{covMatrixStorage};
0144 int counter = 0;
0145 CMS_UNROLL_LOOP
0146 for (int col = 0; col < NSAMPLES; ++col) {
0147 CMS_UNROLL_LOOP
0148 for (int row = col; row < NSAMPLES; ++row) {
0149 covMatrixStorage[counter++] = noisecov[idx].coeffRef(row, col);
0150 }
0151 }
0152 update_covariance(pulse_covariance[hashedId], covMatrix, resultAmplitudes);
0153
0154
0155
0156
0157 DataType matrixLStorage[calo::multifit::MapSymM<DataType, NSAMPLES>::total];
0158 calo::multifit::MapSymM<DataType, NSAMPLES> matrixL{matrixLStorage};
0159 calo::multifit::compute_decomposition_unrolled(matrixL, covMatrix);
0160
0161
0162 calo::multifit::ColMajorMatrix<NSAMPLES, NPULSES> A;
0163 calo::multifit::solve_forward_subst_matrix(A, pulse_matrix[idx], matrixL);
0164
0165
0166 float reg_b[NSAMPLES];
0167 calo::multifit::solve_forward_subst_vector(reg_b, samples[idx], matrixL);
0168
0169
0170
0171 calo::multifit::MapSymM<DataType, NPULSES> AtA{shrAtAStorage};
0172
0173 SampleVector Atb;
0174 CMS_UNROLL_LOOP
0175 for (int icol = 0; icol < NPULSES; ++icol) {
0176 float reg_ai[NSAMPLES];
0177
0178
0179 CMS_UNROLL_LOOP
0180 for (int counter = 0; counter < NSAMPLES; ++counter)
0181 reg_ai[counter] = A(counter, icol);
0182
0183
0184 float sum = 0.f;
0185 CMS_UNROLL_LOOP
0186 for (int counter = 0; counter < NSAMPLES; ++counter)
0187 sum += reg_ai[counter] * reg_ai[counter];
0188
0189
0190 AtA(icol, icol) = sum;
0191
0192
0193 CMS_UNROLL_LOOP
0194 for (int j = icol + 1; j < NPULSES; ++j) {
0195
0196 float reg_aj[NSAMPLES];
0197 CMS_UNROLL_LOOP
0198 for (int counter = 0; counter < NSAMPLES; ++counter)
0199 reg_aj[counter] = A(counter, j);
0200
0201
0202 float sum = 0.f;
0203 CMS_UNROLL_LOOP
0204 for (int counter = 0; counter < NSAMPLES; ++counter)
0205 sum += reg_aj[counter] * reg_ai[counter];
0206
0207
0208
0209 AtA(j, icol) = sum;
0210 }
0211
0212
0213 float sum_atb = 0.f;
0214 CMS_UNROLL_LOOP
0215 for (int counter = 0; counter < NSAMPLES; ++counter)
0216 sum_atb += reg_ai[counter] * reg_b[counter];
0217
0218
0219 Atb(icol) = sum_atb;
0220 }
0221
0222
0223
0224 calo::multifit::MapSymM<DataType, NPULSES> matrixLForFnnls{shrMatrixLForFnnlsStorage};
0225
0226 calo::multifit::fnnls(AtA,
0227 Atb,
0228
0229 resultAmplitudes,
0230 npassive,
0231 pulseOffsets,
0232 matrixLForFnnls,
0233 1e-11,
0234 500,
0235 16,
0236 2);
0237
0238 calo::multifit::calculateChiSq(matrixL, pulse_matrix[idx], resultAmplitudes, samples[idx], chi2_now);
0239
0240 auto const deltachi2 = chi2_now - chi2;
0241 chi2 = chi2_now;
0242
0243 if (std::abs(deltachi2) < 1e-3)
0244 break;
0245 }
0246
0247
0248
0249 chi2s[inputCh] = chi2;
0250 energies[inputCh] = resultAmplitudes(5);
0251
0252 CMS_UNROLL_LOOP
0253 for (int counter = 0; counter < NPULSES; ++counter)
0254 amplitudes[inputCh](counter) = resultAmplitudes(counter);
0255 }
0256 }
0257 };
0258
0259 void minimization_procedure(Queue& queue,
0260 InputProduct const& digisDevEB,
0261 InputProduct const& digisDevEE,
0262 OutputProduct& uncalibRecHitsDevEB,
0263 OutputProduct& uncalibRecHitsDevEE,
0264 EventDataForScratchDevice& scratch,
0265 EcalMultifitConditionsDevice const& conditionsDev,
0266 ConfigurationParameters const& configParams,
0267 uint32_t const totalChannels) {
0268 using DataType = SampleVector::Scalar;
0269
0270 auto threads_min = configParams.kernelMinimizeThreads[0];
0271 auto blocks_min = cms::alpakatools::divide_up_by(totalChannels, threads_min);
0272
0273 auto workDivMinimize = cms::alpakatools::make_workdiv<Acc1D>(blocks_min, threads_min);
0274 alpaka::exec<Acc1D>(queue,
0275 workDivMinimize,
0276 Kernel_minimize{},
0277 digisDevEB.const_view(),
0278 digisDevEE.const_view(),
0279 uncalibRecHitsDevEB.view(),
0280 uncalibRecHitsDevEE.view(),
0281 conditionsDev.const_view(),
0282 reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
0283 reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
0284 reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
0285 reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
0286 scratch.hasSwitchToGain6DevBuf.data(),
0287 scratch.hasSwitchToGain1DevBuf.data(),
0288 scratch.isSaturatedDevBuf.data(),
0289 scratch.acStateDevBuf.data(),
0290 50);
0291 }
0292
0293 }
0294
0295 namespace alpaka::trait {
0296 using namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit;
0297
0298
0299 template <typename TAcc>
0300 struct BlockSharedMemDynSizeBytes<Kernel_minimize, TAcc> {
0301
0302 template <typename TVec, typename... TArgs>
0303 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_minimize const&,
0304 TVec const& threadsPerBlock,
0305 TVec const& elemsPerThread,
0306 TArgs const&...) -> std::size_t {
0307 using ScalarType = ecal::multifit::SampleVector::Scalar;
0308
0309
0310 std::size_t bytes = 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
0311 calo::multifit::MapSymM<ScalarType, ecal::multifit::SampleVector::RowsAtCompileTime>::total *
0312 sizeof(ScalarType);
0313 return bytes;
0314 }
0315 };
0316 }