File indexing completed on 2025-01-22 07:34:22
0001 #include <cmath>
0002 #include <limits>
0003 #include <alpaka/alpaka.hpp>
0004
0005 #include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h"
0006 #include "DataFormats/CaloRecHit/interface/MultifitComputations.h"
0007 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0008 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
0009
0010 #include "AmplitudeComputationKernels.h"
0011 #include "KernelHelpers.h"
0012 #include "EcalUncalibRecHitMultiFitAlgoPortable.h"
0013
0014 namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit {
0015
0016 using namespace ::ecal::multifit;
0017
0018 template <typename MatrixType>
0019 ALPAKA_FN_ACC ALPAKA_FN_INLINE void update_covariance(EcalPulseCovariance const& pulse_covariance,
0020 MatrixType& inverse_cov,
0021 SampleVector const& amplitudes) {
0022 constexpr auto nsamples = SampleVector::RowsAtCompileTime;
0023 constexpr auto npulses = BXVectorType::RowsAtCompileTime;
0024
0025 CMS_UNROLL_LOOP
0026 for (unsigned int ipulse = 0; ipulse < npulses; ++ipulse) {
0027 auto const amplitude = amplitudes.coeff(ipulse);
0028 if (amplitude == 0)
0029 continue;
0030
0031
0032 int bx = ipulse - 5;
0033 int first_sample_t = std::max(0, bx + 3);
0034 int offset = -3 - bx;
0035
0036 auto const value_sq = amplitude * amplitude;
0037
0038 for (int col = first_sample_t; col < nsamples; ++col) {
0039 for (int row = col; row < nsamples; ++row) {
0040 inverse_cov(row, col) += value_sq * pulse_covariance.covval[row + offset][col + offset];
0041 }
0042 }
0043 }
0044 }
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056 class Kernel_minimize {
0057 public:
0058 ALPAKA_FN_ACC void operator()(Acc1D const& acc,
0059 InputProduct::ConstView const& digisDevEB,
0060 InputProduct::ConstView const& digisDevEE,
0061 OutputProduct::View uncalibRecHitsEB,
0062 OutputProduct::View uncalibRecHitsEE,
0063 EcalMultifitConditionsDevice::ConstView conditionsDev,
0064 ::ecal::multifit::SampleMatrix const* noisecov,
0065 ::ecal::multifit::PulseMatrixType const* pulse_matrix,
0066 ::ecal::multifit::BXVectorType* bxs,
0067 ::ecal::multifit::SampleVector const* samples,
0068 bool* hasSwitchToGain6,
0069 bool* hasSwitchToGain1,
0070 bool* isSaturated,
0071 char* acState,
0072 int max_iterations) const {
0073
0074
0075 constexpr auto NSAMPLES = SampleMatrix::RowsAtCompileTime;
0076 constexpr auto NPULSES = SampleMatrix::ColsAtCompileTime;
0077 static_assert(NSAMPLES == NPULSES);
0078
0079 using DataType = SampleVector::Scalar;
0080
0081 auto const elemsPerBlock(alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u]);
0082
0083 auto const nchannelsEB = digisDevEB.size();
0084 auto const nchannels = nchannelsEB + digisDevEE.size();
0085 auto const offsetForHashes = conditionsDev.offsetEE();
0086
0087 auto const* pulse_covariance = reinterpret_cast<const EcalPulseCovariance*>(conditionsDev.pulseCovariance());
0088
0089
0090 DataType* shrmem = alpaka::getDynSharedMem<DataType>(acc);
0091
0092
0093 for (auto idx : cms::alpakatools::uniform_elements(acc, nchannels)) {
0094 if (static_cast<MinimizationState>(acState[idx]) == MinimizationState::Precomputed)
0095 continue;
0096
0097 auto const elemIdx = idx % elemsPerBlock;
0098
0099
0100 DataType* shrMatrixLForFnnlsStorage = shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * elemIdx;
0101 DataType* shrAtAStorage =
0102 shrmem + calo::multifit::MapSymM<DataType, NPULSES>::total * (elemIdx + elemsPerBlock);
0103
0104 auto* amplitudes =
0105 reinterpret_cast<SampleVector*>(idx >= nchannelsEB ? uncalibRecHitsEE.outOfTimeAmplitudes()->data()
0106 : uncalibRecHitsEB.outOfTimeAmplitudes()->data());
0107 auto* energies = idx >= nchannelsEB ? uncalibRecHitsEE.amplitude() : uncalibRecHitsEB.amplitude();
0108 auto* chi2s = idx >= nchannelsEB ? uncalibRecHitsEE.chi2() : uncalibRecHitsEB.chi2();
0109
0110
0111 int const inputCh = idx >= nchannelsEB ? idx - nchannelsEB : idx;
0112 auto const* dids = idx >= nchannelsEB ? digisDevEE.id() : digisDevEB.id();
0113 auto const did = DetId{dids[inputCh]};
0114 auto const isBarrel = did.subdetId() == EcalBarrel;
0115 auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId())
0116 : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId());
0117
0118
0119 int npassive = 0;
0120
0121 calo::multifit::ColumnVector<NPULSES, int> pulseOffsets;
0122 CMS_UNROLL_LOOP
0123 for (int i = 0; i < NPULSES; ++i)
0124 pulseOffsets(i) = i;
0125
0126 calo::multifit::ColumnVector<NPULSES, DataType> resultAmplitudes;
0127 CMS_UNROLL_LOOP
0128 for (int counter = 0; counter < NPULSES; ++counter)
0129 resultAmplitudes(counter) = 0;
0130
0131
0132
0133
0134
0135 float chi2 = 0, chi2_now = 0;
0136
0137
0138 for (int iter = 0; iter < max_iterations; ++iter) {
0139
0140
0141 DataType* covMatrixStorage = shrMatrixLForFnnlsStorage;
0142 calo::multifit::MapSymM<DataType, NSAMPLES> covMatrix{covMatrixStorage};
0143 int counter = 0;
0144 CMS_UNROLL_LOOP
0145 for (int col = 0; col < NSAMPLES; ++col) {
0146 CMS_UNROLL_LOOP
0147 for (int row = col; row < NSAMPLES; ++row) {
0148 covMatrixStorage[counter++] = noisecov[idx].coeffRef(row, col);
0149 }
0150 }
0151 update_covariance(pulse_covariance[hashedId], covMatrix, resultAmplitudes);
0152
0153
0154
0155
0156 DataType matrixLStorage[calo::multifit::MapSymM<DataType, NSAMPLES>::total];
0157 calo::multifit::MapSymM<DataType, NSAMPLES> matrixL{matrixLStorage};
0158 calo::multifit::compute_decomposition_unrolled(matrixL, covMatrix);
0159
0160
0161 calo::multifit::ColMajorMatrix<NSAMPLES, NPULSES> A;
0162 calo::multifit::solve_forward_subst_matrix(A, pulse_matrix[idx], matrixL);
0163
0164
0165 float reg_b[NSAMPLES];
0166 calo::multifit::solve_forward_subst_vector(reg_b, samples[idx], matrixL);
0167
0168
0169
0170 calo::multifit::MapSymM<DataType, NPULSES> AtA{shrAtAStorage};
0171
0172 SampleVector Atb;
0173 CMS_UNROLL_LOOP
0174 for (int icol = 0; icol < NPULSES; ++icol) {
0175 float reg_ai[NSAMPLES];
0176
0177
0178 CMS_UNROLL_LOOP
0179 for (int counter = 0; counter < NSAMPLES; ++counter)
0180 reg_ai[counter] = A(counter, icol);
0181
0182
0183 float sum = 0.f;
0184 CMS_UNROLL_LOOP
0185 for (int counter = 0; counter < NSAMPLES; ++counter)
0186 sum += reg_ai[counter] * reg_ai[counter];
0187
0188
0189 AtA(icol, icol) = sum;
0190
0191
0192 CMS_UNROLL_LOOP
0193 for (int j = icol + 1; j < NPULSES; ++j) {
0194
0195 float reg_aj[NSAMPLES];
0196 CMS_UNROLL_LOOP
0197 for (int counter = 0; counter < NSAMPLES; ++counter)
0198 reg_aj[counter] = A(counter, j);
0199
0200
0201 float sum = 0.f;
0202 CMS_UNROLL_LOOP
0203 for (int counter = 0; counter < NSAMPLES; ++counter)
0204 sum += reg_aj[counter] * reg_ai[counter];
0205
0206
0207
0208 AtA(j, icol) = sum;
0209 }
0210
0211
0212 float sum_atb = 0.f;
0213 CMS_UNROLL_LOOP
0214 for (int counter = 0; counter < NSAMPLES; ++counter)
0215 sum_atb += reg_ai[counter] * reg_b[counter];
0216
0217
0218 Atb(icol) = sum_atb;
0219 }
0220
0221
0222
0223 calo::multifit::MapSymM<DataType, NPULSES> matrixLForFnnls{shrMatrixLForFnnlsStorage};
0224
0225 calo::multifit::fnnls(AtA,
0226 Atb,
0227
0228 resultAmplitudes,
0229 npassive,
0230 pulseOffsets,
0231 matrixLForFnnls,
0232 1e-11,
0233 500,
0234 16,
0235 2);
0236
0237 calo::multifit::calculateChiSq(matrixL, pulse_matrix[idx], resultAmplitudes, samples[idx], chi2_now);
0238
0239 auto const deltachi2 = chi2_now - chi2;
0240 chi2 = chi2_now;
0241
0242 if (std::abs(deltachi2) < 1e-3)
0243 break;
0244 }
0245
0246
0247
0248 chi2s[inputCh] = chi2;
0249 energies[inputCh] = resultAmplitudes(5);
0250
0251 CMS_UNROLL_LOOP
0252 for (int counter = 0; counter < NPULSES; ++counter)
0253 amplitudes[inputCh](counter) = resultAmplitudes(counter);
0254 }
0255 }
0256 };
0257
0258 void minimization_procedure(Queue& queue,
0259 InputProduct const& digisDevEB,
0260 InputProduct const& digisDevEE,
0261 OutputProduct& uncalibRecHitsDevEB,
0262 OutputProduct& uncalibRecHitsDevEE,
0263 EventDataForScratchDevice& scratch,
0264 EcalMultifitConditionsDevice const& conditionsDev,
0265 ConfigurationParameters const& configParams,
0266 uint32_t const totalChannels) {
0267 using DataType = SampleVector::Scalar;
0268
0269 auto threads_min = configParams.kernelMinimizeThreads[0];
0270 auto blocks_min = cms::alpakatools::divide_up_by(totalChannels, threads_min);
0271
0272 auto workDivMinimize = cms::alpakatools::make_workdiv<Acc1D>(blocks_min, threads_min);
0273 alpaka::exec<Acc1D>(queue,
0274 workDivMinimize,
0275 Kernel_minimize{},
0276 digisDevEB.const_view(),
0277 digisDevEE.const_view(),
0278 uncalibRecHitsDevEB.view(),
0279 uncalibRecHitsDevEE.view(),
0280 conditionsDev.const_view(),
0281 reinterpret_cast<::ecal::multifit::SampleMatrix*>(scratch.noisecovDevBuf.data()),
0282 reinterpret_cast<::ecal::multifit::PulseMatrixType*>(scratch.pulse_matrixDevBuf.data()),
0283 reinterpret_cast<::ecal::multifit::BXVectorType*>(scratch.activeBXsDevBuf.data()),
0284 reinterpret_cast<::ecal::multifit::SampleVector*>(scratch.samplesDevBuf.data()),
0285 scratch.hasSwitchToGain6DevBuf.data(),
0286 scratch.hasSwitchToGain1DevBuf.data(),
0287 scratch.isSaturatedDevBuf.data(),
0288 scratch.acStateDevBuf.data(),
0289 50);
0290 }
0291
0292 }
0293
0294 namespace alpaka::trait {
0295 using namespace ALPAKA_ACCELERATOR_NAMESPACE;
0296 using namespace ALPAKA_ACCELERATOR_NAMESPACE::ecal::multifit;
0297
0298
0299 template <>
0300 struct BlockSharedMemDynSizeBytes<Kernel_minimize, Acc1D> {
0301
0302 template <typename TVec, typename... TArgs>
0303 ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(Kernel_minimize const&,
0304 TVec const& threadsPerBlock,
0305 TVec const& elemsPerThread,
0306 TArgs const&...) -> std::size_t {
0307 using ScalarType = ::ecal::multifit::SampleVector::Scalar;
0308
0309
0310 std::size_t bytes =
0311 2 * threadsPerBlock[0u] * elemsPerThread[0u] *
0312 calo::multifit::MapSymM<ScalarType, ::ecal::multifit::SampleVector::RowsAtCompileTime>::total *
0313 sizeof(ScalarType);
0314 return bytes;
0315 }
0316 };
0317 }