File indexing completed on 2024-04-06 12:03:48
0001 #ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
0002 #define DataFormats_CaloRecHit_interface_MultifitComputations_h
0003
0004 #include <cmath>
0005 #include <limits>
0006 #include <type_traits>
0007
0008 #include <Eigen/Dense>
0009
0010 #include "FWCore/Utilities/interface/CMSUnrollLoop.h"
0011
0012 namespace calo {
0013 namespace multifit {
0014
0015 template <int NROWS, int NCOLS>
0016 using ColMajorMatrix = Eigen::Matrix<float, NROWS, NCOLS, Eigen::ColMajor>;
0017
0018 template <int NROWS, int NCOLS>
0019 using RowMajorMatrix = Eigen::Matrix<float, NROWS, NCOLS, Eigen::RowMajor>;
0020
0021 template <int SIZE, typename T = float>
0022 using ColumnVector = Eigen::Matrix<T, SIZE, 1>;
0023
0024 template <int SIZE, typename T = float>
0025 using RowVector = Eigen::Matrix<T, 1, SIZE>;
0026
0027
0028 template <typename T, int Stride, int Order = Eigen::ColMajor>
0029 struct MapSymM {
0030 using type = T;
0031 using base_type = typename std::remove_const<type>::type;
0032
0033 static constexpr int total = Stride * (Stride + 1) / 2;
0034 static constexpr int stride = Stride;
0035 T* data;
0036
0037 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapSymM(T* data) : data{data} {}
0038
0039 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T const& operator()(int const row, int const col) const {
0040 auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
0041 auto const index = total - tmp + row - col;
0042 return data[index];
0043 }
0044
0045 template <typename U = T>
0046 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
0047 operator()(int const row, int const col) {
0048 auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
0049 auto const index = total - tmp + row - col;
0050 return data[index];
0051 }
0052 };
0053
0054
0055
0056
0057 template <typename T>
0058 struct MapMForPM {
0059 using type = T;
0060 using base_type = typename std::remove_cv<type>::type;
0061
0062 type* data;
0063 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapMForPM(type* data) : data{data} {}
0064
0065 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC base_type operator()(int const row, int const col) const {
0066 auto const index = 2 - col + row;
0067 return index >= 0 ? data[index] : 0;
0068 }
0069 };
0070
0071
0072 template <typename MatrixType1, typename MatrixType2>
0073 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
0074 auto const sqrtm_0_0 = std::sqrt(M(0, 0));
0075 L(0, 0) = sqrtm_0_0;
0076 using T = typename MatrixType1::base_type;
0077
0078 CMS_UNROLL_LOOP
0079 for (int i = 1; i < MatrixType1::stride; i++) {
0080 T sumsq{0};
0081 for (int j = 0; j < i; j++) {
0082 T sumsq2{0};
0083 auto const m_i_j = M(i, j);
0084 for (int k = 0; k < j; ++k)
0085 sumsq2 += L(i, k) * L(j, k);
0086
0087 auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
0088 L(i, j) = value_i_j;
0089
0090 sumsq += value_i_j * value_i_j;
0091 }
0092
0093 auto const l_i_i = std::sqrt(M(i, i) - sumsq);
0094 L(i, i) = l_i_i;
0095 }
0096 }
0097
0098 template <typename MatrixType1, typename MatrixType2>
0099 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition(MatrixType1& L,
0100 MatrixType2 const& M,
0101 int const N) {
0102 auto const sqrtm_0_0 = std::sqrt(M(0, 0));
0103 L(0, 0) = sqrtm_0_0;
0104 using T = typename MatrixType1::base_type;
0105
0106 for (int i = 1; i < N; i++) {
0107 T sumsq{0};
0108 for (int j = 0; j < i; j++) {
0109 T sumsq2{0};
0110 auto const m_i_j = M(i, j);
0111 for (int k = 0; k < j; ++k)
0112 sumsq2 += L(i, k) * L(j, k);
0113
0114 auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
0115 L(i, j) = value_i_j;
0116
0117 sumsq += value_i_j * value_i_j;
0118 }
0119
0120 auto const l_i_i = std::sqrt(M(i, i) - sumsq);
0121 L(i, i) = l_i_i;
0122 }
0123 }
0124
0125 template <typename MatrixType1, typename MatrixType2, typename VectorType>
0126 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(
0127 MatrixType1& L,
0128 MatrixType2 const& M,
0129 float b[MatrixType1::stride],
0130 VectorType const& Atb,
0131 int const N,
0132 ColumnVector<MatrixType1::stride, int> const& pulseOffsets) {
0133 auto const real_0 = pulseOffsets(0);
0134 auto const sqrtm_0_0 = std::sqrt(M(real_0, real_0));
0135 L(0, 0) = sqrtm_0_0;
0136 using T = typename MatrixType1::base_type;
0137 b[0] = Atb(real_0) / sqrtm_0_0;
0138
0139 for (int i = 1; i < N; i++) {
0140 auto const i_real = pulseOffsets(i);
0141 T sumsq{0};
0142 T total = 0;
0143 auto const atb = Atb(i_real);
0144 for (int j = 0; j < i; j++) {
0145 auto const j_real = pulseOffsets(j);
0146 T sumsq2{0};
0147 auto const m_i_j = M(std::max(i_real, j_real), std::min(i_real, j_real));
0148 for (int k = 0; k < j; ++k)
0149 sumsq2 += L(i, k) * L(j, k);
0150
0151 auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
0152 L(i, j) = value_i_j;
0153
0154 sumsq += value_i_j * value_i_j;
0155 total += value_i_j * b[j];
0156 }
0157
0158 auto const l_i_i = std::sqrt(M(i_real, i_real) - sumsq);
0159 L(i, i) = l_i_i;
0160 b[i] = (atb - total) / l_i_i;
0161 }
0162 }
0163
0164 template <typename MatrixType1, typename MatrixType2, typename VectorType>
0165 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(
0166 MatrixType1& L,
0167 MatrixType2 const& M,
0168 float b[MatrixType1::stride],
0169 VectorType const& Atb,
0170 int const N,
0171 ColumnVector<MatrixType1::stride, int> const& pulseOffsets) {
0172 using T = typename MatrixType1::base_type;
0173 auto const i = N - 1;
0174 auto const i_real = pulseOffsets(i);
0175 T sumsq{0};
0176 T total = 0;
0177 for (int j = 0; j < i; j++) {
0178 auto const j_real = pulseOffsets(j);
0179 T sumsq2{0};
0180 auto const m_i_j = M(std::max(i_real, j_real), std::min(i_real, j_real));
0181 for (int k = 0; k < j; ++k)
0182 sumsq2 += L(i, k) * L(j, k);
0183
0184 auto const value_i_j = (m_i_j - sumsq2) / L(j, j);
0185 L(i, j) = value_i_j;
0186 sumsq += value_i_j * value_i_j;
0187
0188 total += value_i_j * b[j];
0189 }
0190
0191 auto const l_i_i = std::sqrt(M(i_real, i_real) - sumsq);
0192 L(i, i) = l_i_i;
0193 b[i] = (Atb(i_real) - total) / l_i_i;
0194 }
0195
0196 template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
0197 EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1& A,
0198 MatrixType2 const& pulseMatrixView,
0199 MatrixType3 const& matrixL) {
0200
0201 constexpr auto NPULSES = MatrixType2::ColsAtCompileTime;
0202 constexpr auto NSAMPLES = MatrixType2::RowsAtCompileTime;
0203
0204 CMS_UNROLL_LOOP
0205 for (int icol = 0; icol < NPULSES; icol++) {
0206 float reg_b[NSAMPLES];
0207 float reg_L[NSAMPLES];
0208
0209
0210 CMS_UNROLL_LOOP
0211 for (int i = 0; i < NSAMPLES; i++) {
0212 #ifdef __CUDA_ARCH__
0213
0214 reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
0215 #else
0216 reg_b[i] = pulseMatrixView.coeffRef(i, icol);
0217 #endif
0218 reg_L[i] = matrixL(i, 0);
0219 }
0220
0221
0222 auto x_prev = reg_b[0] / reg_L[0];
0223 A(0, icol) = x_prev;
0224
0225
0226 CMS_UNROLL_LOOP
0227 for (int iL = 1; iL < NSAMPLES; iL++) {
0228
0229 CMS_UNROLL_LOOP
0230 for (int counter = iL; counter < NSAMPLES; counter++)
0231 reg_b[counter] -= x_prev * reg_L[counter];
0232
0233
0234 CMS_UNROLL_LOOP
0235 for (int counter = iL; counter < NSAMPLES; counter++)
0236 reg_L[counter] = matrixL(counter, iL);
0237
0238
0239 x_prev = reg_b[iL] / reg_L[iL];
0240
0241
0242 A(iL, icol) = x_prev;
0243 }
0244 }
0245 }
0246
0247 template <typename MatrixType1, typename MatrixType2>
0248 EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
0249 MatrixType1 inputAmplitudesView,
0250 MatrixType2 matrixL) {
0251 constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
0252
0253 float reg_b_tmp[NSAMPLES];
0254 float reg_L[NSAMPLES];
0255
0256
0257 CMS_UNROLL_LOOP
0258 for (int i = 0; i < NSAMPLES; i++) {
0259 reg_b_tmp[i] = inputAmplitudesView(i);
0260 reg_L[i] = matrixL(i, 0);
0261 }
0262
0263
0264 auto x_prev = reg_b_tmp[0] / reg_L[0];
0265 reg_b[0] = x_prev;
0266
0267
0268 CMS_UNROLL_LOOP
0269 for (int iL = 1; iL < NSAMPLES; iL++) {
0270
0271 CMS_UNROLL_LOOP
0272 for (int counter = iL; counter < NSAMPLES; counter++)
0273 reg_b_tmp[counter] -= x_prev * reg_L[counter];
0274
0275
0276 CMS_UNROLL_LOOP
0277 for (int counter = iL; counter < NSAMPLES; counter++)
0278 reg_L[counter] = matrixL(counter, iL);
0279
0280
0281 x_prev = reg_b_tmp[iL] / reg_L[iL];
0282
0283
0284 reg_b[iL] = x_prev;
0285 }
0286 }
0287
0288 template <typename MatrixType1, typename MatrixType2, typename MatrixType3, typename MatrixType4>
0289 EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void calculateChiSq(MatrixType1 const& matrixL,
0290 MatrixType2 const& pulseMatrixView,
0291 MatrixType3 const& resultAmplitudesVector,
0292 MatrixType4 const& inputAmplitudesView,
0293 float& chi2) {
0294
0295 constexpr auto NPULSES = MatrixType2::ColsAtCompileTime;
0296 constexpr auto NSAMPLES = MatrixType2::RowsAtCompileTime;
0297
0298
0299
0300 float accum[NSAMPLES];
0301 {
0302 float results[NPULSES];
0303
0304
0305 CMS_UNROLL_LOOP
0306 for (int counter = 0; counter < NPULSES; counter++) {
0307 results[counter] = resultAmplitudesVector[counter];
0308 }
0309
0310
0311 CMS_UNROLL_LOOP
0312 for (int counter = 0; counter < NSAMPLES; counter++)
0313 accum[counter] = -inputAmplitudesView(counter);
0314
0315
0316 for (int icol = 0; icol < NPULSES; icol++) {
0317 float pm_col[NSAMPLES];
0318
0319
0320 CMS_UNROLL_LOOP
0321 for (int counter = 0; counter < NSAMPLES; counter++)
0322 #ifdef __CUDA_ARCH__
0323 pm_col[counter] = __ldg(&pulseMatrixView.coeffRef(counter, icol));
0324 #else
0325 pm_col[counter] = pulseMatrixView.coeffRef(counter, icol);
0326 #endif
0327
0328
0329 CMS_UNROLL_LOOP
0330 for (int counter = 0; counter < NSAMPLES; counter++)
0331 accum[counter] += results[icol] * pm_col[counter];
0332 }
0333 }
0334
0335
0336
0337
0338
0339
0340
0341
0342 {
0343 float reg_L[NSAMPLES];
0344 float accumSum = 0;
0345
0346
0347 CMS_UNROLL_LOOP
0348 for (int i = 0; i < NSAMPLES; i++) {
0349 reg_L[i] = matrixL(i, 0);
0350 }
0351
0352
0353 auto x_prev = accum[0] / reg_L[0];
0354 accumSum += x_prev * x_prev;
0355
0356
0357 CMS_UNROLL_LOOP
0358 for (int iL = 1; iL < NSAMPLES; iL++) {
0359
0360 CMS_UNROLL_LOOP
0361 for (int counter = iL; counter < NSAMPLES; counter++)
0362 accum[counter] -= x_prev * reg_L[counter];
0363
0364
0365 CMS_UNROLL_LOOP
0366 for (int counter = iL; counter < NSAMPLES; counter++)
0367 reg_L[counter] = matrixL(counter, iL);
0368
0369
0370 x_prev = accum[iL] / reg_L[iL];
0371
0372
0373 accumSum += x_prev * x_prev;
0374 }
0375
0376 chi2 = accumSum;
0377 }
0378 }
0379
0380
0381 template <typename MatrixType, typename VectorType>
0382 EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,
0383 VectorType const& Atb,
0384 VectorType& solution,
0385 int& npassive,
0386 ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
0387 MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
0388 double eps,
0389 const int maxIterations,
0390 const int relaxationPeriod,
0391 const int relaxationFactor) {
0392
0393 constexpr auto NPULSES = VectorType::RowsAtCompileTime;
0394
0395
0396 Eigen::Index w_max_idx_prev = 0;
0397 float w_max_prev = 0;
0398 bool recompute = false;
0399
0400
0401 VectorType s;
0402 float reg_b[NPULSES];
0403
0404
0405
0406 int iter = 0;
0407 while (true) {
0408 if (iter > 0 || npassive == 0) {
0409 auto const nactive = NPULSES - npassive;
0410
0411 if (nactive == 0)
0412 break;
0413
0414
0415
0416 Eigen::Index w_max_idx = 0;
0417 float w_max = -std::numeric_limits<float>::max();
0418 for (int icol = npassive; icol < NPULSES; icol++) {
0419 auto const icol_real = pulseOffsets(icol);
0420 auto const atb = Atb(icol_real);
0421 float sum = 0;
0422 CMS_UNROLL_LOOP
0423 for (int counter = 0; counter < NPULSES; counter++)
0424 sum += counter > icol_real ? AtA(counter, icol_real) * solution(counter)
0425 : AtA(icol_real, counter) * solution(counter);
0426
0427 auto const w = atb - sum;
0428 if (w > w_max) {
0429 w_max = w;
0430 w_max_idx = icol - npassive;
0431 }
0432 }
0433
0434
0435 if (w_max < eps || (w_max_idx == w_max_idx_prev && w_max == w_max_prev))
0436 break;
0437
0438 if (iter >= maxIterations)
0439 break;
0440
0441 w_max_prev = w_max;
0442 w_max_idx_prev = w_max_idx;
0443
0444
0445 w_max_idx += npassive;
0446
0447 Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(w_max_idx));
0448 ++npassive;
0449 }
0450
0451
0452 while (true) {
0453 if (npassive == 0)
0454 break;
0455
0456
0457
0458
0459
0460
0461 if (recompute || iter == 0)
0462 compute_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
0463 else
0464 update_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
0465
0466
0467 s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
0468 for (int i = npassive - 2; i >= 0; --i) {
0469 float total = 0;
0470 for (int j = i + 1; j < npassive; j++)
0471 total += matrixL(j, i) * s(j);
0472
0473 s(i) = (reg_b[i] - total) / matrixL(i, i);
0474 }
0475
0476
0477 bool hasNegative = false;
0478 bool hasNans = false;
0479 for (int counter = 0; counter < npassive; counter++) {
0480 auto const s_ii = s(counter);
0481 hasNegative |= s_ii <= 0;
0482 hasNans |= std::isnan(s_ii);
0483 }
0484
0485
0486
0487
0488 if (hasNans)
0489 break;
0490 if (!hasNegative) {
0491 for (int i = 0; i < npassive; i++) {
0492 auto const i_real = pulseOffsets(i);
0493 solution(i_real) = s(i);
0494 }
0495
0496 recompute = false;
0497 break;
0498 }
0499
0500
0501 recompute = true;
0502
0503 auto alpha = std::numeric_limits<float>::max();
0504 Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
0505 for (int i = 0; i < npassive; i++) {
0506 if (s[i] <= 0.) {
0507 auto const i_real = pulseOffsets(i);
0508 auto const ratio = solution[i_real] / (solution[i_real] - s[i]);
0509 if (ratio < alpha) {
0510 alpha = ratio;
0511 alpha_idx = i;
0512 alpha_idx_real = i_real;
0513 }
0514 }
0515 }
0516
0517
0518 for (int i = 0; i < npassive; i++) {
0519 auto const i_real = pulseOffsets(i);
0520 solution(i_real) += alpha * (s(i) - solution(i_real));
0521 }
0522
0523
0524 solution[alpha_idx_real] = 0;
0525 --npassive;
0526
0527 Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(alpha_idx));
0528 }
0529
0530
0531 ++iter;
0532 if (iter % relaxationPeriod == 0)
0533 eps *= relaxationFactor;
0534 }
0535 }
0536
0537 }
0538 }
0539
0540 #endif