File indexing completed on 2025-01-18 03:42:18
0001 #include "KalmanUtilsMPlex.h"
0002 #include "PropagationMPlex.h"
0003
0004
0005 #include "Debug.h"
0006
0007 #include "KalmanUtilsMPlex.icc"
0008
0009 #include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
0010
0011 namespace {
0012 using namespace mkfit;
0013 using idx_t = Matriplex::idx_t;
0014
  // outPar = psPar + K * residual:  D = B + A * C.
  // A is the 6x3 Kalman gain (only its first two columns are used, since the
  // residual C is 2-dimensional); B and D are 6-parameter vectors.
  // The per-track loop lives in the .icc implementation.
  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
    MultResidualsAdd_imp(A, B, C, D, 0, NN);
  }
0024
  // outPar = psPar + K * residual:  D = B + A * C.
  // A is a 6x2 Kalman gain, C a 2-component local residual, B/D are
  // 6-component parameter vectors; each Matriplex holds N tracks in SoA form.
  inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    const T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);
    T* d = D.fArray;
    ASSUME_ALIGNED(d, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      // Row i of D: d_i = b_i + A(i,0)*c_0 + A(i,1)*c_1 (A stored row-major, 2 per row).
      d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
      d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
      d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
      d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
      d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
      d[5 * N + n] = b[5 * N + n] + a[10 * N + n] * c[0 * N + n] + a[11 * N + n] * c[1 * N + n];
    }
  }
0054
  // 5-parameter variant of the update: D = B + A * C, where A is a 5x2
  // Kalman gain and C a 2-component residual (used by the local-parameter
  // plane update path).
  inline void MultResidualsAdd(const MPlex52& A, const MPlex5V& B, const MPlex2V& C, MPlex5V& D) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    const T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);
    T* d = D.fArray;
    ASSUME_ALIGNED(d, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      // Row i of D: d_i = b_i + A(i,0)*c_0 + A(i,1)*c_1.
      d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
      d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
      d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
      d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
      d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
    }
  }
0083
0084
0085
  // chi2 = r^T * Cinv * r for a 2-dim residual r (= A) and an already
  // inverted symmetric 2x2 covariance Cinv (= C).
  // Symmetric packing: c[0]=C(0,0), c[1]=C(0,1), c[2]=C(1,1).
  inline void Chi2Similarity(const MPlex2V& A,  //resPar
                             const MPlex2S& C,  //resErr
                             MPlexQF& D)        //outChi2
  {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);
    T* d = D.fArray;
    ASSUME_ALIGNED(d, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      d[0 * N + n] = c[0 * N + n] * a[0 * N + n] * a[0 * N + n] + c[2 * N + n] * a[1 * N + n] * a[1 * N + n] +
                     2 * (c[1 * N + n] * a[1 * N + n] * a[0 * N + n]);
    }
  }
0110
0111
0112
  // C = upper-left 3x3 of A + B, element-wise on the symmetric packing.
  // For 6x6 (A) and 3x3 (B/C) symmetric matrices the first 6 packed elements
  // are exactly the upper-left 3x3 block, so a flat element-wise add suffices.
  inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
      c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
      c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
      c[3 * N + n] = a[3 * N + n] + b[3 * N + n];
      c[4 * N + n] = a[4 * N + n] + b[4 * N + n];
      c[5 * N + n] = a[5 * N + n] + b[5 * N + n];
    }
  }
0136
  // C = upper-left 2x2 of A + B; the first 3 packed symmetric elements are
  // the upper-left 2x2 block of both inputs.
  inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
      c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
      c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
    }
  }
0157
0158
0159
  // C = A - (first 3 components of B): global position residual
  // (measurement minus propagated state x,y,z).
  inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
      c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
      c[2 * N + n] = a[2 * N + n] - b[2 * N + n];
    }
  }
0180
  // C = A - (first 2 components of B): x,y residual only.
  inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (idx_t n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
      c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
    }
  }
0200
0201
0202
  // C = R * B, where B is a symmetric 3x3 covariance (packed as
  // b0=B00, b1=B10, b2=B11, b3=B20, b4=B21, b5=B22) and R is the tangent-plane
  // projection built from the unit tangent components (a00, a01):
  //   row 0: ( a00,  a01, 0)   -- tangent direction in the xy plane
  //   row 1: copies B's third row (z stays unrotated)
  //   row 2: ( a01, -a00, 0)   -- direction orthogonal to the tangent
  // The full 3x3 result C is kept so the transpose product can follow.
  inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a00[n] * b[0 * N + n] + a01[n] * b[1 * N + n];
      c[1 * N + n] = a00[n] * b[1 * N + n] + a01[n] * b[2 * N + n];
      c[2 * N + n] = a00[n] * b[3 * N + n] + a01[n] * b[4 * N + n];
      c[3 * N + n] = b[3 * N + n];
      c[4 * N + n] = b[4 * N + n];
      c[5 * N + n] = b[5 * N + n];
      c[6 * N + n] = a01[n] * b[0 * N + n] - a00[n] * b[1 * N + n];
      c[7 * N + n] = a01[n] * b[1 * N + n] - a00[n] * b[2 * N + n];
      c[8 * N + n] = a01[n] * b[3 * N + n] - a00[n] * b[4 * N + n];
    }
  }
0233
  // C = B * R^T, completing the similarity transform started in
  // ProjectResErr; only the 2x2 symmetric block of the local (tangent, z)
  // covariance is kept: c0 = local(0,0), c1 = local(1,0), c2 = local(1,1).
  inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = b[0 * N + n] * a00[n] + b[1 * N + n] * a01[n];
      c[1 * N + n] = b[3 * N + n] * a00[n] + b[4 * N + n] * a01[n];
      c[2 * N + n] = b[5 * N + n];
    }
  }
0258
  // B = R * A: rotate the 3-dim global residual A onto the 2-dim tangent
  // plane defined by the rotation components (R00, R01).
  // The per-track loop lives in the .icc implementation.
  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00,  //r00
                                            const MPlexQF& R01,  //r01
                                            const MPlexHV& A,    //res_glo
                                            MPlex2V& B)          //res_loc
  {
    RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);
  }
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
  // C = A * B, with A a 2x3 projection matrix (row-major, 6 elements) and
  // B a symmetric 3x3 covariance (packed b0=B00, b1=B10, b2=B11, b3=B20,
  // b4=B21, b5=B22). The 2x3 result is stored row-major in C.
  // T1/T2 are any Matriplex types with a compatible fArray layout.
  template <class T1, class T2>
  inline void ProjectResErr(const T1& A, const T2& B, MPlex2H& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[2 * N + n] + a[2 * N + n] * b[4 * N + n];
      c[2 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[5 * N + n];
      c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n] + a[5 * N + n] * b[3 * N + n];
      c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
      c[5 * N + n] = a[3 * N + n] * b[3 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[5 * N + n];
    }
  }
0336
0337
  // C = B * A^T, completing the similarity transform A * Cov * A^T for the
  // template ProjectResErr above. B is the intermediate 2x3 product; only the
  // symmetric 2x2 result is stored: c0 = (0,0), c1 = (1,0), c2 = (1,1).
  template <class T1>
  inline void ProjectResErrTransp(const T1& A, const MPlex2H& B, MPlex2S& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = b[0 * N + n] * a[0 * N + n] + b[1 * N + n] * a[1 * N + n] + b[2 * N + n] * a[2 * N + n];
      c[1 * N + n] = b[0 * N + n] * a[3 * N + n] + b[1 * N + n] * a[4 * N + n] + b[2 * N + n] * a[5 * N + n];
      c[2 * N + n] = b[3 * N + n] * a[3 * N + n] + b[4 * N + n] * a[4 * N + n] + b[5 * N + n] * a[5 * N + n];
    }
  }
0367
0368 inline void RotateVectorOnPlane(const MPlexHH& R, const MPlexHV& A, MPlexHV& B) {
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379 #pragma omp simd
0380 for (int n = 0; n < NN; ++n) {
0381 B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
0382 B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
0383 B(n, 2, 0) = R(n, 2, 0) * A(n, 0, 0) + R(n, 2, 1) * A(n, 1, 0) + R(n, 2, 2) * A(n, 2, 0);
0384 }
0385 }
0386
0387 inline void RotateVectorOnPlaneTransp(const MPlexHH& R, const MPlexHV& A, MPlexHV& B) {
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398 #pragma omp simd
0399 for (int n = 0; n < NN; ++n) {
0400 B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 1, 0) * A(n, 1, 0) + R(n, 2, 0) * A(n, 2, 0);
0401 B(n, 1, 0) = R(n, 0, 1) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 2, 1) * A(n, 2, 0);
0402 B(n, 2, 0) = R(n, 0, 2) * A(n, 0, 0) + R(n, 1, 2) * A(n, 1, 0) + R(n, 2, 2) * A(n, 2, 0);
0403 }
0404 }
0405
0406 template <typename T1, typename T2>
0407 void RotateResidualsOnPlane(const T1& R,
0408 const T2& A,
0409 MPlex2V& B)
0410 {
0411 #pragma omp simd
0412 for (int n = 0; n < NN; ++n) {
0413 B(n, 0, 0) = R(n, 0, 0) * A(n, 0, 0) + R(n, 0, 1) * A(n, 1, 0) + R(n, 0, 2) * A(n, 2, 0);
0414 B(n, 1, 0) = R(n, 1, 0) * A(n, 0, 0) + R(n, 1, 1) * A(n, 1, 0) + R(n, 1, 2) * A(n, 2, 0);
0415 }
0416 }
0417
  // C = H^T * G: product of the transposed tangent-plane projection H
  // (parameterized by a00, a01) with the inverted 2x2 residual covariance G
  // (packed b0=G00, b1=G10, b2=G11). The result is stored as a 3x3 matrix
  // whose third column is explicitly zeroed; it feeds KalmanGain below.
  inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a00 = A00.fArray;
    ASSUME_ALIGNED(a00, 64);
    const T* a01 = A01.fArray;
    ASSUME_ALIGNED(a01, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a00[n] * b[0 * N + n];
      c[1 * N + n] = a00[n] * b[1 * N + n];
      c[2 * N + n] = 0.;
      c[3 * N + n] = a01[n] * b[0 * N + n];
      c[4 * N + n] = a01[n] * b[1 * N + n];
      c[5 * N + n] = 0.;
      c[6 * N + n] = b[1 * N + n];
      c[7 * N + n] = b[2 * N + n];
      c[8 * N + n] = 0.;
    }
  }
0449
  // K = psErr * (H^T * G):  C = A * B with A a symmetric 6x6 covariance
  // (packed row-major lower triangle: row i starts at i*(i+1)/2) and B the
  // 3x3 H^T*G product whose third column is zero -- hence the result's third
  // column is zeroed too. Only the position block of A (columns 0..2 of each
  // row) participates, since H projects position only.
  inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[6 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[3 * N + n] * b[7 * N + n];
      c[2 * N + n] = 0;
      c[3 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[6 * N + n];
      c[4 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[4 * N + n] + a[4 * N + n] * b[7 * N + n];
      c[5 * N + n] = 0;
      c[6 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
      c[7 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
      c[8 * N + n] = 0;
      c[9 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
      c[10 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
      c[11 * N + n] = 0;
      c[12 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[6 * N + n];
      c[13 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[4 * N + n] + a[12 * N + n] * b[7 * N + n];
      c[14 * N + n] = 0;
      c[15 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[6 * N + n];
      c[16 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[4 * N + n] + a[17 * N + n] * b[7 * N + n];
      c[17 * N + n] = 0;
    }
  }
0485
  // C = A^T * B: generic-plane variant of KalmanHTG. A is a 2x3 projection
  // (row-major: a0..a2 = row 0, a3..a5 = row 1) and B the inverted symmetric
  // 2x2 residual covariance (b0=B00, b1=B10, b2=B11); the 3x2 product is
  // stored row-major in C.
  inline void KalmanHTG(const MPlex2H& A, const MPlex2S& B, MPlexH2& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[3 * N + n] * b[1 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[3 * N + n] * b[2 * N + n];
      c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[4 * N + n] * b[1 * N + n];
      c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[4 * N + n] * b[2 * N + n];
      c[4 * N + n] = a[2 * N + n] * b[0 * N + n] + a[5 * N + n] * b[1 * N + n];
      c[5 * N + n] = a[2 * N + n] * b[1 * N + n] + a[5 * N + n] * b[2 * N + n];
    }
  }
0521
  // K = psErr * (H^T * G): C = A * B with A a symmetric 6x6 covariance
  // (packed lower triangle) and B the 3x2 product from the generic-plane
  // KalmanHTG (row-major: b0,b1 = row 0; b2,b3 = row 1; b4,b5 = row 2).
  // Only A's position columns (0..2 of each row) contribute; the 6x2 gain is
  // stored row-major in C.
  inline void KalmanGain(const MPlexLS& A, const MPlexH2& B, MPlexL2& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[2 * N + n] + a[3 * N + n] * b[4 * N + n];
      c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[5 * N + n];
      c[2 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[2 * N + n] + a[4 * N + n] * b[4 * N + n];
      c[3 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[5 * N + n];
      c[4 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[2 * N + n] + a[5 * N + n] * b[4 * N + n];
      c[5 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[5 * N + n];
      c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[2 * N + n] + a[8 * N + n] * b[4 * N + n];
      c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[5 * N + n];
      c[8 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[2 * N + n] + a[12 * N + n] * b[4 * N + n];
      c[9 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[5 * N + n];
      c[10 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[2 * N + n] + a[17 * N + n] * b[4 * N + n];
      c[11 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[5 * N + n];
    }
  }
0572
  // Constrain the covariance in the x-y plane along the tangent direction
  // (r00, r01): every covariance element involving x or y is replaced by its
  // projection onto the tangent, removing the component orthogonal to it.
  // Note Co is expected to already be a copy of the full input covariance;
  // only elements coupling to x/y are overwritten here. The in-place
  // overwrite order matters: the projected scalar is staged in co[...] and
  // then split into its x and y parts.
  inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) {
    typedef float T;
    const idx_t N = NN;

    const T* r00 = R00.fArray;
    ASSUME_ALIGNED(r00, 64);
    const T* r01 = R01.fArray;
    ASSUME_ALIGNED(r01, 64);
    const T* ci = Ci.fArray;
    ASSUME_ALIGNED(ci, 64);
    T* co = Co.fArray;
    ASSUME_ALIGNED(co, 64);

#pragma omp simd
    for (int n = 0; n < N; ++n) {
      // xy block: scalar variance along the tangent, then its outer product.
      co[0 * N + n] =
          r00[n] * r00[n] * ci[0 * N + n] + 2 * r00[n] * r01[n] * ci[1 * N + n] + r01[n] * r01[n] * ci[2 * N + n];
      co[1 * N + n] = r00[n] * r01[n] * co[0 * N + n];
      co[2 * N + n] = r01[n] * r01[n] * co[0 * N + n];
      co[0 * N + n] = r00[n] * r00[n] * co[0 * N + n];

      // Cross terms of x/y with z (row 2), phi (row 3), and the remaining
      // parameters (rows 4, 5): project the (x, y) pair onto the tangent.
      co[3 * N + n] = r00[n] * ci[3 * N + n] + r01[n] * ci[4 * N + n];
      co[4 * N + n] = r01[n] * co[3 * N + n];
      co[3 * N + n] = r00[n] * co[3 * N + n];

      co[6 * N + n] = r00[n] * ci[6 * N + n] + r01[n] * ci[7 * N + n];
      co[7 * N + n] = r01[n] * co[6 * N + n];
      co[6 * N + n] = r00[n] * co[6 * N + n];

      co[10 * N + n] = r00[n] * ci[10 * N + n] + r01[n] * ci[11 * N + n];
      co[11 * N + n] = r01[n] * co[10 * N + n];
      co[10 * N + n] = r00[n] * co[10 * N + n];

      co[15 * N + n] = r00[n] * ci[15 * N + n] + r01[n] * ci[16 * N + n];
      co[16 * N + n] = r01[n] * co[15 * N + n];
      co[15 * N + n] = r00[n] * co[15 * N + n];
    }
  }
0614
  // 6x2 Kalman gain from a symmetric 6x6 covariance (A) and a symmetric 2x2
  // inverted residual covariance (B). The unrolled arithmetic is generated
  // code included from KalmanGain62.ah, operating on the aliases a/b/c/N
  // defined below.
  void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "KalmanGain62.ah"
  }
0630
  // C = K * H: multiply the 6x3 Kalman gain by the tangent-plane projection
  // defined by (B00, B01). The per-track loop lives in the .icc implementation.
  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
    KHMult_imp(A, B00, B01, C, 0, NN);
  }
0635
0636 inline void KHMult(const MPlexL2& A, const MPlex2H& B, MPlexLL& C) {
0637
0638
0639
0640
0641
0642
0643
0644
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660
0661
0662
0663
0664
0665
0666 #pragma omp simd
0667 for (int n = 0; n < NN; ++n) {
0668 C(n, 0, 0) = A(n, 0, 0) * B(n, 0, 0) + A(n, 0, 1) * B(n, 1, 0);
0669 C(n, 0, 1) = A(n, 0, 0) * B(n, 0, 1) + A(n, 0, 1) * B(n, 1, 1);
0670 C(n, 0, 2) = A(n, 0, 0) * B(n, 0, 2) + A(n, 0, 1) * B(n, 1, 2);
0671 C(n, 0, 3) = 0;
0672 C(n, 0, 4) = 0;
0673 C(n, 0, 5) = 0;
0674 C(n, 0, 6) = A(n, 1, 0) * B(n, 0, 0) + A(n, 1, 1) * B(n, 1, 0);
0675 C(n, 0, 7) = A(n, 1, 0) * B(n, 0, 1) + A(n, 1, 1) * B(n, 1, 1);
0676 C(n, 0, 8) = A(n, 1, 0) * B(n, 0, 2) + A(n, 1, 1) * B(n, 1, 2);
0677 C(n, 0, 9) = 0;
0678 C(n, 0, 10) = 0;
0679 C(n, 0, 11) = 0;
0680 C(n, 0, 12) = A(n, 2, 0) * B(n, 0, 0) + A(n, 2, 1) * B(n, 1, 0);
0681 C(n, 0, 13) = A(n, 2, 0) * B(n, 0, 1) + A(n, 2, 1) * B(n, 1, 1);
0682 C(n, 0, 14) = A(n, 2, 0) * B(n, 0, 2) + A(n, 2, 1) * B(n, 1, 2);
0683 C(n, 0, 15) = 0;
0684 C(n, 0, 16) = 0;
0685 C(n, 0, 17) = 0;
0686 C(n, 0, 18) = A(n, 3, 0) * B(n, 0, 0) + A(n, 3, 1) * B(n, 1, 0);
0687 C(n, 0, 19) = A(n, 3, 0) * B(n, 0, 1) + A(n, 3, 1) * B(n, 1, 1);
0688 C(n, 0, 20) = A(n, 3, 0) * B(n, 0, 2) + A(n, 3, 1) * B(n, 1, 2);
0689 C(n, 0, 21) = 0;
0690 C(n, 0, 22) = 0;
0691 C(n, 0, 23) = 0;
0692 C(n, 0, 24) = A(n, 4, 0) * B(n, 0, 0) + A(n, 4, 1) * B(n, 1, 0);
0693 C(n, 0, 25) = A(n, 4, 0) * B(n, 0, 1) + A(n, 4, 1) * B(n, 1, 1);
0694 C(n, 0, 26) = A(n, 4, 0) * B(n, 0, 2) + A(n, 4, 1) * B(n, 1, 2);
0695 C(n, 0, 27) = 0;
0696 C(n, 0, 28) = 0;
0697 C(n, 0, 29) = 0;
0698 C(n, 0, 30) = A(n, 5, 0) * B(n, 0, 0) + A(n, 5, 1) * B(n, 1, 0);
0699 C(n, 0, 31) = A(n, 5, 0) * B(n, 0, 1) + A(n, 5, 1) * B(n, 1, 1);
0700 C(n, 0, 32) = A(n, 5, 0) * B(n, 0, 2) + A(n, 5, 1) * B(n, 1, 2);
0701 C(n, 0, 33) = 0;
0702 C(n, 0, 34) = 0;
0703 C(n, 0, 35) = 0;
0704 }
0705 }
0706
  // C = (K*H) * B: multiply the full 6x6 K*H product by the symmetric 6x6
  // covariance B, keeping only the symmetric part of the result. The
  // unrolled arithmetic is generated code included from KHC.ah, which uses
  // the aliases a/b/c/N defined below.
  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "KHC.ah"
  }
0722
  // Variant of KHC for the 6x2-gain path: A is the K*H product expressed as
  // a 6x2 Matriplex; generated code in K62HC.ah computes the symmetric
  // 6x6 result using the aliases a/b/c/N defined below.
  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "K62HC.ah"
  }
0738
  // C = A * B: chain the 5x5 CCS-to-local Jacobian with a 5x6 Jacobian.
  // Unrolled product generated in JacCCS2Loc.ah (aliases a/b/c/N below).
  void JacCCS2Loc(const MPlex55& A, const MPlex56& B, MPlex56& C) {
    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "JacCCS2Loc.ah"
  }
0752
  // C = A * B: first half of the similarity transform taking the 6x6
  // symmetric state covariance B to the local 5-parameter frame via the 5x6
  // Jacobian A. Unrolled product generated in PsErrLoc.ah.
  void PsErrLoc(const MPlex56& A, const MPlexLS& B, MPlex56& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "PsErrLoc.ah"
  }
0768
  // C = B * A^T: second half of the similarity transform started in
  // PsErrLoc; yields the symmetric 5x5 local covariance. Note the swapped
  // parameter naming (B first) matches the generated code in
  // PsErrLocTransp.ah, which reads the aliases a/b/c/N below.
  void PsErrLocTransp(const MPlex56& B, const MPlex56& A, MPlex5S& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "PsErrLocTransp.ah"
  }
0784
  // C = A * B: apply the 5x5 (1 - K*H) update matrix A to the symmetric 5x5
  // local covariance B. Unrolled product generated in PsErrLocUpd.ah.
  void PsErrLocUpd(const MPlex55& A, const MPlex5S& B, MPlex5S& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "PsErrLocUpd.ah"
  }
0800
  // C = A * B: chain the 6x5 local-to-CCS Jacobian A with a 5x5 Jacobian B.
  // Unrolled product generated in JacLoc2CCS.ah (aliases a/b/c/N below).
  void JacLoc2CCS(const MPlex65& A, const MPlex55& B, MPlex65& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "JacLoc2CCS.ah"
  }
0816
  // C = A * B: first half of the similarity transform taking the symmetric
  // 5x5 local covariance B back to the 6-parameter CCS frame via the 6x5
  // Jacobian A. Unrolled product generated in OutErrCCS.ah.
  void OutErrCCS(const MPlex65& A, const MPlex5S& B, MPlex65& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "OutErrCCS.ah"
  }
0832
  // C = B * A^T: second half of the similarity transform started in
  // OutErrCCS; yields the symmetric 6x6 output covariance. As in
  // PsErrLocTransp, the swapped parameter naming (B first) matches the
  // generated code in OutErrCCSTransp.ah.
  void OutErrCCSTransp(const MPlex65& B, const MPlex65& A, MPlexLS& C) {
    typedef float T;
    const Matriplex::idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "OutErrCCSTransp.ah"
  }
0848
0849
0850 template <typename T1, typename T2, typename T3>
0851 void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
0852 #ifdef DEBUG
0853 assert(nja == nib);
0854 assert(nia == nic);
0855 assert(njb == njc);
0856 #endif
0857 for (int n = 0; n < NN; ++n) {
0858 for (int i = 0; i < nia; ++i) {
0859 for (int j = 0; j < njb; ++j) {
0860 C(n, i, j) = 0.;
0861 for (int k = 0; k < nja; ++k)
0862 C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
0863 }
0864 }
0865 }
0866 }
0867
0868
0869
0870 template <typename T1, typename T2, typename T3>
0871 void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
0872 #ifdef DEBUG
0873 assert(nja == njb);
0874 assert(nia == nic);
0875 assert(nib == njc);
0876 #endif
0877 for (int n = 0; n < NN; ++n) {
0878 for (int i = 0; i < nia; ++i) {
0879 for (int j = 0; j < nib; ++j) {
0880 C(n, i, j) = 0.;
0881 for (int k = 0; k < nja; ++k)
0882 C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, j, k);
0883 }
0884 }
0885 }
0886 }
0887
0888 }
0889
0890
0891
0892
0893
namespace {
  // Don't-care output arguments passed to kalmanOperation when the caller
  // does not request a given result (e.g. no chi2 during a parameter update).
  // CMS_SA_ALLOW presumably whitelists these mutable statics for the CMS
  // static analyzer -- see cms_common_macros.h.
  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;
  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;
  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;
}  // namespace
0903
0904 namespace mkfit {
0905
0906
0907
0908
0909
  // Kalman update at the current position (no propagation): updates the
  // track parameters/covariance with the measurement (msErr, msPar) and
  // applies the local xy-covariance constraint. Chi2 is not computed
  // (dummy_chi2 is a write-only placeholder).
  void kalmanUpdate(const MPlexLS& psErr,
                    const MPlexLV& psPar,
                    const MPlexHS& msErr,
                    const MPlexHV& msPar,
                    MPlexLS& outErr,
                    MPlexLV& outPar,
                    const int N_proc) {
    kalmanOperation(KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
  }
0919
0920 void kalmanPropagateAndUpdate(const MPlexLS& psErr,
0921 const MPlexLV& psPar,
0922 MPlexQI& Chg,
0923 const MPlexHS& msErr,
0924 const MPlexHV& msPar,
0925 MPlexLS& outErr,
0926 MPlexLV& outPar,
0927 MPlexQI& outFailFlag,
0928 const int N_proc,
0929 const PropagationFlags& propFlags,
0930 const bool propToHit) {
0931 if (propToHit) {
0932 MPlexLS propErr;
0933 MPlexLV propPar;
0934 MPlexQF msRad;
0935 #pragma omp simd
0936 for (int n = 0; n < NN; ++n) {
0937 msRad.At(n, 0, 0) = hipo(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
0938 }
0939
0940 propagateHelixToRMPlex(psErr, psPar, Chg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);
0941
0942 kalmanOperation(
0943 KFO_Update_Params | KFO_Local_Cov, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0944 } else {
0945 kalmanOperation(
0946 KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0947 }
0948 for (int n = 0; n < NN; ++n) {
0949 if (n < N_proc && outPar.At(n, 3, 0) < 0) {
0950 Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
0951 outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
0952 }
0953 }
0954 }
0955
0956
0957
  // Compute the chi2 of the measurement against the current state without
  // updating parameters. dummy_err/dummy_par absorb the unused outputs.
  // Note: inChg is unused here; the parameter is kept for signature symmetry
  // with the propagate-and-compute variant below.
  void kalmanComputeChi2(const MPlexLS& psErr,
                         const MPlexLV& psPar,
                         const MPlexQI& inChg,
                         const MPlexHS& msErr,
                         const MPlexHV& msPar,
                         MPlexQF& outChi2,
                         const int N_proc) {
    kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
  }
0967
  // Optionally propagate the state to the hit radius, then compute the chi2
  // of the measurement; parameters/covariance are not updated. propPar
  // returns the propagated (or input, when propToHit is false) parameters.
  void kalmanPropagateAndComputeChi2(const MPlexLS& psErr,
                                     const MPlexLV& psPar,
                                     const MPlexQI& inChg,
                                     const MPlexHS& msErr,
                                     const MPlexHV& msPar,
                                     MPlexQF& outChi2,
                                     MPlexLV& propPar,
                                     MPlexQI& outFailFlag,
                                     const int N_proc,
                                     const PropagationFlags& propFlags,
                                     const bool propToHit) {
    propPar = psPar;
    if (propToHit) {
      MPlexLS propErr;
      MPlexQF msRad;
#pragma omp simd
      for (int n = 0; n < NN; ++n) {
        // Zero the inactive lanes so stale msPar data cannot leak into the
        // vectorized propagation.
        if (n < N_proc) {
          msRad.At(n, 0, 0) = hipo(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
        } else {
          msRad.At(n, 0, 0) = 0.0f;
        }
      }

      propagateHelixToRMPlex(psErr, psPar, inChg, msRad, propErr, propPar, outFailFlag, N_proc, propFlags);

      kalmanOperation(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
    } else {
      kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
    }
  }
0999
1000
1001
  // Core Kalman engine for barrel (cylinder) geometry. Depending on the bits
  // set in kfOp it computes the chi2 (KFO_Calculate_Chi2), updates the
  // parameters and covariance (KFO_Update_Params), and/or applies the local
  // xy-covariance constraint (KFO_Local_Cov).
  //
  // Steps:
  //  1. Build the tangent-plane rotation (rotT00, rotT01) at the average of
  //     state and measurement x,y positions.
  //  2. Form the global residual and its 3x3 covariance.
  //  3. Rotate both onto the local (tangent, z) 2-dim frame and invert the
  //     2x2 covariance in place.
  //  4. Chi2 and/or gain-based parameter/covariance update from the local
  //     quantities.
  void kalmanOperation(const int kfOp,
                       const MPlexLS& psErr,
                       const MPlexLV& psPar,
                       const MPlexHS& msErr,
                       const MPlexHV& msPar,
                       MPlexLS& outErr,
                       MPlexLV& outPar,
                       MPlexQF& outChi2,
                       const int N_proc) {
#ifdef DEBUG
    {
      dmutex_guard;
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // Tangent-plane rotation at the midpoint of state and measurement in xy:
    // (rotT00, rotT01) is the unit vector orthogonal to the mean radial
    // direction. Inactive lanes (n >= N_proc) are zeroed to avoid operating
    // on garbage in the vectorized helpers below.
    MPlexQF rotT00;
    MPlexQF rotT01;
    for (int n = 0; n < NN; ++n) {
      if (n < N_proc) {
        const float r = hipo(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
        rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
        rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
      } else {
        rotT00.At(n, 0, 0) = 0.0f;
        rotT01.At(n, 0, 0) = 0.0f;
      }
    }

    // Global residual (measurement - state position) and its covariance
    // (state + measurement position covariances).
    MPlexHV res_glo;
    SubtractFirst3(msPar, psPar, res_glo);

    MPlexHS resErr_glo;
    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);

    // Project residual and covariance onto the local (tangent, z) frame.
    MPlex2V res_loc;
    RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
    MPlex2S resErr_loc;
    MPlexHH tempHH;
    ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
    ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);

#ifdef DEBUG
    {
      dmutex_guard;
      printf("res_glo:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", res_glo.At(0, i, 0));
      }
      printf("\n");
      printf("resErr_glo:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", resErr_glo.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_loc:\n");
      for (int i = 0; i < 2; ++i) {
        printf("%8f ", res_loc.At(0, i, 0));
      }
      printf("\n");
      printf("tempHH:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", tempHH.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("resErr_loc:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr_loc.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // In-place inversion of the 2x2 local residual covariance.
    Matriplex::invertCramerSym(resErr_loc);

    if (kfOp & KFO_Calculate_Chi2) {
      Chi2Similarity(res_loc, resErr_loc, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      // Optionally constrain the xy covariance along the tangent direction
      // before building the gain.
      MPlexLS psErrLoc = psErr;
      if (kfOp & KFO_Local_Cov)
        CovXYconstrain(rotT00, rotT01, psErr, psErrLoc);

      // Gain: K = psErrLoc * H^T * (H * resErr * H^T)^{-1}.
      MPlexLH K;
      KalmanHTG(rotT00, rotT01, resErr_loc, tempHH);  // tempHH reused as H^T * G
      KalmanGain(psErrLoc, tempHH, K);

      // Updated parameters: outPar = psPar + K * res_loc, with phi wrapped
      // back into range.
      MultResidualsAdd(K, psPar, res_loc, outPar);

      squashPhiMPlex(outPar, N_proc);  // ensure phi is in range [-pi, pi]

      // Updated covariance: outErr = psErrLoc - (K*H) * psErrLoc.
      MPlexLL tempLL;
      KHMult(K, rotT00, rotT01, tempLL);
      KHC(tempLL, psErrLoc, outErr);
      outErr.subtract(psErrLoc, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        if (kfOp & KFO_Local_Cov) {
          printf("psErrLoc:\n");
          for (int i = 0; i < 6; ++i) {
            for (int j = 0; j < 6; ++j)
              printf("% 8e ", psErrLoc.At(0, i, j));
            printf("\n");
          }
          printf("\n");
        }
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempHH:\n");
        for (int i = 0; i < 3; ++i) {
          for (int j = 0; j < 3; ++j)
            printf("%8f ", tempHH.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 3; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempLL:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", tempLL.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
1210
1211
1212
1213
1214
1215 void kalmanUpdatePlane(const MPlexLS& psErr,
1216 const MPlexLV& psPar,
1217 const MPlexQI& Chg,
1218 const MPlexHS& msErr,
1219 const MPlexHV& msPar,
1220 const MPlexHV& plNrm,
1221 const MPlexHV& plDir,
1222 const MPlexHV& plPnt,
1223 MPlexLS& outErr,
1224 MPlexLV& outPar,
1225 const int N_proc) {
1226 kalmanOperationPlaneLocal(KFO_Update_Params | KFO_Local_Cov,
1227 psErr,
1228 psPar,
1229 Chg,
1230 msErr,
1231 msPar,
1232 plNrm,
1233 plDir,
1234 plPnt,
1235 outErr,
1236 outPar,
1237 dummy_chi2,
1238 N_proc);
1239 }
1240
1241 void kalmanPropagateAndUpdatePlane(const MPlexLS& psErr,
1242 const MPlexLV& psPar,
1243 MPlexQI& Chg,
1244 const MPlexHS& msErr,
1245 const MPlexHV& msPar,
1246 const MPlexHV& plNrm,
1247 const MPlexHV& plDir,
1248 const MPlexHV& plPnt,
1249 MPlexLS& outErr,
1250 MPlexLV& outPar,
1251 MPlexQI& outFailFlag,
1252 const int N_proc,
1253 const PropagationFlags& propFlags,
1254 const bool propToHit) {
1255 if (propToHit) {
1256 MPlexLS propErr;
1257 MPlexLV propPar;
1258 propagateHelixToPlaneMPlex(psErr, psPar, Chg, plPnt, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1259
1260 kalmanOperationPlaneLocal(KFO_Update_Params | KFO_Local_Cov,
1261 propErr,
1262 propPar,
1263 Chg,
1264 msErr,
1265 msPar,
1266 plNrm,
1267 plDir,
1268 plPnt,
1269 outErr,
1270 outPar,
1271 dummy_chi2,
1272 N_proc);
1273 } else {
1274 kalmanOperationPlaneLocal(KFO_Update_Params | KFO_Local_Cov,
1275 psErr,
1276 psPar,
1277 Chg,
1278 msErr,
1279 msPar,
1280 plNrm,
1281 plDir,
1282 plPnt,
1283 outErr,
1284 outPar,
1285 dummy_chi2,
1286 N_proc);
1287 }
1288 for (int n = 0; n < NN; ++n) {
1289 if (outPar.At(n, 3, 0) < 0) {
1290 Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
1291 outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
1292 }
1293 }
1294 }
1295
1296
1297
1298 void kalmanComputeChi2Plane(const MPlexLS& psErr,
1299 const MPlexLV& psPar,
1300 const MPlexQI& inChg,
1301 const MPlexHS& msErr,
1302 const MPlexHV& msPar,
1303 const MPlexHV& plNrm,
1304 const MPlexHV& plDir,
1305 const MPlexHV& plPnt,
1306 MPlexQF& outChi2,
1307 const int N_proc) {
1308 kalmanOperationPlaneLocal(KFO_Calculate_Chi2,
1309 psErr,
1310 psPar,
1311 inChg,
1312 msErr,
1313 msPar,
1314 plNrm,
1315 plDir,
1316 plPnt,
1317 dummy_err,
1318 dummy_par,
1319 outChi2,
1320 N_proc);
1321 }
1322
1323 void kalmanPropagateAndComputeChi2Plane(const MPlexLS& psErr,
1324 const MPlexLV& psPar,
1325 const MPlexQI& inChg,
1326 const MPlexHS& msErr,
1327 const MPlexHV& msPar,
1328 const MPlexHV& plNrm,
1329 const MPlexHV& plDir,
1330 const MPlexHV& plPnt,
1331 MPlexQF& outChi2,
1332 MPlexLV& propPar,
1333 MPlexQI& outFailFlag,
1334 const int N_proc,
1335 const PropagationFlags& propFlags,
1336 const bool propToHit) {
1337 propPar = psPar;
1338 if (propToHit) {
1339 MPlexLS propErr;
1340 propagateHelixToPlaneMPlex(psErr, psPar, inChg, plPnt, plNrm, propErr, propPar, outFailFlag, N_proc, propFlags);
1341
1342 kalmanOperationPlaneLocal(KFO_Calculate_Chi2,
1343 propErr,
1344 propPar,
1345 inChg,
1346 msErr,
1347 msPar,
1348 plNrm,
1349 plDir,
1350 plPnt,
1351 dummy_err,
1352 dummy_par,
1353 outChi2,
1354 N_proc);
1355 } else {
1356 kalmanOperationPlaneLocal(KFO_Calculate_Chi2,
1357 psErr,
1358 psPar,
1359 inChg,
1360 msErr,
1361 msPar,
1362 plNrm,
1363 plDir,
1364 plPnt,
1365 dummy_err,
1366 dummy_par,
1367 outChi2,
1368 N_proc);
1369 }
1370 }
1371
1372
1373
1374 void kalmanOperationPlaneLocal(const int kfOp,
1375 const MPlexLS& psErr,
1376 const MPlexLV& psPar,
1377 const MPlexQI& inChg,
1378 const MPlexHS& msErr,
1379 const MPlexHV& msPar,
1380 const MPlexHV& plNrm,
1381 const MPlexHV& plDir,
1382 const MPlexHV& plPnt,
1383 MPlexLS& outErr,
1384 MPlexLV& outPar,
1385 MPlexQF& outChi2,
1386 const int N_proc) {
1387 #ifdef DEBUG
1388 {
1389 dmutex_guard;
1390 printf("psPar:\n");
1391 for (int i = 0; i < 6; ++i) {
1392 printf("%8f ", psPar.constAt(0, 0, i));
1393 printf("\n");
1394 }
1395 printf("\n");
1396 printf("psErr:\n");
1397 for (int i = 0; i < 6; ++i) {
1398 for (int j = 0; j < 6; ++j)
1399 printf("%8f ", psErr.constAt(0, i, j));
1400 printf("\n");
1401 }
1402 printf("\n");
1403 printf("msPar:\n");
1404 for (int i = 0; i < 3; ++i) {
1405 printf("%8f ", msPar.constAt(0, 0, i));
1406 printf("\n");
1407 }
1408 printf("\n");
1409 printf("msErr:\n");
1410 for (int i = 0; i < 3; ++i) {
1411 for (int j = 0; j < 3; ++j)
1412 printf("%8f ", msErr.constAt(0, i, j));
1413 printf("\n");
1414 }
1415 printf("\n");
1416 }
1417 #endif
1418
1419 MPlexHH rot;
1420 #pragma omp simd
1421 for (int n = 0; n < NN; ++n) {
1422 rot(n, 0, 0) = plDir(n, 0, 0);
1423 rot(n, 0, 1) = plDir(n, 1, 0);
1424 rot(n, 0, 2) = plDir(n, 2, 0);
1425 rot(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
1426 rot(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
1427 rot(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
1428 rot(n, 2, 0) = plNrm(n, 0, 0);
1429 rot(n, 2, 1) = plNrm(n, 1, 0);
1430 rot(n, 2, 2) = plNrm(n, 2, 0);
1431 }
1432
1433
1434 MPlexHV xd;
1435 #pragma omp simd
1436 for (int n = 0; n < NN; ++n) {
1437 xd(n, 0, 0) = psPar(n, 0, 0) - plPnt(n, 0, 0);
1438 xd(n, 0, 1) = psPar(n, 0, 1) - plPnt(n, 0, 1);
1439 xd(n, 0, 2) = psPar(n, 0, 2) - plPnt(n, 0, 2);
1440 }
1441 MPlex2V xlo;
1442 RotateResidualsOnPlane(rot, xd, xlo);
1443
1444 MPlexQF sinP, sinT, cosP, cosT, pt;
1445 #pragma omp simd
1446 for (int n = 0; n < NN; ++n) {
1447 pt(n, 0, 0) = 1.f / psPar(n, 3, 0);
1448 sinP(n, 0, 0) = std::sin(psPar(n, 4, 0));
1449 cosP(n, 0, 0) = std::cos(psPar(n, 4, 0));
1450 sinT(n, 0, 0) = std::sin(psPar(n, 5, 0));
1451 cosT(n, 0, 0) = std::cos(psPar(n, 5, 0));
1452 }
1453
1454 MPlexHV pgl;
1455 #pragma omp simd
1456 for (int n = 0; n < NN; ++n) {
1457 pgl(n, 0, 0) = cosP(n, 0, 0) * pt(n, 0, 0);
1458 pgl(n, 0, 1) = sinP(n, 0, 0) * pt(n, 0, 0);
1459 pgl(n, 0, 2) = cosT(n, 0, 0) * pt(n, 0, 0) / sinT(n, 0, 0);
1460 }
1461
1462 MPlexHV plo;
1463 RotateVectorOnPlane(rot, pgl, plo);
1464 MPlex5V lp;
1465 #pragma omp simd
1466 for (int n = 0; n < NN; ++n) {
1467 lp(n, 0, 0) = inChg(n, 0, 0) * psPar(n, 3, 0) * sinT(n, 0, 0);
1468 lp(n, 0, 1) = plo(n, 0, 0) / plo(n, 0, 2);
1469 lp(n, 0, 2) = plo(n, 0, 1) / plo(n, 0, 2);
1470 lp(n, 0, 3) = xlo(n, 0, 0);
1471 lp(n, 0, 4) = xlo(n, 0, 1);
1472 }
1473 MPlexQI pzSign;
1474 #pragma omp simd
1475 for (int n = 0; n < NN; ++n) {
1476 pzSign(n, 0, 0) = plo(n, 0, 2) > 0.f ? 1 : -1;
1477 }
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516 MPlex56 jacCCS2Curv(0.f);
1517 #pragma omp simd
1518 for (int n = 0; n < NN; ++n) {
1519 jacCCS2Curv(n, 0, 3) = inChg(n, 0, 0) * sinT(n, 0, 0);
1520 jacCCS2Curv(n, 0, 5) = inChg(n, 0, 0) * cosT(n, 0, 0) * psPar(n, 3, 0);
1521 jacCCS2Curv(n, 1, 5) = -1.f;
1522 jacCCS2Curv(n, 2, 4) = 1.f;
1523 jacCCS2Curv(n, 3, 0) = -sinP(n, 0, 0);
1524 jacCCS2Curv(n, 3, 1) = cosP(n, 0, 0);
1525 jacCCS2Curv(n, 4, 0) = -cosP(n, 0, 0) * cosT(n, 0, 0);
1526 jacCCS2Curv(n, 4, 1) = -sinP(n, 0, 0) * cosT(n, 0, 0);
1527 jacCCS2Curv(n, 4, 2) = sinT(n, 0, 0);
1528 }
1529
1530
1531
1532 MPlexHV un;
1533 MPlexHV vn;
1534 #pragma omp simd
1535 for (int n = 0; n < NN; ++n) {
1536 const float abslp00 = std::abs(lp(n, 0, 0));
1537 vn(n, 0, 2) = std::max(1.e-30f, abslp00 * pt(n, 0, 0));
1538 un(n, 0, 0) = -pgl(n, 0, 1) * abslp00 / vn(n, 0, 2);
1539 un(n, 0, 1) = pgl(n, 0, 0) * abslp00 / vn(n, 0, 2);
1540 un(n, 0, 2) = 0.f;
1541 vn(n, 0, 0) = -pgl(n, 0, 2) * abslp00 * un(n, 0, 1);
1542 vn(n, 0, 1) = pgl(n, 0, 2) * abslp00 * un(n, 0, 0);
1543 }
1544 MPlexHV u;
1545 RotateVectorOnPlane(rot, un, u);
1546 MPlexHV v;
1547 RotateVectorOnPlane(rot, vn, v);
1548 MPlex55 jacCurv2Loc(0.f);
1549 #pragma omp simd
1550 for (int n = 0; n < NN; ++n) {
1551
1552 const float bF = 0.01f * Const::sol * Config::Bfield;
1553 const float qh2 = bF * lp(n, 0, 0);
1554 const float t1r = std::sqrt(1.f + lp(n, 0, 1) * lp(n, 0, 1) + lp(n, 0, 2) * lp(n, 0, 2)) * pzSign(n, 0, 0);
1555 const float t2r = t1r * t1r;
1556 const float t3r = t1r * t2r;
1557 jacCurv2Loc(n, 0, 0) = 1.f;
1558 jacCurv2Loc(n, 1, 1) = -u(n, 0, 1) * t2r;
1559 jacCurv2Loc(n, 1, 2) = v(n, 0, 1) * vn(n, 0, 2) * t2r;
1560 jacCurv2Loc(n, 2, 1) = u(n, 0, 0) * t2r;
1561 jacCurv2Loc(n, 2, 2) = -v(n, 0, 0) * vn(n, 0, 2) * t2r;
1562 jacCurv2Loc(n, 3, 3) = v(n, 0, 1) * t1r;
1563 jacCurv2Loc(n, 3, 4) = -u(n, 0, 1) * t1r;
1564 jacCurv2Loc(n, 4, 3) = -v(n, 0, 0) * t1r;
1565 jacCurv2Loc(n, 4, 4) = u(n, 0, 0) * t1r;
1566 const float cosz = -vn(n, 0, 2) * qh2;
1567 const float ui = u(n, 0, 2) * t3r;
1568 const float vi = v(n, 0, 2) * t3r;
1569 jacCurv2Loc(n, 1, 3) = -ui * v(n, 0, 1) * cosz;
1570 jacCurv2Loc(n, 1, 4) = -vi * v(n, 0, 1) * cosz;
1571 jacCurv2Loc(n, 2, 3) = ui * v(n, 0, 0) * cosz;
1572 jacCurv2Loc(n, 2, 4) = vi * v(n, 0, 0) * cosz;
1573
1574 }
1575
1576
1577 MPlex56 jacCCS2Loc;
1578 JacCCS2Loc(jacCurv2Loc, jacCCS2Curv, jacCCS2Loc);
1579
1580
1581 MPlex5S psErrLoc;
1582 MPlex56 temp56;
1583 PsErrLoc(jacCCS2Loc, psErr, temp56);
1584 PsErrLocTransp(temp56, jacCCS2Loc, psErrLoc);
1585
1586 MPlexHV md;
1587 #pragma omp simd
1588 for (int n = 0; n < NN; ++n) {
1589 md(n, 0, 0) = msPar(n, 0, 0) - plPnt(n, 0, 0);
1590 md(n, 0, 1) = msPar(n, 0, 1) - plPnt(n, 0, 1);
1591 md(n, 0, 2) = msPar(n, 0, 2) - plPnt(n, 0, 2);
1592 }
1593 MPlex2V mslo;
1594 RotateResidualsOnPlane(rot, md, mslo);
1595
1596 MPlex2V res_loc;
1597 #pragma omp simd
1598 for (int n = 0; n < NN; ++n) {
1599 res_loc(n, 0, 0) = mslo(n, 0, 0) - xlo(n, 0, 0);
1600 res_loc(n, 0, 1) = mslo(n, 0, 1) - xlo(n, 0, 1);
1601 }
1602
1603 MPlex2S msErr_loc;
1604 MPlex2H temp2Hmsl;
1605 ProjectResErr(rot, msErr, temp2Hmsl);
1606 ProjectResErrTransp(rot, temp2Hmsl, msErr_loc);
1607
1608 MPlex2S resErr_loc;
1609 #pragma omp simd
1610 for (int n = 0; n < NN; ++n) {
1611 resErr_loc(n, 0, 0) = psErrLoc(n, 3, 3) + msErr_loc(n, 0, 0);
1612 resErr_loc(n, 0, 1) = psErrLoc(n, 3, 4) + msErr_loc(n, 0, 1);
1613 resErr_loc(n, 1, 1) = psErrLoc(n, 4, 4) + msErr_loc(n, 1, 1);
1614 }
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675 Matriplex::invertCramerSym(resErr_loc);
1676
1677 if (kfOp & KFO_Calculate_Chi2) {
1678 Chi2Similarity(res_loc, resErr_loc, outChi2);
1679
1680 #ifdef DEBUG
1681 {
1682 dmutex_guard;
1683 printf("resErr_loc (Inv):\n");
1684 for (int i = 0; i < 2; ++i) {
1685 for (int j = 0; j < 2; ++j)
1686 printf("%8f ", resErr_loc.At(0, i, j));
1687 printf("\n");
1688 }
1689 printf("\n");
1690 printf("chi2: %8f\n", outChi2.At(0, 0, 0));
1691 }
1692 #endif
1693 }
1694
1695 if (kfOp & KFO_Update_Params) {
1696 MPlex52 K;
1697 #pragma omp simd
1698 for (int n = 0; n < NN; ++n) {
1699 #pragma GCC unroll 5
1700 for (int j = 0; j < 5; ++j) {
1701 K(n, j, 0) = resErr_loc(n, 0, 0) * psErrLoc(n, j, 3) + resErr_loc(n, 0, 1) * psErrLoc(n, j, 4);
1702 K(n, j, 1) = resErr_loc(n, 0, 1) * psErrLoc(n, j, 3) + resErr_loc(n, 1, 1) * psErrLoc(n, j, 4);
1703 }
1704 }
1705
1706 MPlex5V lp_upd;
1707 MultResidualsAdd(K, lp, res_loc, lp_upd);
1708
1709 MPlex55 ImKH(0.f);
1710 #pragma omp simd
1711 for (int n = 0; n < NN; ++n) {
1712 #pragma GCC unroll 5
1713 for (int j = 0; j < 5; ++j) {
1714 ImKH(n, j, j) = 1.f;
1715 ImKH(n, j, 3) -= K(n, j, 0);
1716 ImKH(n, j, 4) -= K(n, j, 1);
1717 }
1718 }
1719 MPlex5S psErrLoc_upd;
1720 PsErrLocUpd(ImKH, psErrLoc, psErrLoc_upd);
1721
1722
1723 MPlexHV lxu;
1724 MPlexHV lpu;
1725 #pragma omp simd
1726 for (int n = 0; n < NN; ++n) {
1727 lxu(n, 0, 0) = lp_upd(n, 0, 3);
1728 lxu(n, 0, 1) = lp_upd(n, 0, 4);
1729 lxu(n, 0, 2) = 0.f;
1730 lpu(n, 0, 2) =
1731 pzSign(n, 0, 0) / (std::max(std::abs(lp_upd(n, 0, 0)), 1.e-9f) *
1732 std::sqrt(1.f + lp_upd(n, 0, 1) * lp_upd(n, 0, 1) + lp_upd(n, 0, 2) * lp_upd(n, 0, 2)));
1733 lpu(n, 0, 0) = lpu(n, 0, 2) * lp_upd(n, 0, 1);
1734 lpu(n, 0, 1) = lpu(n, 0, 2) * lp_upd(n, 0, 2);
1735 }
1736 MPlexHV gxu;
1737 RotateVectorOnPlaneTransp(rot, lxu, gxu);
1738 #pragma omp simd
1739 for (int n = 0; n < NN; ++n) {
1740 gxu(n, 0, 0) += plPnt(n, 0, 0);
1741 gxu(n, 0, 1) += plPnt(n, 0, 1);
1742 gxu(n, 0, 2) += plPnt(n, 0, 2);
1743 }
1744 MPlexHV gpu;
1745 RotateVectorOnPlaneTransp(rot, lpu, gpu);
1746
1747 MPlexQF p;
1748 #pragma omp simd
1749 for (int n = 0; n < NN; ++n) {
1750 pt(n, 0, 0) = std::sqrt(gpu.At(n, 0, 0) * gpu.At(n, 0, 0) + gpu.At(n, 0, 1) * gpu.At(n, 0, 1));
1751 p(n, 0, 0) = std::sqrt(pt.At(n, 0, 0) * pt.At(n, 0, 0) + gpu.At(n, 0, 2) * gpu.At(n, 0, 2));
1752 sinP(n, 0, 0) = gpu.At(n, 0, 1) / pt(n, 0, 0);
1753 cosP(n, 0, 0) = gpu.At(n, 0, 0) / pt(n, 0, 0);
1754 sinT(n, 0, 0) = pt(n, 0, 0) / p(n, 0, 0);
1755 cosT(n, 0, 0) = gpu.At(n, 0, 2) / p(n, 0, 0);
1756 }
1757
1758 #pragma omp simd
1759 for (int n = 0; n < NN; ++n) {
1760 outPar(n, 0, 0) = gxu.At(n, 0, 0);
1761 outPar(n, 0, 1) = gxu.At(n, 0, 1);
1762 outPar(n, 0, 2) = gxu.At(n, 0, 2);
1763 outPar(n, 0, 3) = 1.f / pt(n, 0, 0);
1764 outPar(n, 0, 4) = getPhi(gpu.At(n, 0, 0), gpu.At(n, 0, 1));
1765 outPar(n, 0, 5) = getTheta(pt(n, 0, 0), gpu.At(n, 0, 2));
1766 }
1767
1768
1769
1770 MPlex65 jacCurv2CCS(0.f);
1771 #pragma omp simd
1772 for (int n = 0; n < NN; ++n) {
1773 jacCurv2CCS(n, 0, 3) = -sinP(n, 0, 0);
1774 jacCurv2CCS(n, 0, 4) = -cosT(n, 0, 0) * cosP(n, 0, 0);
1775 jacCurv2CCS(n, 1, 3) = cosP(n, 0, 0);
1776 jacCurv2CCS(n, 1, 4) = -cosT(n, 0, 0) * sinP(n, 0, 0);
1777 jacCurv2CCS(n, 2, 4) = sinT(n, 0, 0);
1778 jacCurv2CCS(n, 3, 0) = inChg(n, 0, 0) / sinT(n, 0, 0);
1779 jacCurv2CCS(n, 3, 1) = outPar(n, 3, 0) * cosT(n, 0, 0) / sinT(n, 0, 0);
1780 jacCurv2CCS(n, 4, 2) = 1.f;
1781 jacCurv2CCS(n, 5, 1) = -1.f;
1782 }
1783
1784
1785
1786 MPlexHV tnl;
1787 #pragma omp simd
1788 for (int n = 0; n < NN; ++n) {
1789 const float abslpupd00 = std::max(std::abs(lp_upd(n, 0, 0)), 1.e-9f);
1790 tnl(n, 0, 0) = lpu(n, 0, 0) * abslpupd00;
1791 tnl(n, 0, 1) = lpu(n, 0, 1) * abslpupd00;
1792 tnl(n, 0, 2) = lpu(n, 0, 2) * abslpupd00;
1793 }
1794 MPlexHV tn;
1795 RotateVectorOnPlaneTransp(rot, tnl, tn);
1796 #pragma omp simd
1797 for (int n = 0; n < NN; ++n) {
1798 vn(n, 0, 2) = std::max(1.e-30f, std::sqrt(tn(n, 0, 0) * tn(n, 0, 0) + tn(n, 0, 1) * tn(n, 0, 1)));
1799 un(n, 0, 0) = -tn(n, 0, 1) / vn(n, 0, 2);
1800 un(n, 0, 1) = tn(n, 0, 0) / vn(n, 0, 2);
1801 un(n, 0, 2) = 0.f;
1802 vn(n, 0, 0) = -tn(n, 0, 2) * un(n, 0, 1);
1803 vn(n, 0, 1) = tn(n, 0, 2) * un(n, 0, 0);
1804 }
1805 MPlex55 jacLoc2Curv(0.f);
1806 #pragma omp simd
1807 for (int n = 0; n < NN; ++n) {
1808
1809 const float bF = 0.01f * Const::sol * Config::Bfield;
1810 const float qh2 = bF * lp_upd(n, 0, 0);
1811 const float cosl1 = 1.f / vn(n, 0, 2);
1812 const float uj = un(n, 0, 0) * rot(n, 0, 0) + un(n, 0, 1) * rot(n, 0, 1);
1813 const float uk = un(n, 0, 0) * rot(n, 1, 0) + un(n, 0, 1) * rot(n, 1, 1);
1814 const float vj = vn(n, 0, 0) * rot(n, 0, 0) + vn(n, 0, 1) * rot(n, 0, 1) + vn(n, 0, 2) * rot(n, 0, 2);
1815 const float vk = vn(n, 0, 0) * rot(n, 1, 0) + vn(n, 0, 1) * rot(n, 1, 1) + vn(n, 0, 2) * rot(n, 1, 2);
1816 const float cosz = vn(n, 0, 2) * qh2;
1817 jacLoc2Curv(n, 0, 0) = 1.f;
1818 jacLoc2Curv(n, 1, 1) = tnl(n, 0, 2) * vj;
1819 jacLoc2Curv(n, 1, 2) = tnl(n, 0, 2) * vk;
1820 jacLoc2Curv(n, 2, 1) = tnl(n, 0, 2) * uj * cosl1;
1821 jacLoc2Curv(n, 2, 2) = tnl(n, 0, 2) * uk * cosl1;
1822 jacLoc2Curv(n, 3, 3) = uj;
1823 jacLoc2Curv(n, 3, 4) = uk;
1824 jacLoc2Curv(n, 4, 3) = vj;
1825 jacLoc2Curv(n, 4, 4) = vk;
1826 jacLoc2Curv(n, 2, 3) = tnl(n, 0, 0) * (cosz * cosl1);
1827 jacLoc2Curv(n, 2, 4) = tnl(n, 0, 1) * (cosz * cosl1);
1828 }
1829
1830
1831 MPlex65 jacLoc2CCS;
1832 JacLoc2CCS(jacCurv2CCS, jacLoc2Curv, jacLoc2CCS);
1833
1834
1835 MPlex65 temp65;
1836 OutErrCCS(jacLoc2CCS, psErrLoc_upd, temp65);
1837 OutErrCCSTransp(temp65, jacLoc2CCS, outErr);
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914 #ifdef DEBUG
1915 {
1916 dmutex_guard;
1917 if (kfOp & KFO_Local_Cov) {
1918 printf("psErrLoc_upd:\n");
1919 for (int i = 0; i < 5; ++i) {
1920 for (int j = 0; j < 5; ++j)
1921 printf("% 8e ", psErrLoc_upd.At(0, i, j));
1922 printf("\n");
1923 }
1924 printf("\n");
1925 }
1926 printf("resErr_loc (Inv):\n");
1927 for (int i = 0; i < 2; ++i) {
1928 for (int j = 0; j < 2; ++j)
1929 printf("%8f ", resErr_loc.At(0, i, j));
1930 printf("\n");
1931 }
1932 printf("\n");
1933 printf("K:\n");
1934 for (int i = 0; i < 6; ++i) {
1935 for (int j = 0; j < 2; ++j)
1936 printf("%8f ", K.At(0, i, j));
1937 printf("\n");
1938 }
1939 printf("\n");
1940 printf("outPar:\n");
1941 for (int i = 0; i < 6; ++i) {
1942 printf("%8f ", outPar.At(0, i, 0));
1943 }
1944 printf("\n");
1945 printf("outErr:\n");
1946 for (int i = 0; i < 6; ++i) {
1947 for (int j = 0; j < 6; ++j)
1948 printf("%8f ", outErr.At(0, i, j));
1949 printf("\n");
1950 }
1951 printf("\n");
1952 }
1953 #endif
1954 }
1955
1956 return;
1957 }
1958
1959
1960
  //==============================================================================
  // kalmanOperationPlane
  //
  // Chi2 and/or Kalman update for a hit on an arbitrary plane, performed in the
  // global frame: the 3D residual and its covariance are projected onto the
  // plane with a 2x3 projection matrix, and the gain update is applied directly
  // to the full 6-parameter CCS state (contrast kalmanOperationPlaneLocal,
  // which works in the local 5-parameter frame).
  //
  // kfOp is a bitmask of KFO_* flags; outErr/outPar/outChi2 are written only
  // for the requested operations.
  //==============================================================================

  void kalmanOperationPlane(const int kfOp,
                            const MPlexLS& psErr,
                            const MPlexLV& psPar,
                            const MPlexQI& inChg,
                            const MPlexHS& msErr,
                            const MPlexHV& msPar,
                            const MPlexHV& plNrm,
                            const MPlexHV& plDir,
                            const MPlexHV& plPnt,
                            MPlexLS& outErr,
                            MPlexLV& outPar,
                            MPlexQF& outChi2,
                            const int N_proc) {
#ifdef DEBUG
    {
      dmutex_guard;
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // 2x3 projection onto the plane: row 0 is the plane direction, row 1 is
    // (normal x direction). The plane normal itself is not a measured
    // direction, so only two rows are needed.
    MPlex2H prj;
    for (int n = 0; n < NN; ++n) {
      prj(n, 0, 0) = plDir(n, 0, 0);
      prj(n, 0, 1) = plDir(n, 1, 0);
      prj(n, 0, 2) = plDir(n, 2, 0);
      prj(n, 1, 0) = plNrm(n, 1, 0) * plDir(n, 2, 0) - plNrm(n, 2, 0) * plDir(n, 1, 0);
      prj(n, 1, 1) = plNrm(n, 2, 0) * plDir(n, 0, 0) - plNrm(n, 0, 0) * plDir(n, 2, 0);
      prj(n, 1, 2) = plNrm(n, 0, 0) * plDir(n, 1, 0) - plNrm(n, 1, 0) * plDir(n, 0, 0);
    }

    // Global 3D residual (hit - track position).
    MPlexHV res_glo;
    SubtractFirst3(msPar, psPar, res_glo);

    // Global residual covariance: position block of track covariance + hit covariance.
    MPlexHS resErr_glo;
    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);

    // Project residual and covariance onto the plane (2D).
    MPlex2V res_loc;
    RotateResidualsOnPlane(prj, res_glo, res_loc);
    MPlex2S resErr_loc;
    MPlex2H temp2H;
    ProjectResErr(prj, resErr_glo, temp2H);
    ProjectResErrTransp(prj, temp2H, resErr_loc);

#ifdef DEBUG
    {
      dmutex_guard;
      printf("prj:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", prj.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_glo:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", res_glo.At(0, i, 0));
      }
      printf("\n");
      printf("resErr_glo:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", resErr_glo.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("res_loc:\n");
      for (int i = 0; i < 2; ++i) {
        printf("%8f ", res_loc.At(0, i, 0));
      }
      printf("\n");
      printf("temp2H:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", temp2H.At(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("resErr_loc:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr_loc.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // In-place inversion of the 2x2 symmetric residual covariance.
    Matriplex::invertCramerSym(resErr_loc);

    if (kfOp & KFO_Calculate_Chi2) {
      Chi2Similarity(res_loc, resErr_loc, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      // NOTE(review): unlike the cylinder path, no CovXYconstrain is applied
      // here even when KFO_Local_Cov is set; psErrLoc is a plain copy.
      MPlexLS psErrLoc = psErr;

      // Kalman gain K (6x2) = psErrLoc * prj^T * resErr_loc^-1.
      MPlexH2 tempH2;
      MPlexL2 K;
      KalmanHTG(prj, resErr_loc, tempH2);
      KalmanGain(psErrLoc, tempH2, K);

      // outPar = psPar + K * res_loc.
      MultResidualsAdd(K, psPar, res_loc, outPar);

      // Keep phi within its canonical range.
      squashPhiMPlex(outPar, N_proc);

      // outErr = psErrLoc - K*H*psErrLoc (covariance update).
      MPlexLL tempLL;
      KHMult(K, prj, tempLL);
      KHC(tempLL, psErrLoc, outErr);
      outErr.subtract(psErrLoc, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        if (kfOp & KFO_Local_Cov) {
          printf("psErrLoc:\n");
          for (int i = 0; i < 6; ++i) {
            for (int j = 0; j < 6; ++j)
              printf("% 8e ", psErrLoc.At(0, i, j));
            printf("\n");
          }
          printf("\n");
        }
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempH2:\n");
        for (int i = 0; i < 3; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", tempH2.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("tempLL:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", tempLL.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
2175
2176
2177
2178
2179
2180 void kalmanUpdateEndcap(const MPlexLS& psErr,
2181 const MPlexLV& psPar,
2182 const MPlexHS& msErr,
2183 const MPlexHV& msPar,
2184 MPlexLS& outErr,
2185 MPlexLV& outPar,
2186 const int N_proc) {
2187 kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2188 }
2189
2190 void kalmanPropagateAndUpdateEndcap(const MPlexLS& psErr,
2191 const MPlexLV& psPar,
2192 MPlexQI& Chg,
2193 const MPlexHS& msErr,
2194 const MPlexHV& msPar,
2195 MPlexLS& outErr,
2196 MPlexLV& outPar,
2197 MPlexQI& outFailFlag,
2198 const int N_proc,
2199 const PropagationFlags& propFlags,
2200 const bool propToHit) {
2201 if (propToHit) {
2202 MPlexLS propErr;
2203 MPlexLV propPar;
2204 MPlexQF msZ;
2205 #pragma omp simd
2206 for (int n = 0; n < NN; ++n) {
2207 msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
2208 }
2209
2210 propagateHelixToZMPlex(psErr, psPar, Chg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
2211
2212 kalmanOperationEndcap(KFO_Update_Params, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2213 } else {
2214 kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
2215 }
2216 for (int n = 0; n < NN; ++n) {
2217 if (n < N_proc && outPar.At(n, 3, 0) < 0) {
2218 Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
2219 outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
2220 }
2221 }
2222 }
2223
2224
2225
2226 void kalmanComputeChi2Endcap(const MPlexLS& psErr,
2227 const MPlexLV& psPar,
2228 const MPlexQI& inChg,
2229 const MPlexHS& msErr,
2230 const MPlexHV& msPar,
2231 MPlexQF& outChi2,
2232 const int N_proc) {
2233 kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2234 }
2235
2236 void kalmanPropagateAndComputeChi2Endcap(const MPlexLS& psErr,
2237 const MPlexLV& psPar,
2238 const MPlexQI& inChg,
2239 const MPlexHS& msErr,
2240 const MPlexHV& msPar,
2241 MPlexQF& outChi2,
2242 MPlexLV& propPar,
2243 MPlexQI& outFailFlag,
2244 const int N_proc,
2245 const PropagationFlags& propFlags,
2246 const bool propToHit) {
2247 propPar = psPar;
2248 if (propToHit) {
2249 MPlexLS propErr;
2250 MPlexQF msZ;
2251 #pragma omp simd
2252 for (int n = 0; n < NN; ++n) {
2253 msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
2254 }
2255
2256 propagateHelixToZMPlex(psErr, psPar, inChg, msZ, propErr, propPar, outFailFlag, N_proc, propFlags);
2257
2258 kalmanOperationEndcap(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2259 } else {
2260 kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
2261 }
2262 }
2263
2264
2265
  //==============================================================================
  // kalmanOperationEndcap
  //
  // Chi2 and/or Kalman update for a hit on an endcap disc (fixed-z surface).
  // The measurement is the (x, y) position on the disc, so the residual and its
  // covariance are plain 2D quantities taken from the first two parameter
  // components — no projection matrix is needed (contrast the plane/barrel
  // variants).
  //
  // kfOp is a bitmask of KFO_* flags; outErr/outPar/outChi2 are written only
  // for the requested operations.
  //==============================================================================

  void kalmanOperationEndcap(const int kfOp,
                             const MPlexLS& psErr,
                             const MPlexLV& psPar,
                             const MPlexHS& msErr,
                             const MPlexHV& msPar,
                             MPlexLS& outErr,
                             MPlexLV& outPar,
                             MPlexQF& outChi2,
                             const int N_proc) {
#ifdef DEBUG
    {
      dmutex_guard;
      printf("updateParametersEndcapMPlex\n");
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // 2D residual in (x, y): hit position minus track position.
    MPlex2V res;
    SubtractFirst2(msPar, psPar, res);

    // Residual covariance: 2x2 (x, y) blocks of track + hit covariances.
    MPlex2S resErr;
    AddIntoUpperLeft2x2(psErr, msErr, resErr);

#ifdef DEBUG
    {
      dmutex_guard;
      printf("resErr:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // In-place inversion of the 2x2 symmetric residual covariance.
    Matriplex::invertCramerSym(resErr);

    if (kfOp & KFO_Calculate_Chi2) {
      Chi2Similarity(res, resErr, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      // Kalman gain K (6x2) = psErr * H^T * resErr^-1, with H selecting (x, y).
      MPlexL2 K;
      KalmanGain(psErr, resErr, K);

      // outPar = psPar + K * res.
      MultResidualsAdd(K, psPar, res, outPar);

      // Keep phi within its canonical range.
      squashPhiMPlex(outPar, N_proc);

      // outErr = K*H*psErr for now; subtracted from psErr below.
      KHC(K, psErr, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("outErr before subtract:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif

      // Covariance update: outErr = psErr - K*H*psErr.
      outErr.subtract(psErr, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("res:\n");
        for (int i = 0; i < 2; ++i) {
          printf("%8f ", res.At(0, i, 0));
        }
        printf("\n");
        printf("resErr (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
2411
2412 }