Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2022-08-17 23:57:48

0001 #include "KalmanUtilsMPlex.h"
0002 #include "PropagationMPlex.h"
0003 
0004 //#define DEBUG
0005 #include "Debug.h"
0006 
0007 #include "KalmanUtilsMPlex.icc"
0008 
0009 #include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
0010 
0011 namespace {
0012   using namespace mkfit;
0013   using idx_t = Matriplex::idx_t;
0014 
  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
    // Parameter update: outPar = psPar + kalmanGain * (dPar)
    //                      D   =   B   +     A      *   C
    // A is the 6x3 gain whose right (third) column is zero, so only the
    // 2-vector residual C contributes; implementation in KalmanUtilsMPlex.icc.

    // XXX Regenerate with a script.

    MultResidualsAdd_imp(A, B, C, D, 0, NN);  // run over all NN Matriplex slots
  }
0024 
0025   inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
0026     // outPar = psPar + kalmanGain*(dPar)
0027     //   D    =   B         A         C
0028     // where right half of kalman gain is 0
0029 
0030     // XXX Regenerate with a script.
0031 
0032     typedef float T;
0033     const idx_t N = NN;
0034 
0035     const T* a = A.fArray;
0036     ASSUME_ALIGNED(a, 64);
0037     const T* b = B.fArray;
0038     ASSUME_ALIGNED(b, 64);
0039     const T* c = C.fArray;
0040     ASSUME_ALIGNED(c, 64);
0041     T* d = D.fArray;
0042     ASSUME_ALIGNED(d, 64);
0043 
0044 #pragma omp simd
0045     for (idx_t n = 0; n < N; ++n) {
0046       // generate loop (can also write it manually this time, it's not much)
0047       d[0 * N + n] = b[0 * N + n] + a[0 * N + n] * c[0 * N + n] + a[1 * N + n] * c[1 * N + n];
0048       d[1 * N + n] = b[1 * N + n] + a[2 * N + n] * c[0 * N + n] + a[3 * N + n] * c[1 * N + n];
0049       d[2 * N + n] = b[2 * N + n] + a[4 * N + n] * c[0 * N + n] + a[5 * N + n] * c[1 * N + n];
0050       d[3 * N + n] = b[3 * N + n] + a[6 * N + n] * c[0 * N + n] + a[7 * N + n] * c[1 * N + n];
0051       d[4 * N + n] = b[4 * N + n] + a[8 * N + n] * c[0 * N + n] + a[9 * N + n] * c[1 * N + n];
0052       d[5 * N + n] = b[5 * N + n] + a[10 * N + n] * c[0 * N + n] + a[11 * N + n] * c[1 * N + n];
0053     }
0054   }
0055 
0056   //------------------------------------------------------------------------------
0057 
0058   inline void Chi2Similarity(const MPlex2V& A,  //resPar
0059                              const MPlex2S& C,  //resErr
0060                              MPlexQF& D)        //outChi2
0061   {
0062     // outChi2 = (resPar) * resErr * (resPar)
0063     //   D     =    A      *    C   *      A
0064 
0065     typedef float T;
0066     const idx_t N = NN;
0067 
0068     const T* a = A.fArray;
0069     ASSUME_ALIGNED(a, 64);
0070     const T* c = C.fArray;
0071     ASSUME_ALIGNED(c, 64);
0072     T* d = D.fArray;
0073     ASSUME_ALIGNED(d, 64);
0074 
0075 #pragma omp simd
0076     for (idx_t n = 0; n < N; ++n) {
0077       // generate loop (can also write it manually this time, it's not much)
0078       d[0 * N + n] = c[0 * N + n] * a[0 * N + n] * a[0 * N + n] + c[2 * N + n] * a[1 * N + n] * a[1 * N + n] +
0079                      2 * (c[1 * N + n] * a[1 * N + n] * a[0 * N + n]);
0080     }
0081   }
0082 
0083   //------------------------------------------------------------------------------
0084 
0085   inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
0086     // The rest of matrix is left untouched.
0087 
0088     typedef float T;
0089     const idx_t N = NN;
0090 
0091     const T* a = A.fArray;
0092     ASSUME_ALIGNED(a, 64);
0093     const T* b = B.fArray;
0094     ASSUME_ALIGNED(b, 64);
0095     T* c = C.fArray;
0096     ASSUME_ALIGNED(c, 64);
0097 
0098 #pragma omp simd
0099     for (idx_t n = 0; n < N; ++n) {
0100       c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
0101       c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
0102       c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
0103       c[3 * N + n] = a[3 * N + n] + b[3 * N + n];
0104       c[4 * N + n] = a[4 * N + n] + b[4 * N + n];
0105       c[5 * N + n] = a[5 * N + n] + b[5 * N + n];
0106     }
0107   }
0108 
0109   inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
0110     // The rest of matrix is left untouched.
0111 
0112     typedef float T;
0113     const idx_t N = NN;
0114 
0115     const T* a = A.fArray;
0116     ASSUME_ALIGNED(a, 64);
0117     const T* b = B.fArray;
0118     ASSUME_ALIGNED(b, 64);
0119     T* c = C.fArray;
0120     ASSUME_ALIGNED(c, 64);
0121 
0122 #pragma omp simd
0123     for (idx_t n = 0; n < N; ++n) {
0124       c[0 * N + n] = a[0 * N + n] + b[0 * N + n];
0125       c[1 * N + n] = a[1 * N + n] + b[1 * N + n];
0126       c[2 * N + n] = a[2 * N + n] + b[2 * N + n];
0127     }
0128   }
0129 
0130   //------------------------------------------------------------------------------
0131 
0132   inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
0133     // The rest of matrix is left untouched.
0134 
0135     typedef float T;
0136     const idx_t N = NN;
0137 
0138     const T* a = A.fArray;
0139     ASSUME_ALIGNED(a, 64);
0140     const T* b = B.fArray;
0141     ASSUME_ALIGNED(b, 64);
0142     T* c = C.fArray;
0143     ASSUME_ALIGNED(c, 64);
0144 
0145 #pragma omp simd
0146     for (idx_t n = 0; n < N; ++n) {
0147       c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
0148       c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
0149       c[2 * N + n] = a[2 * N + n] - b[2 * N + n];
0150     }
0151   }
0152 
0153   inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
0154     // The rest of matrix is left untouched.
0155 
0156     typedef float T;
0157     const idx_t N = NN;
0158 
0159     const T* a = A.fArray;
0160     ASSUME_ALIGNED(a, 64);
0161     const T* b = B.fArray;
0162     ASSUME_ALIGNED(b, 64);
0163     T* c = C.fArray;
0164     ASSUME_ALIGNED(c, 64);
0165 
0166 #pragma omp simd
0167     for (idx_t n = 0; n < N; ++n) {
0168       c[0 * N + n] = a[0 * N + n] - b[0 * N + n];
0169       c[1 * N + n] = a[1 * N + n] - b[1 * N + n];
0170     }
0171   }
0172 
0173   //==============================================================================
0174 
0175   inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
0176     // C = A * B, C is 3x3, A is 3x3 , B is 3x3 sym
0177 
0178     // Based on script generation and adapted to custom sizes.
0179 
0180     typedef float T;
0181     const idx_t N = NN;
0182 
0183     const T* a00 = A00.fArray;
0184     ASSUME_ALIGNED(a00, 64);
0185     const T* a01 = A01.fArray;
0186     ASSUME_ALIGNED(a01, 64);
0187     const T* b = B.fArray;
0188     ASSUME_ALIGNED(b, 64);
0189     T* c = C.fArray;
0190     ASSUME_ALIGNED(c, 64);
0191 
0192 #pragma omp simd
0193     for (int n = 0; n < N; ++n) {
0194       c[0 * N + n] = a00[n] * b[0 * N + n] + a01[n] * b[1 * N + n];
0195       c[1 * N + n] = a00[n] * b[1 * N + n] + a01[n] * b[2 * N + n];
0196       c[2 * N + n] = a00[n] * b[3 * N + n] + a01[n] * b[4 * N + n];
0197       c[3 * N + n] = b[3 * N + n];
0198       c[4 * N + n] = b[4 * N + n];
0199       c[5 * N + n] = b[5 * N + n];
0200       c[6 * N + n] = a01[n] * b[0 * N + n] - a00[n] * b[1 * N + n];
0201       c[7 * N + n] = a01[n] * b[1 * N + n] - a00[n] * b[2 * N + n];
0202       c[8 * N + n] = a01[n] * b[3 * N + n] - a00[n] * b[4 * N + n];
0203     }
0204   }
0205 
0206   inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
0207     // C = A * B, C is 3x3 sym, A is 3x3 , B is 3x3
0208 
0209     // Based on script generation and adapted to custom sizes.
0210 
0211     typedef float T;
0212     const idx_t N = NN;
0213 
0214     const T* a00 = A00.fArray;
0215     ASSUME_ALIGNED(a00, 64);
0216     const T* a01 = A01.fArray;
0217     ASSUME_ALIGNED(a01, 64);
0218     const T* b = B.fArray;
0219     ASSUME_ALIGNED(b, 64);
0220     T* c = C.fArray;
0221     ASSUME_ALIGNED(c, 64);
0222 
0223 #pragma omp simd
0224     for (int n = 0; n < N; ++n) {
0225       c[0 * N + n] = b[0 * N + n] * a00[n] + b[1 * N + n] * a01[n];
0226       c[1 * N + n] = b[3 * N + n] * a00[n] + b[4 * N + n] * a01[n];
0227       c[2 * N + n] = b[5 * N + n];
0228     }
0229   }
0230 
  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00,  //r00
                                            const MPlexQF& R01,  //r01
                                            const MPlexHV& A,    //res_glo
                                            MPlex2V& B)          //res_loc
  {
    // Rotate the 3-vector global residual A into the local tangent-plane
    // frame given by coefficients (R00, R01), producing the 2-vector B.
    // Implementation lives in KalmanUtilsMPlex.icc.
    RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);
  }
0238 
0239   inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
0240     // HTG  = rot * res_loc
0241     //   C  =  A  *    B
0242 
0243     // Based on script generation and adapted to custom sizes.
0244 
0245     typedef float T;
0246     const idx_t N = NN;
0247 
0248     const T* a00 = A00.fArray;
0249     ASSUME_ALIGNED(a00, 64);
0250     const T* a01 = A01.fArray;
0251     ASSUME_ALIGNED(a01, 64);
0252     const T* b = B.fArray;
0253     ASSUME_ALIGNED(b, 64);
0254     T* c = C.fArray;
0255     ASSUME_ALIGNED(c, 64);
0256 
0257 #pragma omp simd
0258     for (int n = 0; n < N; ++n) {
0259       c[0 * N + n] = a00[n] * b[0 * N + n];
0260       c[1 * N + n] = a00[n] * b[1 * N + n];
0261       c[2 * N + n] = 0.;
0262       c[3 * N + n] = a01[n] * b[0 * N + n];
0263       c[4 * N + n] = a01[n] * b[1 * N + n];
0264       c[5 * N + n] = 0.;
0265       c[6 * N + n] = b[1 * N + n];
0266       c[7 * N + n] = b[2 * N + n];
0267       c[8 * N + n] = 0.;
0268     }
0269   }
0270 
0271   inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
0272     // C = A * B, C is 6x3, A is 6x6 sym , B is 3x3
0273 
0274     typedef float T;
0275     const idx_t N = NN;
0276 
0277     const T* a = A.fArray;
0278     ASSUME_ALIGNED(a, 64);
0279     const T* b = B.fArray;
0280     ASSUME_ALIGNED(b, 64);
0281     T* c = C.fArray;
0282     ASSUME_ALIGNED(c, 64);
0283 
0284 #pragma omp simd
0285     for (int n = 0; n < N; ++n) {
0286       c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[3 * N + n] * b[6 * N + n];
0287       c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[3 * N + n] * b[7 * N + n];
0288       c[2 * N + n] = 0;
0289       c[3 * N + n] = a[1 * N + n] * b[0 * N + n] + a[2 * N + n] * b[3 * N + n] + a[4 * N + n] * b[6 * N + n];
0290       c[4 * N + n] = a[1 * N + n] * b[1 * N + n] + a[2 * N + n] * b[4 * N + n] + a[4 * N + n] * b[7 * N + n];
0291       c[5 * N + n] = 0;
0292       c[6 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
0293       c[7 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
0294       c[8 * N + n] = 0;
0295       c[9 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
0296       c[10 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
0297       c[11 * N + n] = 0;
0298       c[12 * N + n] = a[10 * N + n] * b[0 * N + n] + a[11 * N + n] * b[3 * N + n] + a[12 * N + n] * b[6 * N + n];
0299       c[13 * N + n] = a[10 * N + n] * b[1 * N + n] + a[11 * N + n] * b[4 * N + n] + a[12 * N + n] * b[7 * N + n];
0300       c[14 * N + n] = 0;
0301       c[15 * N + n] = a[15 * N + n] * b[0 * N + n] + a[16 * N + n] * b[3 * N + n] + a[17 * N + n] * b[6 * N + n];
0302       c[16 * N + n] = a[15 * N + n] * b[1 * N + n] + a[16 * N + n] * b[4 * N + n] + a[17 * N + n] * b[7 * N + n];
0303       c[17 * N + n] = 0;
0304     }
0305   }
0306 
0307   inline void CovXYconstrain(const MPlexQF& R00, const MPlexQF& R01, const MPlexLS& Ci, MPlexLS& Co) {
0308     // C is transformed to align along y after rotation and rotated back
0309 
0310     typedef float T;
0311     const idx_t N = NN;
0312 
0313     const T* r00 = R00.fArray;
0314     ASSUME_ALIGNED(r00, 64);
0315     const T* r01 = R01.fArray;
0316     ASSUME_ALIGNED(r01, 64);
0317     const T* ci = Ci.fArray;
0318     ASSUME_ALIGNED(ci, 64);
0319     T* co = Co.fArray;
0320     ASSUME_ALIGNED(co, 64);
0321 
0322 #pragma omp simd
0323     for (int n = 0; n < N; ++n) {
0324       // a bit loopy to avoid temporaries
0325       co[0 * N + n] =
0326           r00[n] * r00[n] * ci[0 * N + n] + 2 * r00[n] * r01[n] * ci[1 * N + n] + r01[n] * r01[n] * ci[2 * N + n];
0327       co[1 * N + n] = r00[n] * r01[n] * co[0 * N + n];
0328       co[2 * N + n] = r01[n] * r01[n] * co[0 * N + n];
0329       co[0 * N + n] = r00[n] * r00[n] * co[0 * N + n];
0330 
0331       co[3 * N + n] = r00[n] * ci[3 * N + n] + r01[n] * ci[4 * N + n];
0332       co[4 * N + n] = r01[n] * co[3 * N + n];
0333       co[3 * N + n] = r00[n] * co[3 * N + n];
0334 
0335       co[6 * N + n] = r00[n] * ci[6 * N + n] + r01[n] * ci[7 * N + n];
0336       co[7 * N + n] = r01[n] * co[6 * N + n];
0337       co[6 * N + n] = r00[n] * co[6 * N + n];
0338 
0339       co[10 * N + n] = r00[n] * ci[10 * N + n] + r01[n] * ci[11 * N + n];
0340       co[11 * N + n] = r01[n] * co[10 * N + n];
0341       co[10 * N + n] = r00[n] * co[10 * N + n];
0342 
0343       co[15 * N + n] = r00[n] * ci[15 * N + n] + r01[n] * ci[16 * N + n];
0344       co[16 * N + n] = r01[n] * co[15 * N + n];
0345       co[15 * N + n] = r00[n] * co[15 * N + n];
0346     }
0347   }
0348 
0349   void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
0350     // C = A * B, C is 6x2, A is 6x6 sym , B is 2x2
0351 
0352     typedef float T;
0353     const idx_t N = NN;
0354 
0355     const T* a = A.fArray;
0356     ASSUME_ALIGNED(a, 64);
0357     const T* b = B.fArray;
0358     ASSUME_ALIGNED(b, 64);
0359     T* c = C.fArray;
0360     ASSUME_ALIGNED(c, 64);
0361 
0362 #include "KalmanGain62.ah"
0363   }
0364 
  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
    // C = A * B, C is 6x6, A is 6x3, B is 3x6 (the projection H encoded by
    // the two rotation coefficients). Implementation in KalmanUtilsMPlex.icc.
    KHMult_imp(A, B00, B01, C, 0, NN);
  }
0369 
  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
    // C = A * B, C is 6x6 sym (MPlexLS), A is 6x6, B is 6x6 sym.
    // The generated product is included from KHC.ah and uses the pointer
    // names a, b, c defined below.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "KHC.ah"
  }
0385 
  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
    // C = A * B, C is 6x6 sym, A is 6x2, B is 6x6 sym.
    // The generated product is included from K62HC.ah and uses the pointer
    // names a, b, c defined below.

    typedef float T;
    const idx_t N = NN;

    const T* a = A.fArray;
    ASSUME_ALIGNED(a, 64);
    const T* b = B.fArray;
    ASSUME_ALIGNED(b, 64);
    T* c = C.fArray;
    ASSUME_ALIGNED(c, 64);

#include "K62HC.ah"
  }
0401 
0402   //Warning: MultFull is not vectorized!
0403   template <typename T1, typename T2, typename T3>
0404   void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
0405 #ifdef DEBUG
0406     assert(nja == nib);
0407     assert(nia == nic);
0408     assert(njb == njc);
0409 #endif
0410     for (int n = 0; n < NN; ++n) {
0411       for (int i = 0; i < nia; ++i) {
0412         for (int j = 0; j < njb; ++j) {
0413           C(n, i, j) = 0.;
0414           for (int k = 0; k < nja; ++k)
0415             C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, k, j);
0416         }
0417       }
0418     }
0419   }
0420 
0421   //Warning: MultTranspFull is not vectorized!
0422   // (careful about which one is transposed, I think rows and cols are swapped and the one that is transposed is A)
0423   template <typename T1, typename T2, typename T3>
0424   void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
0425 #ifdef DEBUG
0426     assert(nja == njb);
0427     assert(nia == nic);
0428     assert(nib == njc);
0429 #endif
0430     for (int n = 0; n < NN; ++n) {
0431       for (int i = 0; i < nia; ++i) {
0432         for (int j = 0; j < nib; ++j) {
0433           C(n, i, j) = 0.;
0434           for (int k = 0; k < nja; ++k)
0435             C(n, i, j) += A.constAt(n, i, k) * B.constAt(n, j, k);
0436         }
0437       }
0438     }
0439   }
0440 
0441 }  // namespace
0442 
0443 //==============================================================================
0444 // Kalman operations - common dummy variables
0445 //==============================================================================
0446 
namespace {
  // Dummy variables for parameter consistency to kalmanOperation.
  // Through KalmanFilterOperation enum parameter it is guaranteed that
  // those will never get accessed in the code (read from or written into).
  // NOTE(review): CMS_SA_ALLOW presumably whitelists these mutable globals
  // for the CMS static analyzer — confirm in cms_common_macros.h.

  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;
  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;
  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;
}  // namespace
0456 
0457 namespace mkfit {
0458 
0459   //==============================================================================
0460   // Kalman operations - Barrel
0461   //==============================================================================
0462 
  void kalmanUpdate(const MPlexLS& psErr,
                    const MPlexLV& psPar,
                    const MPlexHS& msErr,
                    const MPlexHV& msPar,
                    MPlexLS& outErr,
                    MPlexLV& outPar,
                    const int N_proc) {
    // Barrel Kalman update without propagation: the state (psErr/psPar) is
    // taken as already being at the hit. Chi2 is not requested, so the
    // shared dummy_chi2 is passed and, per the KFO flag contract, untouched.
    kalmanOperation(KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
  }
0472 
0473   void kalmanPropagateAndUpdate(const MPlexLS& psErr,
0474                                 const MPlexLV& psPar,
0475                                 MPlexQI& Chg,
0476                                 const MPlexHS& msErr,
0477                                 const MPlexHV& msPar,
0478                                 MPlexLS& outErr,
0479                                 MPlexLV& outPar,
0480                                 const int N_proc,
0481                                 const PropagationFlags propFlags,
0482                                 const bool propToHit) {
0483     if (propToHit) {
0484       MPlexLS propErr;
0485       MPlexLV propPar;
0486       MPlexQF msRad;
0487 #pragma omp simd
0488       for (int n = 0; n < NN; ++n) {
0489         msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
0490       }
0491 
0492       propagateHelixToRMPlex(psErr, psPar, Chg, msRad, propErr, propPar, N_proc, propFlags);
0493 
0494       kalmanOperation(
0495           KFO_Update_Params | KFO_Local_Cov, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0496     } else {
0497       kalmanOperation(
0498           KFO_Update_Params | KFO_Local_Cov, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0499     }
0500     for (int n = 0; n < NN; ++n) {
0501       if (outPar.At(n, 3, 0) < 0) {
0502         Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
0503         outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
0504       }
0505     }
0506   }
0507 
0508   //------------------------------------------------------------------------------
0509 
  void kalmanComputeChi2(const MPlexLS& psErr,
                         const MPlexLV& psPar,
                         const MPlexQI& inChg,
                         const MPlexHS& msErr,
                         const MPlexHV& msPar,
                         MPlexQF& outChi2,
                         const int N_proc) {
    // Chi2-only barrel operation: parameters/covariance are not updated
    // (dummy_err/dummy_par are passed and never accessed). inChg is unused
    // here; it is kept for signature parallelism with the propagate variant.
    kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
  }
0519 
0520   void kalmanPropagateAndComputeChi2(const MPlexLS& psErr,
0521                                      const MPlexLV& psPar,
0522                                      const MPlexQI& inChg,
0523                                      const MPlexHS& msErr,
0524                                      const MPlexHV& msPar,
0525                                      MPlexQF& outChi2,
0526                                      MPlexLV& propPar,
0527                                      const int N_proc,
0528                                      const PropagationFlags propFlags,
0529                                      const bool propToHit) {
0530     propPar = psPar;
0531     if (propToHit) {
0532       MPlexLS propErr;
0533       MPlexQF msRad;
0534 #pragma omp simd
0535       for (int n = 0; n < NN; ++n) {
0536         msRad.At(n, 0, 0) = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
0537       }
0538 
0539       propagateHelixToRMPlex(psErr, psPar, inChg, msRad, propErr, propPar, N_proc, propFlags);
0540 
0541       kalmanOperation(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
0542     } else {
0543       kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
0544     }
0545   }
0546 
0547   //------------------------------------------------------------------------------
0548 
  void kalmanOperation(const int kfOp,
                       const MPlexLS& psErr,
                       const MPlexLV& psPar,
                       const MPlexHS& msErr,
                       const MPlexHV& msPar,
                       MPlexLS& outErr,
                       MPlexLV& outPar,
                       MPlexQF& outChi2,
                       const int N_proc) {
    // Core barrel Kalman routine. Bits of kfOp select the work:
    //   KFO_Calculate_Chi2 -> fill outChi2
    //   KFO_Update_Params  -> fill outErr/outPar
    //   KFO_Local_Cov      -> additionally constrain the xy covariance
    // Outputs not selected receive the shared dummy_* objects from callers
    // and are never read or written.
#ifdef DEBUG
    {
      dmutex_guard;
      printf("psPar:\n");
      for (int i = 0; i < 6; ++i) {
        printf("%8f ", psPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("psErr:\n");
      for (int i = 0; i < 6; ++i) {
        for (int j = 0; j < 6; ++j)
          printf("%8f ", psErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
      printf("msPar:\n");
      for (int i = 0; i < 3; ++i) {
        printf("%8f ", msPar.constAt(0, 0, i));
        printf("\n");
      }
      printf("\n");
      printf("msErr:\n");
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j)
          printf("%8f ", msErr.constAt(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    // Rotate global point on tangent plane to cylinder
    // Tangent point is half way between hit and propagate position

    // Rotation matrix
    //  rotT00  0  rotT01
    //  rotT01  0 -rotT00
    //     0    1    0
    // Minimize temporaries: only two float are needed!

    MPlexQF rotT00;
    MPlexQF rotT01;
    for (int n = 0; n < NN; ++n) {
      // r is the transverse radius of the hit; the numerators average hit
      // and propagated positions (the "half way" tangent point above).
      const float r = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));
      rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
      rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
    }

    MPlexHV res_glo;  //position residual in global coordinates
    SubtractFirst3(msPar, psPar, res_glo);

    MPlexHS resErr_glo;  //covariance sum in global position coordinates
    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);

    MPlex2V res_loc;  //position residual in local coordinates
    RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
    MPlex2S resErr_loc;  //covariance sum in local position coordinates
    MPlexHH tempHH;      // 3x3 scratch; reused below for the H^T*G product
    ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
    ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);

#ifdef DEBUG
    {
      dmutex_guard;
      printf("resErr_loc:\n");
      for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 2; ++j)
          printf("%8f ", resErr_loc.At(0, i, j));
        printf("\n");
      }
      printf("\n");
    }
#endif

    //invert the 2x2 matrix (in place: resErr_loc becomes its own inverse)
    Matriplex::invertCramerSym(resErr_loc);

    if (kfOp & KFO_Calculate_Chi2) {
      Chi2Similarity(res_loc, resErr_loc, outChi2);

#ifdef DEBUG
      {
        dmutex_guard;
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
      }
#endif
    }

    if (kfOp & KFO_Update_Params) {
      // Optionally constrain the xy covariance block before the gain.
      MPlexLS psErrLoc = psErr;
      if (kfOp & KFO_Local_Cov)
        CovXYconstrain(rotT00, rotT01, psErr, psErrLoc);

      MPlexLH K;                                      // kalman gain, fixme should be L2
      KalmanHTG(rotT00, rotT01, resErr_loc, tempHH);  // intermediate term to get kalman gain (H^T*G)
      KalmanGain(psErrLoc, tempHH, K);

      // Updated parameters: outPar = psPar + K * res_loc
      MultResidualsAdd(K, psPar, res_loc, outPar);
      MPlexLL tempLL;

      squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|

      // Updated covariance: outErr = psErrLoc - (K*H) * psErrLoc
      KHMult(K, rotT00, rotT01, tempLL);
      KHC(tempLL, psErrLoc, outErr);
      outErr.subtract(psErrLoc, outErr);

#ifdef DEBUG
      {
        dmutex_guard;
        if (kfOp & KFO_Local_Cov) {
          printf("psErrLoc:\n");
          for (int i = 0; i < 6; ++i) {
            for (int j = 0; j < 6; ++j)
              printf("% 8e ", psErrLoc.At(0, i, j));
            printf("\n");
          }
          printf("\n");
        }
        printf("res_glo:\n");
        for (int i = 0; i < 3; ++i) {
          printf("%8f ", res_glo.At(0, i, 0));
        }
        printf("\n");
        printf("res_loc:\n");
        for (int i = 0; i < 2; ++i) {
          printf("%8f ", res_loc.At(0, i, 0));
        }
        printf("\n");
        printf("resErr_loc (Inv):\n");
        for (int i = 0; i < 2; ++i) {
          for (int j = 0; j < 2; ++j)
            printf("%8f ", resErr_loc.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("K:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 3; ++j)
            printf("%8f ", K.At(0, i, j));
          printf("\n");
        }
        printf("\n");
        printf("outPar:\n");
        for (int i = 0; i < 6; ++i) {
          printf("%8f  ", outPar.At(0, i, 0));
        }
        printf("\n");
        printf("outErr:\n");
        for (int i = 0; i < 6; ++i) {
          for (int j = 0; j < 6; ++j)
            printf("%8f ", outErr.At(0, i, j));
          printf("\n");
        }
        printf("\n");
      }
#endif
    }
  }
0724 
0725   //==============================================================================
0726   // Kalman operations - Endcap
0727   //==============================================================================
0728 
  void kalmanUpdateEndcap(const MPlexLS& psErr,
                          const MPlexLV& psPar,
                          const MPlexHS& msErr,
                          const MPlexHV& msPar,
                          MPlexLS& outErr,
                          MPlexLV& outPar,
                          const int N_proc) {
    // Endcap Kalman update without propagation; chi2 is not requested so
    // the shared dummy_chi2 is passed and never accessed.
    kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
  }
0738 
0739   void kalmanPropagateAndUpdateEndcap(const MPlexLS& psErr,
0740                                       const MPlexLV& psPar,
0741                                       MPlexQI& Chg,
0742                                       const MPlexHS& msErr,
0743                                       const MPlexHV& msPar,
0744                                       MPlexLS& outErr,
0745                                       MPlexLV& outPar,
0746                                       const int N_proc,
0747                                       const PropagationFlags propFlags,
0748                                       const bool propToHit) {
0749     if (propToHit) {
0750       MPlexLS propErr;
0751       MPlexLV propPar;
0752       MPlexQF msZ;
0753 #pragma omp simd
0754       for (int n = 0; n < NN; ++n) {
0755         msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
0756       }
0757 
0758       propagateHelixToZMPlex(psErr, psPar, Chg, msZ, propErr, propPar, N_proc, propFlags);
0759 
0760       kalmanOperationEndcap(KFO_Update_Params, propErr, propPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0761     } else {
0762       kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
0763     }
0764     for (int n = 0; n < NN; ++n) {
0765       if (outPar.At(n, 3, 0) < 0) {
0766         Chg.At(n, 0, 0) = -Chg.At(n, 0, 0);
0767         outPar.At(n, 3, 0) = -outPar.At(n, 3, 0);
0768       }
0769     }
0770   }
0771 
0772   //------------------------------------------------------------------------------
0773 
  void kalmanComputeChi2Endcap(const MPlexLS& psErr,
                               const MPlexLV& psPar,
                               const MPlexQI& inChg,
                               const MPlexHS& msErr,
                               const MPlexHV& msPar,
                               MPlexQF& outChi2,
                               const int N_proc) {
    // Chi2-only endcap operation: dummy_err/dummy_par are passed and never
    // accessed. inChg is unused here; it is kept for signature parallelism
    // with the propagate variant.
    kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
  }
0783 
0784   void kalmanPropagateAndComputeChi2Endcap(const MPlexLS& psErr,
0785                                            const MPlexLV& psPar,
0786                                            const MPlexQI& inChg,
0787                                            const MPlexHS& msErr,
0788                                            const MPlexHV& msPar,
0789                                            MPlexQF& outChi2,
0790                                            MPlexLV& propPar,
0791                                            const int N_proc,
0792                                            const PropagationFlags propFlags,
0793                                            const bool propToHit) {
0794     propPar = psPar;
0795     if (propToHit) {
0796       MPlexLS propErr;
0797       MPlexQF msZ;
0798 #pragma omp simd
0799       for (int n = 0; n < NN; ++n) {
0800         msZ.At(n, 0, 0) = msPar.constAt(n, 2, 0);
0801       }
0802 
0803       propagateHelixToZMPlex(psErr, psPar, inChg, msZ, propErr, propPar, N_proc, propFlags);
0804 
0805       kalmanOperationEndcap(KFO_Calculate_Chi2, propErr, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
0806     } else {
0807       kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
0808     }
0809   }
0810 
0811   //------------------------------------------------------------------------------
0812 
0813   void kalmanOperationEndcap(const int kfOp,
0814                              const MPlexLS& psErr,
0815                              const MPlexLV& psPar,
0816                              const MPlexHS& msErr,
0817                              const MPlexHV& msPar,
0818                              MPlexLS& outErr,
0819                              MPlexLV& outPar,
0820                              MPlexQF& outChi2,
0821                              const int N_proc) {
0822 #ifdef DEBUG
0823     {
0824       dmutex_guard;
0825       printf("updateParametersEndcapMPlex\n");
0826       printf("psPar:\n");
0827       for (int i = 0; i < 6; ++i) {
0828         printf("%8f ", psPar.constAt(0, 0, i));
0829         printf("\n");
0830       }
0831       printf("\n");
0832       printf("msPar:\n");
0833       for (int i = 0; i < 3; ++i) {
0834         printf("%8f ", msPar.constAt(0, 0, i));
0835         printf("\n");
0836       }
0837       printf("\n");
0838       printf("psErr:\n");
0839       for (int i = 0; i < 6; ++i) {
0840         for (int j = 0; j < 6; ++j)
0841           printf("%8f ", psErr.constAt(0, i, j));
0842         printf("\n");
0843       }
0844       printf("\n");
0845       printf("msErr:\n");
0846       for (int i = 0; i < 3; ++i) {
0847         for (int j = 0; j < 3; ++j)
0848           printf("%8f ", msErr.constAt(0, i, j));
0849         printf("\n");
0850       }
0851       printf("\n");
0852     }
0853 #endif
0854 
0855     MPlex2V res;
0856     SubtractFirst2(msPar, psPar, res);
0857 
0858     MPlex2S resErr;
0859     AddIntoUpperLeft2x2(psErr, msErr, resErr);
0860 
0861 #ifdef DEBUG
0862     {
0863       dmutex_guard;
0864       printf("resErr:\n");
0865       for (int i = 0; i < 2; ++i) {
0866         for (int j = 0; j < 2; ++j)
0867           printf("%8f ", resErr.At(0, i, j));
0868         printf("\n");
0869       }
0870       printf("\n");
0871     }
0872 #endif
0873 
0874     //invert the 2x2 matrix
0875     Matriplex::invertCramerSym(resErr);
0876 
0877     if (kfOp & KFO_Calculate_Chi2) {
0878       Chi2Similarity(res, resErr, outChi2);
0879 
0880 #ifdef DEBUG
0881       {
0882         dmutex_guard;
0883         printf("resErr_loc (Inv):\n");
0884         for (int i = 0; i < 2; ++i) {
0885           for (int j = 0; j < 2; ++j)
0886             printf("%8f ", resErr.At(0, i, j));
0887           printf("\n");
0888         }
0889         printf("\n");
0890         printf("chi2: %8f\n", outChi2.At(0, 0, 0));
0891       }
0892 #endif
0893     }
0894 
0895     if (kfOp & KFO_Update_Params) {
0896       MPlexL2 K;
0897       KalmanGain(psErr, resErr, K);
0898 
0899       MultResidualsAdd(K, psPar, res, outPar);
0900 
0901       squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
0902 
0903       KHC(K, psErr, outErr);
0904 
0905 #ifdef DEBUG
0906       {
0907         dmutex_guard;
0908         printf("outErr before subtract:\n");
0909         for (int i = 0; i < 6; ++i) {
0910           for (int j = 0; j < 6; ++j)
0911             printf("%8f ", outErr.At(0, i, j));
0912           printf("\n");
0913         }
0914         printf("\n");
0915       }
0916 #endif
0917 
0918       outErr.subtract(psErr, outErr);
0919 
0920 #ifdef DEBUG
0921       {
0922         dmutex_guard;
0923         printf("res:\n");
0924         for (int i = 0; i < 2; ++i) {
0925           printf("%8f ", res.At(0, i, 0));
0926         }
0927         printf("\n");
0928         printf("resErr (Inv):\n");
0929         for (int i = 0; i < 2; ++i) {
0930           for (int j = 0; j < 2; ++j)
0931             printf("%8f ", resErr.At(0, i, j));
0932           printf("\n");
0933         }
0934         printf("\n");
0935         printf("K:\n");
0936         for (int i = 0; i < 6; ++i) {
0937           for (int j = 0; j < 2; ++j)
0938             printf("%8f ", K.At(0, i, j));
0939           printf("\n");
0940         }
0941         printf("\n");
0942         printf("outPar:\n");
0943         for (int i = 0; i < 6; ++i) {
0944           printf("%8f  ", outPar.At(0, i, 0));
0945         }
0946         printf("\n");
0947         printf("outErr:\n");
0948         for (int i = 0; i < 6; ++i) {
0949           for (int j = 0; j < 6; ++j)
0950             printf("%8f ", outErr.At(0, i, j));
0951           printf("\n");
0952         }
0953         printf("\n");
0954       }
0955 #endif
0956     }
0957   }
0958 
0959 }  // end namespace mkfit