PixelTrackFitting/test/testEigenGPUNoFit.cu

0001 #include <iostream>
0002
0003 #include <Eigen/Core>
0004 #include <Eigen/Eigenvalues>
0005
0006 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0008 #include "test_common.h"
0009
0010 using namespace Eigen;
0011
0012 using Matrix5d = Matrix<double, 5, 5>;
0013
0014 __host__ __device__ void eigenValues(Matrix3d *m, Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret) {
0015 #if TEST_DEBUG
0016   printf("Matrix(0,0): %f\n", (*m)(0, 0));
0017   printf("Matrix(1,1): %f\n", (*m)(1, 1));
0018   printf("Matrix(2,2): %f\n", (*m)(2, 2));
0019 #endif
0020   SelfAdjointEigenSolver<Matrix3d> es;
0021   es.computeDirect(*m);
0022   (*ret) = es.eigenvalues();
0023   return;
0024 }
0025
0026 __global__ void kernel(Matrix3d *m, Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret) {
0027   eigenValues(m, ret);
0028 }
0029
0030 __global__ void kernelInverse3x3(Matrix3d *in, Matrix3d *out) { (*out) = in->inverse(); }
0031
0032 __global__ void kernelInverse4x4(Matrix4d *in, Matrix4d *out) { (*out) = in->inverse(); }
0033
0034 __global__ void kernelInverse5x5(Matrix5d *in, Matrix5d *out) { (*out) = in->inverse(); }
0035
0036 template <typename M1, typename M2, typename M3>
0037 __global__ void kernelMultiply(M1 *J, M2 *C, M3 *result) {
0038 //  Map<M3> res(result->data());
0039 #if TEST_DEBUG
0040   printf("*** GPU IN ***\n");
0041 #endif
0042   printIt(J);
0043   printIt(C);
0044   //  res.noalias() = (*J) * (*C);
0045   //  printIt(&res);
0046   (*result) = (*J) * (*C);
0047 #if TEST_DEBUG
0048   printf("*** GPU OUT ***\n");
0049 #endif
0050   return;
0051 }
0052
0053 template <int row1, int col1, int row2, int col2>
0054 void testMultiply() {
0055   std::cout << "TEST MULTIPLY" << std::endl;
0056   std::cout << "Product of type " << row1 << "x" << col1 << " * " << row2 << "x" << col2 << std::endl;
0057   Eigen::Matrix<double, row1, col1> J;
0058   fillMatrix(J);
0059   Eigen::Matrix<double, row2, col2> C;
0060   fillMatrix(C);
0061   Eigen::Matrix<double, row1, col2> multiply_result = J * C;
0062 #if TEST_DEBUG
0063   std::cout << "Input J:" << std::endl;
0064   printIt(&J);
0065   std::cout << "Input C:" << std::endl;
0066   printIt(&C);
0067   std::cout << "Output:" << std::endl;
0068   printIt(&multiply_result);
0069 #endif
0070   // GPU
0071   Eigen::Matrix<double, row1, col1> *JGPU = nullptr;
0072   Eigen::Matrix<double, row2, col2> *CGPU = nullptr;
0073   Eigen::Matrix<double, row1, col2> *multiply_resultGPU = nullptr;
0074   Eigen::Matrix<double, row1, col2> *multiply_resultGPUret = new Eigen::Matrix<double, row1, col2>();
0075
0076   cudaCheck(cudaMalloc((void **)&JGPU, sizeof(Eigen::Matrix<double, row1, col1>)));
0077   cudaCheck(cudaMalloc((void **)&CGPU, sizeof(Eigen::Matrix<double, row2, col2>)));
0078   cudaCheck(cudaMalloc((void **)&multiply_resultGPU, sizeof(Eigen::Matrix<double, row1, col2>)));
0079   cudaCheck(cudaMemcpy(JGPU, &J, sizeof(Eigen::Matrix<double, row1, col1>), cudaMemcpyHostToDevice));
0080   cudaCheck(cudaMemcpy(CGPU, &C, sizeof(Eigen::Matrix<double, row2, col2>), cudaMemcpyHostToDevice));
0081   cudaCheck(cudaMemcpy(
0082       multiply_resultGPU, &multiply_result, sizeof(Eigen::Matrix<double, row1, col2>), cudaMemcpyHostToDevice));
0083
0084   kernelMultiply<<<1, 1>>>(JGPU, CGPU, multiply_resultGPU);
0085   cudaDeviceSynchronize();
0086
0087   cudaCheck(cudaMemcpy(
0088       multiply_resultGPUret, multiply_resultGPU, sizeof(Eigen::Matrix<double, row1, col2>), cudaMemcpyDeviceToHost));
0089   printIt(multiply_resultGPUret);
0090   assert(isEqualFuzzy(multiply_result, (*multiply_resultGPUret)));
0091 }
0092
0093 void testInverse3x3() {
0094   std::cout << "TEST INVERSE 3x3" << std::endl;
0095   Matrix3d m;
0096   fillMatrix(m);
0097   m += m.transpose().eval();
0098
0099   Matrix3d m_inv = m.inverse();
0100   Matrix3d *mGPU = nullptr;
0101   Matrix3d *mGPUret = nullptr;
0102   Matrix3d *mCPUret = new Matrix3d();
0103
0104 #if TEST_DEBUG
0105   std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0106   std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0107 #endif
0108   cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix3d)));
0109   cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix3d)));
0110   cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice));
0111
0112   kernelInverse3x3<<<1, 1>>>(mGPU, mGPUret);
0113   cudaDeviceSynchronize();
0114
0115   cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix3d), cudaMemcpyDeviceToHost));
0116 #if TEST_DEBUG
0117   std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0118 #endif
0119   assert(isEqualFuzzy(m_inv, *mCPUret));
0120 }
0121
0122 void testInverse4x4() {
0123   std::cout << "TEST INVERSE 4x4" << std::endl;
0124   Matrix4d m;
0125   fillMatrix(m);
0126   m += m.transpose().eval();
0127
0128   Matrix4d m_inv = m.inverse();
0129   Matrix4d *mGPU = nullptr;
0130   Matrix4d *mGPUret = nullptr;
0131   Matrix4d *mCPUret = new Matrix4d();
0132
0133 #if TEST_DEBUG
0134   std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0135   std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0136 #endif
0137   cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix4d)));
0138   cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix4d)));
0139   cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix4d), cudaMemcpyHostToDevice));
0140
0141   kernelInverse4x4<<<1, 1>>>(mGPU, mGPUret);
0142   cudaDeviceSynchronize();
0143
0144   cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix4d), cudaMemcpyDeviceToHost));
0145 #if TEST_DEBUG
0146   std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0147 #endif
0148   assert(isEqualFuzzy(m_inv, *mCPUret));
0149 }
0150
0151 void testInverse5x5() {
0152   std::cout << "TEST INVERSE 5x5" << std::endl;
0153   Matrix5d m;
0154   fillMatrix(m);
0155   m += m.transpose().eval();
0156
0157   Matrix5d m_inv = m.inverse();
0158   Matrix5d *mGPU = nullptr;
0159   Matrix5d *mGPUret = nullptr;
0160   Matrix5d *mCPUret = new Matrix5d();
0161
0162 #if TEST_DEBUG
0163   std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0164   std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0165 #endif
0166   cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix5d)));
0167   cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix5d)));
0168   cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix5d), cudaMemcpyHostToDevice));
0169
0170   kernelInverse5x5<<<1, 1>>>(mGPU, mGPUret);
0171   cudaDeviceSynchronize();
0172
0173   cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix5d), cudaMemcpyDeviceToHost));
0174 #if TEST_DEBUG
0175   std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0176 #endif
0177   assert(isEqualFuzzy(m_inv, *mCPUret));
0178 }
0179
0180 void testEigenvalues() {
0181   std::cout << "TEST EIGENVALUES" << std::endl;
0182   Matrix3d m;
0183   fillMatrix(m);
0184   m += m.transpose().eval();
0185
0186   Matrix3d *m_gpu = nullptr;
0187   Matrix3d *mgpudebug = new Matrix3d();
0188   Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret =
0189       new Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType;
0190   Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret1 =
0191       new Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType;
0192   Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret_gpu = nullptr;
0193   eigenValues(&m, ret);
0194 #if TEST_DEBUG
0195   std::cout << "Generated Matrix M 3x3:\n" << m << std::endl;
0196   std::cout << "The eigenvalues of M are:" << std::endl << (*ret) << std::endl;
0197   std::cout << "*************************\n\n" << std::endl;
0198 #endif
0199   cudaCheck(cudaMalloc((void **)&m_gpu, sizeof(Matrix3d)));
0200   cudaCheck(cudaMalloc((void **)&ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType)));
0201   cudaCheck(cudaMemcpy(m_gpu, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice));
0202
0203   kernel<<<1, 1>>>(m_gpu, ret_gpu);
0204   cudaDeviceSynchronize();
0205
0206   cudaCheck(cudaMemcpy(mgpudebug, m_gpu, sizeof(Matrix3d), cudaMemcpyDeviceToHost));
0207   cudaCheck(cudaMemcpy(
0208       ret1, ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType), cudaMemcpyDeviceToHost));
0209 #if TEST_DEBUG
0210   std::cout << "GPU Generated Matrix M 3x3:\n" << (*mgpudebug) << std::endl;
0211   std::cout << "GPU The eigenvalues of M are:" << std::endl << (*ret1) << std::endl;
0212   std::cout << "*************************\n\n" << std::endl;
0213 #endif
0214   assert(isEqualFuzzy(*ret, *ret1));
0215 }
0216
0217 int main(int argc, char *argv[]) {
0218   cms::cudatest::requireDevices();
0219
0220   testEigenvalues();
0221   testInverse3x3();
0222   testInverse4x4();
0223   testInverse5x5();
0224
0225   testMultiply<1, 2, 2, 1>();
0226   testMultiply<1, 2, 2, 2>();
0227   testMultiply<1, 2, 2, 3>();
0228   testMultiply<1, 2, 2, 4>();
0229   testMultiply<1, 2, 2, 5>();
0230   testMultiply<2, 1, 1, 2>();
0231   testMultiply<2, 1, 1, 3>();
0232   testMultiply<2, 1, 1, 4>();
0233   testMultiply<2, 1, 1, 5>();
0234   testMultiply<2, 2, 2, 2>();
0235   testMultiply<2, 3, 3, 1>();
0236   testMultiply<2, 3, 3, 2>();
0237   testMultiply<2, 3, 3, 4>();
0238   testMultiply<2, 3, 3, 5>();
0239   testMultiply<3, 2, 2, 3>();
0240   testMultiply<2, 3, 3, 3>();  // DOES NOT COMPILE W/O PATCHING EIGEN
0241   testMultiply<3, 3, 3, 3>();
0242   testMultiply<8, 8, 8, 8>();
0243   testMultiply<3, 4, 4, 3>();
0244   testMultiply<2, 4, 4, 2>();
0245   testMultiply<3, 4, 4, 2>();  // DOES NOT COMPILE W/O PATCHING EIGEN
0246
0247   return 0;
0248 }