Warning, /RecoTracker/PixelTrackFitting/test/testEigenGPUNoFit.cu is written in an unsupported language. File is not indexed.
0001 #include <iostream>
0002
0003 #include <Eigen/Core>
0004 #include <Eigen/Eigenvalues>
0005
0006 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0007 #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
0008 #include "test_common.h"
0009
0010 using namespace Eigen;
0011
0012 using Matrix5d = Matrix<double, 5, 5>;
0013
0014 __host__ __device__ void eigenValues(Matrix3d *m, Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret) {
0015 #if TEST_DEBUG
0016 printf("Matrix(0,0): %f\n", (*m)(0, 0));
0017 printf("Matrix(1,1): %f\n", (*m)(1, 1));
0018 printf("Matrix(2,2): %f\n", (*m)(2, 2));
0019 #endif
0020 SelfAdjointEigenSolver<Matrix3d> es;
0021 es.computeDirect(*m);
0022 (*ret) = es.eigenvalues();
0023 return;
0024 }
0025
0026 __global__ void kernel(Matrix3d *m, Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret) {
0027 eigenValues(m, ret);
0028 }
0029
0030 __global__ void kernelInverse3x3(Matrix3d *in, Matrix3d *out) { (*out) = in->inverse(); }
0031
0032 __global__ void kernelInverse4x4(Matrix4d *in, Matrix4d *out) { (*out) = in->inverse(); }
0033
0034 __global__ void kernelInverse5x5(Matrix5d *in, Matrix5d *out) { (*out) = in->inverse(); }
0035
0036 template <typename M1, typename M2, typename M3>
0037 __global__ void kernelMultiply(M1 *J, M2 *C, M3 *result) {
0038 // Map<M3> res(result->data());
0039 #if TEST_DEBUG
0040 printf("*** GPU IN ***\n");
0041 #endif
0042 printIt(J);
0043 printIt(C);
0044 // res.noalias() = (*J) * (*C);
0045 // printIt(&res);
0046 (*result) = (*J) * (*C);
0047 #if TEST_DEBUG
0048 printf("*** GPU OUT ***\n");
0049 #endif
0050 return;
0051 }
0052
0053 template <int row1, int col1, int row2, int col2>
0054 void testMultiply() {
0055 std::cout << "TEST MULTIPLY" << std::endl;
0056 std::cout << "Product of type " << row1 << "x" << col1 << " * " << row2 << "x" << col2 << std::endl;
0057 Eigen::Matrix<double, row1, col1> J;
0058 fillMatrix(J);
0059 Eigen::Matrix<double, row2, col2> C;
0060 fillMatrix(C);
0061 Eigen::Matrix<double, row1, col2> multiply_result = J * C;
0062 #if TEST_DEBUG
0063 std::cout << "Input J:" << std::endl;
0064 printIt(&J);
0065 std::cout << "Input C:" << std::endl;
0066 printIt(&C);
0067 std::cout << "Output:" << std::endl;
0068 printIt(&multiply_result);
0069 #endif
0070 // GPU
0071 Eigen::Matrix<double, row1, col1> *JGPU = nullptr;
0072 Eigen::Matrix<double, row2, col2> *CGPU = nullptr;
0073 Eigen::Matrix<double, row1, col2> *multiply_resultGPU = nullptr;
0074 Eigen::Matrix<double, row1, col2> *multiply_resultGPUret = new Eigen::Matrix<double, row1, col2>();
0075
0076 cudaCheck(cudaMalloc((void **)&JGPU, sizeof(Eigen::Matrix<double, row1, col1>)));
0077 cudaCheck(cudaMalloc((void **)&CGPU, sizeof(Eigen::Matrix<double, row2, col2>)));
0078 cudaCheck(cudaMalloc((void **)&multiply_resultGPU, sizeof(Eigen::Matrix<double, row1, col2>)));
0079 cudaCheck(cudaMemcpy(JGPU, &J, sizeof(Eigen::Matrix<double, row1, col1>), cudaMemcpyHostToDevice));
0080 cudaCheck(cudaMemcpy(CGPU, &C, sizeof(Eigen::Matrix<double, row2, col2>), cudaMemcpyHostToDevice));
0081 cudaCheck(cudaMemcpy(
0082 multiply_resultGPU, &multiply_result, sizeof(Eigen::Matrix<double, row1, col2>), cudaMemcpyHostToDevice));
0083
0084 kernelMultiply<<<1, 1>>>(JGPU, CGPU, multiply_resultGPU);
0085 cudaDeviceSynchronize();
0086
0087 cudaCheck(cudaMemcpy(
0088 multiply_resultGPUret, multiply_resultGPU, sizeof(Eigen::Matrix<double, row1, col2>), cudaMemcpyDeviceToHost));
0089 printIt(multiply_resultGPUret);
0090 assert(isEqualFuzzy(multiply_result, (*multiply_resultGPUret)));
0091 }
0092
0093 void testInverse3x3() {
0094 std::cout << "TEST INVERSE 3x3" << std::endl;
0095 Matrix3d m;
0096 fillMatrix(m);
0097 m += m.transpose().eval();
0098
0099 Matrix3d m_inv = m.inverse();
0100 Matrix3d *mGPU = nullptr;
0101 Matrix3d *mGPUret = nullptr;
0102 Matrix3d *mCPUret = new Matrix3d();
0103
0104 #if TEST_DEBUG
0105 std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0106 std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0107 #endif
0108 cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix3d)));
0109 cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix3d)));
0110 cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice));
0111
0112 kernelInverse3x3<<<1, 1>>>(mGPU, mGPUret);
0113 cudaDeviceSynchronize();
0114
0115 cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix3d), cudaMemcpyDeviceToHost));
0116 #if TEST_DEBUG
0117 std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0118 #endif
0119 assert(isEqualFuzzy(m_inv, *mCPUret));
0120 }
0121
0122 void testInverse4x4() {
0123 std::cout << "TEST INVERSE 4x4" << std::endl;
0124 Matrix4d m;
0125 fillMatrix(m);
0126 m += m.transpose().eval();
0127
0128 Matrix4d m_inv = m.inverse();
0129 Matrix4d *mGPU = nullptr;
0130 Matrix4d *mGPUret = nullptr;
0131 Matrix4d *mCPUret = new Matrix4d();
0132
0133 #if TEST_DEBUG
0134 std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0135 std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0136 #endif
0137 cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix4d)));
0138 cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix4d)));
0139 cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix4d), cudaMemcpyHostToDevice));
0140
0141 kernelInverse4x4<<<1, 1>>>(mGPU, mGPUret);
0142 cudaDeviceSynchronize();
0143
0144 cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix4d), cudaMemcpyDeviceToHost));
0145 #if TEST_DEBUG
0146 std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0147 #endif
0148 assert(isEqualFuzzy(m_inv, *mCPUret));
0149 }
0150
0151 void testInverse5x5() {
0152 std::cout << "TEST INVERSE 5x5" << std::endl;
0153 Matrix5d m;
0154 fillMatrix(m);
0155 m += m.transpose().eval();
0156
0157 Matrix5d m_inv = m.inverse();
0158 Matrix5d *mGPU = nullptr;
0159 Matrix5d *mGPUret = nullptr;
0160 Matrix5d *mCPUret = new Matrix5d();
0161
0162 #if TEST_DEBUG
0163 std::cout << "Here is the matrix m:" << std::endl << m << std::endl;
0164 std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl;
0165 #endif
0166 cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix5d)));
0167 cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix5d)));
0168 cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix5d), cudaMemcpyHostToDevice));
0169
0170 kernelInverse5x5<<<1, 1>>>(mGPU, mGPUret);
0171 cudaDeviceSynchronize();
0172
0173 cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix5d), cudaMemcpyDeviceToHost));
0174 #if TEST_DEBUG
0175 std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl;
0176 #endif
0177 assert(isEqualFuzzy(m_inv, *mCPUret));
0178 }
0179
0180 void testEigenvalues() {
0181 std::cout << "TEST EIGENVALUES" << std::endl;
0182 Matrix3d m;
0183 fillMatrix(m);
0184 m += m.transpose().eval();
0185
0186 Matrix3d *m_gpu = nullptr;
0187 Matrix3d *mgpudebug = new Matrix3d();
0188 Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret =
0189 new Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType;
0190 Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret1 =
0191 new Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType;
0192 Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType *ret_gpu = nullptr;
0193 eigenValues(&m, ret);
0194 #if TEST_DEBUG
0195 std::cout << "Generated Matrix M 3x3:\n" << m << std::endl;
0196 std::cout << "The eigenvalues of M are:" << std::endl << (*ret) << std::endl;
0197 std::cout << "*************************\n\n" << std::endl;
0198 #endif
0199 cudaCheck(cudaMalloc((void **)&m_gpu, sizeof(Matrix3d)));
0200 cudaCheck(cudaMalloc((void **)&ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType)));
0201 cudaCheck(cudaMemcpy(m_gpu, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice));
0202
0203 kernel<<<1, 1>>>(m_gpu, ret_gpu);
0204 cudaDeviceSynchronize();
0205
0206 cudaCheck(cudaMemcpy(mgpudebug, m_gpu, sizeof(Matrix3d), cudaMemcpyDeviceToHost));
0207 cudaCheck(cudaMemcpy(
0208 ret1, ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver<Matrix3d>::RealVectorType), cudaMemcpyDeviceToHost));
0209 #if TEST_DEBUG
0210 std::cout << "GPU Generated Matrix M 3x3:\n" << (*mgpudebug) << std::endl;
0211 std::cout << "GPU The eigenvalues of M are:" << std::endl << (*ret1) << std::endl;
0212 std::cout << "*************************\n\n" << std::endl;
0213 #endif
0214 assert(isEqualFuzzy(*ret, *ret1));
0215 }
0216
0217 int main(int argc, char *argv[]) {
0218 cms::cudatest::requireDevices();
0219
0220 testEigenvalues();
0221 testInverse3x3();
0222 testInverse4x4();
0223 testInverse5x5();
0224
0225 testMultiply<1, 2, 2, 1>();
0226 testMultiply<1, 2, 2, 2>();
0227 testMultiply<1, 2, 2, 3>();
0228 testMultiply<1, 2, 2, 4>();
0229 testMultiply<1, 2, 2, 5>();
0230 testMultiply<2, 1, 1, 2>();
0231 testMultiply<2, 1, 1, 3>();
0232 testMultiply<2, 1, 1, 4>();
0233 testMultiply<2, 1, 1, 5>();
0234 testMultiply<2, 2, 2, 2>();
0235 testMultiply<2, 3, 3, 1>();
0236 testMultiply<2, 3, 3, 2>();
0237 testMultiply<2, 3, 3, 4>();
0238 testMultiply<2, 3, 3, 5>();
0239 testMultiply<3, 2, 2, 3>();
0240 testMultiply<2, 3, 3, 3>(); // DOES NOT COMPILE W/O PATCHING EIGEN
0241 testMultiply<3, 3, 3, 3>();
0242 testMultiply<8, 8, 8, 8>();
0243 testMultiply<3, 4, 4, 3>();
0244 testMultiply<2, 4, 4, 2>();
0245 testMultiply<3, 4, 4, 2>(); // DOES NOT COMPILE W/O PATCHING EIGEN
0246
0247 return 0;
0248 }