Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2021-02-14 12:49:25

0001 #include <cassert>
0002 #include <iostream>
0003 #include <limits>
0004 #include <string>
0005 #include <utility>
0006 
0007 #include <cuda_runtime_api.h>
0008 
0009 #include "catch.hpp"
0010 
0011 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0012 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0013 #include "FWCore/Utilities/interface/Exception.h"
0014 #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
0015 
0016 namespace {
0017   CUDAService makeCUDAService(edm::ParameterSet ps) {
0018     auto desc = edm::ConfigurationDescriptions("Service", "CUDAService");
0019     CUDAService::fillDescriptions(desc);
0020     desc.validate(ps, "CUDAService");
0021     return CUDAService(ps);
0022   }
0023 }  // namespace
0024 
0025 TEST_CASE("Tests of CUDAService", "[CUDAService]") {
0026   // Test setup: check if a simple CUDA runtime API call fails:
0027   // if so, skip the test with the CUDAService enabled
0028   int deviceCount = 0;
0029   auto ret = cudaGetDeviceCount(&deviceCount);
0030 
0031   if (ret != cudaSuccess) {
0032     WARN("Unable to query the CUDA capable devices from the CUDA runtime API: ("
0033          << ret << ") " << cudaGetErrorString(ret) << ". Running only tests not requiring devices.");
0034   }
0035 
0036   SECTION("CUDAService enabled") {
0037     edm::ParameterSet ps;
0038     ps.addUntrackedParameter("enabled", true);
0039     SECTION("Enabled only if there are CUDA capable GPUs") {
0040       auto cs = makeCUDAService(ps);
0041       if (deviceCount <= 0) {
0042         REQUIRE(cs.enabled() == false);
0043         WARN("CUDAService is disabled as there are no CUDA GPU devices");
0044       } else {
0045         REQUIRE(cs.enabled() == true);
0046         INFO("CUDAService is enabled");
0047       }
0048     }
0049 
0050     if (deviceCount <= 0) {
0051       return;
0052     }
0053 
0054     auto cs = makeCUDAService(ps);
0055 
0056     SECTION("CUDA Queries") {
0057       int driverVersion = 0, runtimeVersion = 0;
0058       ret = cudaDriverGetVersion(&driverVersion);
0059       if (ret != cudaSuccess) {
0060         FAIL("Unable to query the CUDA driver version from the CUDA runtime API: (" << ret << ") "
0061                                                                                     << cudaGetErrorString(ret));
0062       }
0063       ret = cudaRuntimeGetVersion(&runtimeVersion);
0064       if (ret != cudaSuccess) {
0065         FAIL("Unable to query the CUDA runtime API version: (" << ret << ") " << cudaGetErrorString(ret));
0066       }
0067 
0068       WARN("CUDA Driver Version / Runtime Version: " << driverVersion / 1000 << "." << (driverVersion % 100) / 10
0069                                                      << " / " << runtimeVersion / 1000 << "."
0070                                                      << (runtimeVersion % 100) / 10);
0071 
0072       // Test that the number of devices found by the service
0073       // is the same as detected by the CUDA runtime API
0074       REQUIRE(cs.numberOfDevices() == deviceCount);
0075       WARN("Detected " << cs.numberOfDevices() << " CUDA Capable device(s)");
0076 
0077       // Test that the compute capabilities of each device
0078       // are the same as detected by the CUDA runtime API
0079       for (int i = 0; i < deviceCount; ++i) {
0080         cudaDeviceProp deviceProp;
0081         ret = cudaGetDeviceProperties(&deviceProp, i);
0082         if (ret != cudaSuccess) {
0083           FAIL("Unable to query the CUDA properties for device " << i << " from the CUDA runtime API: (" << ret << ") "
0084                                                                  << cudaGetErrorString(ret));
0085         }
0086 
0087         REQUIRE(deviceProp.major == cs.computeCapability(i).first);
0088         REQUIRE(deviceProp.minor == cs.computeCapability(i).second);
0089         INFO("Device " << i << ": " << deviceProp.name << "\n CUDA Capability Major/Minor version number: "
0090                        << deviceProp.major << "." << deviceProp.minor);
0091       }
0092     }
0093 
0094     SECTION("CUDAService device free memory") {
0095       size_t mem = 0;
0096       int dev = -1;
0097       for (int i = 0; i < deviceCount; ++i) {
0098         size_t free, tot;
0099         cudaSetDevice(i);
0100         cudaMemGetInfo(&free, &tot);
0101         WARN("Device " << i << " memory total " << tot << " free " << free);
0102         if (free > mem) {
0103           mem = free;
0104           dev = i;
0105         }
0106       }
0107       WARN("Device with most free memory " << dev << "\n"
0108                                            << "     as given by CUDAService " << cs.deviceWithMostFreeMemory());
0109     }
0110   }
0111 
0112   SECTION("Force to be disabled") {
0113     edm::ParameterSet ps;
0114     ps.addUntrackedParameter("enabled", false);
0115     auto cs = makeCUDAService(ps);
0116     REQUIRE(cs.enabled() == false);
0117     REQUIRE(cs.numberOfDevices() == 0);
0118   }
0119 }