File indexing completed on 2023-03-17 11:05:49
0001 #include <iomanip>
0002 #include <iostream>
0003 #include <limits>
0004 #include <set>
0005 #include <string>
0006 #include <vector>
0007
0008 #include <hip/hip_runtime.h>
0009
0010
0011
0012
0013 #include "FWCore/MessageLogger/interface/MessageLogger.h"
0014 #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
0015 #include "FWCore/ParameterSet/interface/ParameterSet.h"
0016 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
0017 #include "FWCore/ServiceRegistry/interface/Service.h"
0018 #include "FWCore/Utilities/interface/ResourceInformation.h"
0019 #include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
0020 #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
0021
0022
0023
0024
0025 class ROCmService : public ROCmInterface {
0026 public:
0027 ROCmService(edm::ParameterSet const& config);
0028 ~ROCmService() override;
0029
0030 static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
0031
0032 bool enabled() const final { return enabled_; }
0033
0034 int numberOfDevices() const final { return numberOfDevices_; }
0035
0036
0037 std::pair<int, int> computeCapability(int device) const final {
0038 int size = computeCapabilities_.size();
0039 if (device < 0 or device >= size) {
0040 throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " +
0041 std::to_string(size - 1));
0042 }
0043 return computeCapabilities_[device];
0044 }
0045
0046 private:
0047 int numberOfDevices_ = 0;
0048 std::vector<std::pair<int, int>> computeCapabilities_;
0049 bool enabled_ = false;
0050 bool verbose_ = false;
0051 };
0052
0053 void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
0054
0055 int device;
0056 hipCheck(hipGetDevice(&device));
0057
0058 auto result = hipDeviceSetLimit(limit, request);
0059 if (hipErrorUnsupportedLimit == result) {
0060 edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
0061 return;
0062 }
0063
0064 size_t value;
0065 result = hipDeviceGetLimit(&value, limit);
0066 if (hipSuccess != result) {
0067 edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to set limit \"" << name << "\" to "
0068 << request << ", current value is " << value;
0069 } else if (value != request) {
0070 edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
0071 << " instead of requested " << request;
0072 }
0073 }
0074
// Format an integer version code as "<code / 1000>.<(code % 1000) / 10>",
// e.g. 11020 becomes "11.2".
//
// NOTE(review): this decoding assumes the version is encoded as
// 1000 * major + 10 * minor; confirm that the values returned by
// hipDriverGetVersion() and hipRuntimeGetVersion() follow that convention.
std::string decodeVersion(int version) {
  const int majorPart = version / 1000;
  const int minorPart = (version % 1000) / 10;

  std::string text = std::to_string(majorPart);
  text += '.';
  text += std::to_string(minorPart);
  return text;
}
0078
0079
0080 ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter<bool>("verbose")) {
0081 if (not config.getUntrackedParameter<bool>("enabled")) {
0082 edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
0083 return;
0084 }
0085
0086 auto status = hipGetDeviceCount(&numberOfDevices_);
0087 if (hipSuccess != status) {
0088 edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
0089 << "Disabling the ROCmService.";
0090 return;
0091 }
0092 computeCapabilities_.reserve(numberOfDevices_);
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104 int driverVersion = 0;
0105 hipCheck(hipDriverGetVersion(&driverVersion));
0106
0107
0108
0109 int runtimeVersion = 0;
0110 hipCheck(hipRuntimeGetVersion(&runtimeVersion));
0111
0112 edm::LogInfo log("ROCmService");
0113 if (verbose_) {
0114
0115
0116
0117 log << "ROCm driver API: " << decodeVersion(driverVersion) <<
0118
0119 "\n";
0120 log << "ROCm runtime API: " << decodeVersion(runtimeVersion)
0121 <<
0122
0123 "\n";
0124 log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
0125 } else {
0126 log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
0127 << decodeVersion(driverVersion)
0128
0129
0130
0131 ;
0132 }
0133
0134 auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
0135
0136
0137
0138 auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
0139 auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
0140
0141
0142
0143
0144
0145 std::set<std::string> models;
0146
0147 for (int i = 0; i < numberOfDevices_; ++i) {
0148
0149
0150 hipDeviceProp_t properties;
0151 hipCheck(hipGetDeviceProperties(&properties, i));
0152 log << '\n' << "ROCm device " << i << ": " << properties.name;
0153 if (verbose_) {
0154 log << '\n';
0155 }
0156 models.insert(std::string(properties.name));
0157
0158
0159 computeCapabilities_.emplace_back(properties.major, properties.minor);
0160 if (verbose_) {
0161 log << " compute capability: " << properties.major << "." << properties.minor;
0162 }
0163 log << " (sm_" << properties.major << properties.minor << ")";
0164 if (verbose_) {
0165 log << '\n';
0166 log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
0167 log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
0168
0169
0170
0171
0172 }
0173
0174
0175 static constexpr const char* computeModeDescription[] = {
0176 "default (shared)",
0177 "exclusive (single thread)",
0178 "prohibited",
0179 "exclusive (single process)",
0180 "unknown"};
0181 if (verbose_) {
0182 log << " compute mode:" << std::right << std::setw(27)
0183 << computeModeDescription[std::min(properties.computeMode,
0184 static_cast<int>(std::size(computeModeDescription)) - 1)]
0185 << '\n';
0186 }
0187
0188
0189 hipCheck(hipSetDevice(i));
0190 hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
0191
0192
0193
0194 if (verbose_) {
0195 size_t freeMemory, totalMemory;
0196 hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
0197 log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
0198 << totalMemory / (1 << 20) << " MB total\n";
0199 log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
0200 log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";
0201 }
0202
0203
0204 if (verbose_) {
0205
0206
0207
0208
0209
0210
0211
0212
0213 log << "Other capabilities\n";
0214 log << " " << (properties.canMapHostMemory ? "can" : "cannot")
0215 << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
0216 log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
0217 << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
0218 log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
0219 << " access pageable memory via the host's page tables\n";
0220
0221
0222
0223
0224
0225
0226 log << " " << (properties.managedMemory ? "supports" : "does not support")
0227 << " allocating managed memory on this system\n";
0228 log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
0229 << " coherently access managed memory concurrently with the host\n";
0230 log << " "
0231 << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
0232 << " directly access managed memory on the device without migration\n";
0233 log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
0234 << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
0235 log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
0236 << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
0237 log << '\n';
0238 }
0239
0240
0241
0242 if (verbose_) {
0243 log << "ROCm flags\n";
0244 unsigned int flags;
0245 hipCheck(hipGetDeviceFlags(&flags));
0246 switch (flags & hipDeviceScheduleMask) {
0247 case hipDeviceScheduleAuto:
0248 log << " thread policy: default\n";
0249 break;
0250 case hipDeviceScheduleSpin:
0251 log << " thread policy: spin\n";
0252 break;
0253 case hipDeviceScheduleYield:
0254 log << " thread policy: yield\n";
0255 break;
0256 case hipDeviceScheduleBlockingSync:
0257 log << " thread policy: blocking sync\n";
0258 break;
0259 default:
0260 log << " thread policy: undefined\n";
0261 }
0262 if (flags & hipDeviceMapHost) {
0263 log << " pinned host memory allocations: enabled\n";
0264 } else {
0265 log << " pinned host memory allocations: disabled\n";
0266 }
0267 if (flags & hipDeviceLmemResizeToMax) {
0268 log << " kernel host memory reuse: enabled\n";
0269 } else {
0270 log << " kernel host memory reuse: disabled\n";
0271 }
0272 log << '\n';
0273 }
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286 if (stackSize >= 0) {
0287 setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
0288 }
0289
0290
0291 if (mallocHeapSize >= 0) {
0292 setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
0293 }
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310 if (verbose_) {
0311 size_t value;
0312 log << "ROCm limits\n";
0313
0314
0315
0316
0317 hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
0318 log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
0319 hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
0320 log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
0321
0322
0323
0324
0325
0326
0327
0328
0329 }
0330 }
0331
0332 edm::Service<edm::ResourceInformation> resourceInformationService;
0333 if (resourceInformationService.isAvailable()) {
0334 std::vector<std::string> modelsV(models.begin(), models.end());
0335 resourceInformationService->setGPUModels(modelsV);
0336
0337
0338
0339
0340
0341
0342 }
0343
0344 if (verbose_) {
0345 log << '\n' << "ROCmService fully initialized";
0346 }
0347 enabled_ = true;
0348 }
0349
0350 ROCmService::~ROCmService() {
0351 if (enabled_) {
0352 for (int i = 0; i < numberOfDevices_; ++i) {
0353 hipCheck(hipSetDevice(i));
0354 hipCheck(hipDeviceSynchronize());
0355
0356
0357
0358 hipCheck(hipDeviceReset());
0359 }
0360 }
0361 }
0362
0363 void ROCmService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
0364 edm::ParameterSetDescription desc;
0365 desc.addUntracked<bool>("enabled", true);
0366 desc.addUntracked<bool>("verbose", false);
0367
0368 edm::ParameterSetDescription limits;
0369
0370
0371
0372
0373 limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
0374 limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
0375 ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
0376 limits.addUntracked<int>("hipLimitDevRuntimeSyncDepth", -1)
0377 ->setComment("Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
0378 limits.addUntracked<int>("hipLimitDevRuntimePendingLaunchCount", -1)
0379 ->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
0380 desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
0381 ->setComment(
0382 "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
0383 "the default value.");
0384
0385 descriptions.add("ROCmService", desc);
0386 }
0387
0388 namespace edm {
0389 namespace service {
0390 inline bool isProcessWideService(ROCmService const*) { return true; }
0391 }
0392 }
0393
0394 #include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
0395 using ROCmServiceMaker = edm::serviceregistry::ParameterSetMaker<ROCmInterface, ROCmService>;
0396 DEFINE_FWK_SERVICE_MAKER(ROCmService, ROCmServiceMaker);