File indexing completed on 2025-03-26 01:51:19
0001 import FWCore.ParameterSet.Config as cms
0002
0003 import os
0004
0005 from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
0006
class ModuleTypeResolverAlpaka:
    """Python-side resolver that assigns an Alpaka backend to ``@alpaka`` modules.

    The resolver keeps an ordered list of Alpaka backend names derived from
    the accelerators visible to the job.  The first element of that list is
    the default backend given to modules that do not request one explicitly.
    """

    def __init__(self, accelerators, backend, synchronize):
        """Build the ordered list of usable Alpaka backends.

        Parameters:
            accelerators: collection of accelerator names; the entries
                "gpu-nvidia", "gpu-amd" and "cpu" are mapped to the
                "cuda_async", "rocm_async" and "serial_sync" backends.
            backend: name of the backend to use as the default, or None to
                keep the built-in preference order.
            synchronize: value stored and later used as the default for the
                ``alpaka.synchronize`` parameter of the modules.

        Raises:
            cms.EDMException (UnavailableAccelerator): if no backend is
                usable at all, or if ``backend`` is not among the usable ones.
        """
        # The order matters: the first element is used as the default backend.
        self._valid_backends = []
        if "gpu-nvidia" in accelerators:
            self._valid_backends.append("cuda_async")
        if "gpu-amd" in accelerators:
            self._valid_backends.append("rocm_async")
        if "cpu" in accelerators:
            self._valid_backends.append("serial_sync")
        if not self._valid_backends:
            raise cms.EDMException(
                cms.edm.errors.UnavailableAccelerator,
                "ModuleTypeResolverAlpaka had no backends available because of "
                "the combination of the job configuration and accelerator "
                "availability of on the machine. The job sees {} "
                "accelerators.".format(", ".join(accelerators)))
        if backend is not None:
            if backend not in self._valid_backends:
                raise cms.EDMException(
                    cms.edm.errors.UnavailableAccelerator,
                    "The ProcessAcceleratorAlpaka was configured to use {} "
                    "backend, but that backend is not available because of "
                    "the combination of the job configuration and accelerator "
                    "availability on the machine. The job was configured to "
                    "use {} accelerators, which translates to {} Alpaka "
                    "backends.".format(
                        backend, ", ".join(accelerators), ", ".join(self._valid_backends)))
            # Promote the requested backend to the front so it becomes the default.
            if backend != self._valid_backends[0]:
                self._valid_backends.remove(backend)
                self._valid_backends.insert(0, backend)
        self._synchronize = synchronize

    def plugin(self):
        """Return the plugin name of this resolver, "ModuleTypeResolverAlpaka"."""
        return "ModuleTypeResolverAlpaka"

    @staticmethod
    def _isDefaultSynchronize(value):
        """Return True if ``value`` is an unset ``cms.optional.untracked.bool`` placeholder."""
        return (isinstance(value, type(cms.optional.untracked.bool))
                and not value.isTracked()
                and value.isCompatibleCMSType(cms.bool))

    def setModuleVariant(self, module):
        """Fill in the ``alpaka`` PSet of a module whose type ends with "@alpaka".

        Modules of any other type are left untouched.  For "@alpaka" modules:

        * a missing ``alpaka`` PSet is created as an untracked PSet;
        * a missing or empty ``alpaka.backend`` is set to the default backend;
        * a missing or still-unset optional ``alpaka.synchronize`` is set to
          the value given to the constructor.

        Raises:
            cms.EDMException (Configuration): if the module's ``alpaka`` PSet
                is tracked.
            cms.EDMException (UnavailableAccelerator): if the module names a
                backend explicitly and that backend is not usable in this job.
        """
        if not module.type_().endswith("@alpaka"):
            return

        defaultBackend = self._valid_backends[0]
        if not hasattr(module, "alpaka"):
            module.alpaka = cms.untracked.PSet(
                backend = cms.untracked.string(defaultBackend)
            )
        else:
            # The 'alpaka' PSet is required to be untracked.
            if module.alpaka.isTracked():
                raise cms.EDMException(
                    cms.edm.errors.Configuration,
                    "The 'alpaka' PSet in module '{}' is tracked, but it should be untracked".format(module.label_()))
            if not hasattr(module.alpaka, "backend"):
                module.alpaka.backend = cms.untracked.string(defaultBackend)
            elif module.alpaka.backend == "":
                # An empty string means "no explicit choice": use the default.
                module.alpaka.backend = defaultBackend
            elif module.alpaka.backend.value() not in self._valid_backends:
                raise cms.EDMException(
                    cms.edm.errors.UnavailableAccelerator,
                    "Module {} has the Alpaka backend set explicitly, but its "
                    "accelerator is not available for the job because of the "
                    "combination of the job configuration and accelerator "
                    "availability on the machine. The following Alpaka "
                    "backends are available for the job {}.".format(
                        module.label_(), ", ".join(self._valid_backends)))

        # Only override 'synchronize' when the module has not set it itself.
        if not hasattr(module.alpaka, "synchronize") or self._isDefaultSynchronize(module.alpaka.synchronize):
            module.alpaka.synchronize = cms.untracked.bool(self._synchronize)
0057
class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
    """ProcessAcceleratorAlpaka itself does not define or inspect
    availability of any accelerator devices. It merely sets up
    necessary Alpaka infrastructure based on the availability of
    accelerators that the concrete ProcessAccelerators (like
    ProcessAcceleratorCUDA) define.
    """

    def __init__(self):
        """Start with no explicit backend and synchronization disabled."""
        super(ProcessAcceleratorAlpaka, self).__init__()
        self._backend = None
        self._synchronize = False

    def setBackend(self, backend):
        """Store the backend name that is handed to the module type resolver.

        The value is passed unchanged to ModuleTypeResolverAlpaka by
        moduleTypeResolver(); None means no explicit choice.
        """
        self._backend = backend

    def setSynchronize(self, synchronize):
        """Store the synchronize flag that is handed to the module type resolver."""
        self._synchronize = synchronize

    def moduleTypeResolver(self, accelerators):
        """Return a ModuleTypeResolverAlpaka built from the given accelerators
        and the backend and synchronize settings stored on this object.
        """
        return ModuleTypeResolverAlpaka(accelerators, self._backend, self._synchronize)

    def apply(self, process, accelerators):
        """Add or remove the Alpaka services of ``process`` to match ``accelerators``.

        * ``process.MessageLogger.AlpakaService`` is created as an empty
          untracked PSet if it does not exist yet.
        * AlpakaServiceSerialSync is added if the process does not have it.
        * AlpakaServiceCudaAsync is added if "gpu-nvidia" is in
          ``accelerators`` and its configuration fragment can be imported;
          otherwise any existing AlpakaServiceCudaAsync is removed.
        * AlpakaServiceROCmAsync is handled the same way for "gpu-amd".

        A service that already exists on the process is never replaced.
        """
        if not hasattr(process.MessageLogger, "AlpakaService"):
            process.MessageLogger.AlpakaService = cms.untracked.PSet()

        if not hasattr(process, "AlpakaServiceSerialSync"):
            from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
            process.add_(AlpakaServiceSerialSync)

        # NVIDIA GPU backend: the service stays None unless the accelerator is
        # enabled AND the configuration fragment can be imported.
        cudaService = None
        if "gpu-nvidia" in accelerators:
            try:
                from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync as cudaService
            except Exception:
                # Best effort: a failing import is treated as "backend not
                # available".  KeyboardInterrupt/SystemExit still propagate.
                cudaService = None
        if cudaService is None:
            if hasattr(process, "AlpakaServiceCudaAsync"):
                del process.AlpakaServiceCudaAsync
        elif not hasattr(process, "AlpakaServiceCudaAsync"):
            process.add_(cudaService)

        # AMD GPU backend: same logic as for the NVIDIA GPU backend above.
        rocmService = None
        if "gpu-amd" in accelerators:
            try:
                from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync as rocmService
            except Exception:
                # Best effort: a failing import is treated as "backend not
                # available".  KeyboardInterrupt/SystemExit still propagate.
                rocmService = None
        if rocmService is None:
            if hasattr(process, "AlpakaServiceROCmAsync"):
                del process.AlpakaServiceROCmAsync
        elif not hasattr(process, "AlpakaServiceROCmAsync"):
            process.add_(rocmService)
0118
0119
0120
# Register, in the special import registry, the import statement that is
# associated with the ProcessAcceleratorAlpaka type.
cms.specialImportRegistry.registerSpecialImportForType(
    ProcessAcceleratorAlpaka,
    "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka",
)