File indexing completed on 2024-12-30 23:27:57
0001 import FWCore.ParameterSet.Config as cms
0002
0003 import os
0004
0005 from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
0006
class ModuleTypeResolverAlpaka:
    """Python-side description of the "ModuleTypeResolverAlpaka" plugin.

    Translates the accelerator labels seen by the job into an ordered list
    of Alpaka backends, and fills in the untracked ``alpaka`` PSet of every
    module whose type name ends with "@alpaka".
    """

    def __init__(self, accelerators, backend, synchronize):
        """Build the ordered list of usable Alpaka backends.

        Parameters:
          accelerators: collection of accelerator labels; "gpu-nvidia",
                        "gpu-amd" and "cpu" are the ones recognised here.
          backend:      Alpaka backend name to prefer, or None to keep the
                        built-in order.
          synchronize:  value used for ``alpaka.synchronize`` of modules that
                        do not set it themselves.

        Raises:
          cms.EDMException (UnavailableAccelerator) if none of the recognised
          accelerators is present, or if ``backend`` is not among the
          backends derived from ``accelerators``.
        """
        # The order of these checks is the default preference order: the
        # first entry of the list is used as the default backend.
        self._valid_backends = []
        if "gpu-nvidia" in accelerators:
            self._valid_backends.append("cuda_async")
        if "gpu-amd" in accelerators:
            self._valid_backends.append("rocm_async")
        if "cpu" in accelerators:
            self._valid_backends.append("serial_sync")
        if not self._valid_backends:
            raise cms.EDMException(
                cms.edm.errors.UnavailableAccelerator,
                "ModuleTypeResolverAlpaka had no backends available because of the combination of the job configuration and accelerator availability on the machine. The job sees {} accelerators.".format(", ".join(accelerators)))
        if backend is not None:
            if backend not in self._valid_backends:
                raise cms.EDMException(
                    cms.edm.errors.UnavailableAccelerator,
                    "The ProcessAcceleratorAlpaka was configured to use {} backend, but that backend is not available because of the combination of the job configuration and accelerator availability on the machine. The job was configured to use {} accelerators, which translates to {} Alpaka backends.".format(
                        backend, ", ".join(accelerators), ", ".join(self._valid_backends)))
            # Move the explicitly requested backend to the front so that it
            # becomes the default one.
            if backend != self._valid_backends[0]:
                self._valid_backends.remove(backend)
                self._valid_backends.insert(0, backend)
        self._synchronize = synchronize

    def plugin(self):
        """Return the name of the plugin, "ModuleTypeResolverAlpaka"."""
        return "ModuleTypeResolverAlpaka"

    @staticmethod
    def _isDefaultSynchronize(value):
        """Tell whether ``value`` is an unset optional untracked bool.

        True when ``value`` has the same type as ``cms.optional.untracked.bool``,
        is untracked, and is compatible with ``cms.bool``.
        """
        return (
            isinstance(value, type(cms.optional.untracked.bool))
            and not value.isTracked()
            and value.isCompatibleCMSType(cms.bool)
        )

    def setModuleVariant(self, module):
        """Fill in ``module.alpaka.backend`` and ``module.alpaka.synchronize``.

        Modules whose type does not end with "@alpaka" are left untouched.

        Raises:
          cms.EDMException (Configuration) if the module has a tracked
          ``alpaka`` PSet.
          cms.EDMException (UnavailableAccelerator) if the module explicitly
          requests a backend that is not available for the job.
        """
        if not module.type_().endswith("@alpaka"):
            return

        defaultBackend = self._valid_backends[0]
        if hasattr(module, "alpaka"):
            # The 'alpaka' PSet is required to be untracked; reject a tracked
            # one right away with a configuration error.
            if module.alpaka.isTracked():
                raise cms.EDMException(
                    cms.edm.errors.Configuration,
                    "The 'alpaka' PSet in module '{}' is tracked, but it should be untracked".format(module.label_()))
            if hasattr(module.alpaka, "backend"):
                # An empty string means "no explicit choice": use the default.
                if module.alpaka.backend == "":
                    module.alpaka.backend = defaultBackend
                elif module.alpaka.backend.value() not in self._valid_backends:
                    raise cms.EDMException(
                        cms.edm.errors.UnavailableAccelerator,
                        "Module {} has the Alpaka backend set explicitly, but its accelerator is not available for the job because of the combination of the job configuration and accelerator availability on the machine. The following Alpaka backends are available for the job {}.".format(module.label_(), ", ".join(self._valid_backends)))
            else:
                module.alpaka.backend = cms.untracked.string(defaultBackend)
        else:
            module.alpaka = cms.untracked.PSet(
                backend = cms.untracked.string(defaultBackend)
            )

        # Only apply the process-wide synchronize setting when the module
        # has not set a concrete value of its own.
        if not hasattr(module.alpaka, "synchronize") or self._isDefaultSynchronize(module.alpaka.synchronize):
            module.alpaka.synchronize = cms.untracked.bool(self._synchronize)
0057
class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
    """ProcessAcceleratorAlpaka itself does not define or inspect
    availability of any accelerator devices. It merely sets up
    necessary Alpaka infrastructure based on the availability of
    accelerators that the concrete ProcessAccelerators (like
    ProcessAcceleratorCUDA) define.
    """

    # (accelerator label, AlpakaService name) pairs handled by apply().
    # Each service is expected in the module
    # HeterogeneousCore.AlpakaServices.<name>_cfi under the same name.
    _SERVICES = (
        ("cpu", "AlpakaServiceSerialSync"),
        ("gpu-nvidia", "AlpakaServiceCudaAsync"),
        ("gpu-amd", "AlpakaServiceROCmAsync"),
    )

    def __init__(self):
        """Start with no preferred backend and synchronization disabled."""
        super(ProcessAcceleratorAlpaka, self).__init__()
        self._backend = None
        self._synchronize = False

    def setBackend(self, backend):
        """Set the Alpaka backend name passed on to ModuleTypeResolverAlpaka."""
        self._backend = backend

    def setSynchronize(self, synchronize):
        """Set the synchronize value passed on to ModuleTypeResolverAlpaka."""
        self._synchronize = synchronize

    def moduleTypeResolver(self, accelerators):
        """Return a ModuleTypeResolverAlpaka for the given accelerators,
        configured with the stored backend and synchronize settings."""
        return ModuleTypeResolverAlpaka(accelerators, self._backend, self._synchronize)

    @staticmethod
    def _loadService(serviceName):
        """Return the service object ``serviceName`` from its _cfi module,
        or None if it cannot be loaded."""
        import importlib
        try:
            cfi = importlib.import_module(
                "HeterogeneousCore.AlpakaServices.{}_cfi".format(serviceName))
            return getattr(cfi, serviceName)
        except Exception:
            # Deliberately best-effort: a service that cannot be loaded is
            # treated as unavailable. KeyboardInterrupt and SystemExit are
            # not Exception subclasses and therefore still propagate.
            return None

    def apply(self, process, accelerators):
        """Add or remove the AlpakaService of each backend on ``process``.

        For every known (accelerator, service) pair: if the accelerator is
        listed in ``accelerators`` and the service can be loaded, the service
        is added to the process unless one is already there; otherwise any
        such service present in the process is removed.
        """
        # Make sure the MessageLogger has an 'AlpakaService' PSet.
        # NOTE(review): presumably this enables the AlpakaService message
        # category - confirm against the MessageLogger configuration.
        if not hasattr(process.MessageLogger, "AlpakaService"):
            process.MessageLogger.AlpakaService = cms.untracked.PSet()

        for accelerator, serviceName in self._SERVICES:
            service = None
            if accelerator in accelerators:
                service = self._loadService(serviceName)

            if service is None:
                # Backend not usable: make sure its service is not loaded.
                if hasattr(process, serviceName):
                    delattr(process, serviceName)
            elif not hasattr(process, serviceName):
                # Backend usable and no service configured yet: load it.
                process.add_(service)
0127
0128
0129
# Register, in the cms special-import registry, the import statement that
# goes with the ProcessAcceleratorAlpaka type.
cms.specialImportRegistry.registerSpecialImportForType(
    ProcessAcceleratorAlpaka,
    "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka",
)