import FWCore.ParameterSet.Config as cms

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ModuleTypeResolverAlpaka:
    def __init__(self, accelerators, backend, synchronize):
        # first element is used as the default if nothing is set
        self._valid_backends = []
        if "gpu-nvidia" in accelerators:
            self._valid_backends.append("cuda_async")
        if "gpu-amd" in accelerators:
            self._valid_backends.append("rocm_async")
        if "cpu" in accelerators:
            self._valid_backends.append("serial_sync")
        if len(self._valid_backends) == 0:
            raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "ModuleTypeResolverAlpaka had no backends available because of the combination of the job configuration and the accelerator availability on the machine. The job sees {} accelerators.".format(", ".join(accelerators)))
        if backend is not None:
            if backend not in self._valid_backends:
                raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "The ProcessAcceleratorAlpaka was configured to use the {} backend, but that backend is not available because of the combination of the job configuration and the accelerator availability on the machine. The job was configured to use {} accelerators, which translates to {} Alpaka backends.".format(
                    backend, ", ".join(accelerators), ", ".join(self._valid_backends)))
            if backend != self._valid_backends[0]:
                self._valid_backends.remove(backend)
                self._valid_backends.insert(0, backend)
        self._synchronize = synchronize

    def plugin(self):
        return "ModuleTypeResolverAlpaka"

    def setModuleVariant(self, module):
        if module.type_().endswith("@alpaka"):
            defaultBackend = self._valid_backends[0]
            if hasattr(module, "alpaka"):
                # Ensure the PSet is untracked already here, because the
                # C++ ModuleTypeResolverAlpaka relies on it being untracked
                # (it runs before the configuration validation)
                if module.alpaka.isTracked():
                    raise cms.EDMException(cms.edm.errors.Configuration, "The 'alpaka' PSet in module '{}' is tracked, but it should be untracked".format(module.label_()))
                if hasattr(module.alpaka, "backend"):
                    if module.alpaka.backend == "":
                        module.alpaka.backend = defaultBackend
                    elif module.alpaka.backend.value() not in self._valid_backends:
                        raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "Module {} has the Alpaka backend set explicitly, but its accelerator is not available for the job because of the combination of the job configuration and the accelerator availability on the machine. The following Alpaka backends are available for the job: {}.".format(module.label_(), ", ".join(self._valid_backends)))
                else:
                    module.alpaka.backend = cms.untracked.string(defaultBackend)
            else:
                module.alpaka = cms.untracked.PSet(
                    backend = cms.untracked.string(defaultBackend)
                )
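            # isDefaultValue: True if the parameter still holds the
            # cms.optional.untracked.bool placeholder, i.e. the configuration
            # did not set a concrete value for it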
            isDefaultValue = lambda v: \
                isinstance(v, type(cms.optional.untracked.bool)) \
                and not v.isTracked() \
                and v.isCompatibleCMSType(cms.bool)
            if not hasattr(module.alpaka, "synchronize") or isDefaultValue(module.alpaka.synchronize):
                module.alpaka.synchronize = cms.untracked.bool(self._synchronize)

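# Illustrative sketch (comment only, not executed): for a hypothetical module
#   process.producer = cms.EDProducer("TestAlpakaProducer@alpaka")
# setModuleVariant() leaves the module equivalent to
#   process.producer = cms.EDProducer("TestAlpakaProducer@alpaka",
#       alpaka = cms.untracked.PSet(
#           backend = cms.untracked.string("serial_sync"),
#           synchronize = cms.untracked.bool(False)
#       )
#   )
# assuming only the "cpu" accelerator is available, the module does not set a
# backend itself, and the default synchronize=False is in effect.
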
class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
    """ProcessAcceleratorAlpaka itself does not define or inspect the
    availability of any accelerator devices. It merely sets up the
    necessary Alpaka infrastructure based on the availability of the
    accelerators that the concrete ProcessAccelerators (like
    ProcessAcceleratorCUDA) define.
    """
    def __init__(self):
        super(ProcessAcceleratorAlpaka, self).__init__()
        self._backend = None
        self._synchronize = False

    # User-facing interface
    def setBackend(self, backend):
        self._backend = backend

    def setSynchronize(self, synchronize):
        self._synchronize = synchronize

    # Framework-facing interface
    def moduleTypeResolver(self, accelerators):
        return ModuleTypeResolverAlpaka(accelerators, self._backend, self._synchronize)

    def apply(self, process, accelerators):
        # Propagate the AlpakaService messages through the MessageLogger
        if not hasattr(process.MessageLogger, "AlpakaService"):
            process.MessageLogger.AlpakaService = cms.untracked.PSet()

        # Check if the CPU backend is available
        try:
            if "cpu" not in accelerators:
                raise ImportError("the cpu accelerator is not available for this job")
            from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
        except ImportError:
            # the CPU backend is not available, do not load the AlpakaServiceSerialSync
            if hasattr(process, "AlpakaServiceSerialSync"):
                del process.AlpakaServiceSerialSync
        else:
            # the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
            if not hasattr(process, "AlpakaServiceSerialSync"):
                process.add_(AlpakaServiceSerialSync)

        # Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
        try:
            if "gpu-nvidia" not in accelerators:
                raise ImportError("the gpu-nvidia accelerator is not available for this job")
            from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
        except ImportError:
            # CUDA is not available, do not load the AlpakaServiceCudaAsync
            if hasattr(process, "AlpakaServiceCudaAsync"):
                del process.AlpakaServiceCudaAsync
        else:
            # CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
            if not hasattr(process, "AlpakaServiceCudaAsync"):
                process.add_(AlpakaServiceCudaAsync)

        # Check if ROCm is available, and if the system has at least one usable AMD GPU
        try:
            if "gpu-amd" not in accelerators:
                raise ImportError("the gpu-amd accelerator is not available for this job")
            from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
        except ImportError:
            # ROCm is not available, do not load the AlpakaServiceROCmAsync
            if hasattr(process, "AlpakaServiceROCmAsync"):
                del process.AlpakaServiceROCmAsync
        else:
            # ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
            if not hasattr(process, "AlpakaServiceROCmAsync"):
                process.add_(AlpakaServiceROCmAsync)

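    # Note (orientation only): the "accelerators" argument that the framework
    # passes to moduleTypeResolver() and apply() reflects the accelerators
    # selected for the job via process.options.accelerators; for example
    #   process.options.accelerators = ["cpu"]
    # limits an Alpaka job to the serial_sync backend.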

# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")
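
# Example usage (a sketch; it assumes an instance has been attached to the job
# configuration under the label "ProcessAcceleratorAlpaka", as the standard
# accelerator configuration does):
#   process.ProcessAcceleratorAlpaka.setBackend("serial_sync")
#   process.ProcessAcceleratorAlpaka.setSynchronize(True)
# With these calls ModuleTypeResolverAlpaka fills backend = "serial_sync" and
# synchronize = True into the untracked "alpaka" PSet of every module whose
# type ends in "@alpaka", unless the module sets those parameters explicitly.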