Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:15:39

0001 import FWCore.ParameterSet.Config as cms
0002 
0003 import os
0004 
0005 from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
0006 
class ModuleTypeResolverAlpaka:
    """Choose the Alpaka backend for modules whose type ends in "@alpaka".

    The resolver keeps an ordered list of the Alpaka backends that are usable
    given the accelerators available to the job. The first element of that
    list is the default assigned to modules that do not select a backend
    themselves.
    """

    def __init__(self, accelerators, backend):
        """Build the ordered list of valid Alpaka backends.

        Parameters:
          accelerators: collection of accelerator labels the job may use; the
              labels "gpu-nvidia", "gpu-amd" and "cpu" are recognised.
          backend: name of the backend to use as the default, or None to keep
              the built-in order (cuda_async, rocm_async, serial_sync).

        Raises:
          cms.EDMException (UnavailableAccelerator) if no backend is available
          for the given accelerators, or if the requested backend is not one
          of the available backends.
        """
        # Accelerator label -> Alpaka backend, in order of preference.
        # first element is used as the default if nothing is set
        preference = (
            ("gpu-nvidia", "cuda_async"),
            ("gpu-amd", "rocm_async"),
            ("cpu", "serial_sync"),
        )
        self._valid_backends = [name for label, name in preference if label in accelerators]
        if not self._valid_backends:
            raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "ModuleTypeResolverAlpaka had no backends available because of the combination of the job configuration and accelerator availability on the machine. The job sees {} accelerators.".format(", ".join(accelerators)))
        if backend is not None:
            if backend not in self._valid_backends:
                raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "The ProcessAcceleratorAlpaka was configured to use {} backend, but that backend is not available because of the combination of the job configuration and accelerator availability on the machine. The job was configured to use {} accelerators, which translates to {} Alpaka backends.".format(
                    backend, ", ".join(accelerators), ", ".join(self._valid_backends)))
            if backend != self._valid_backends[0]:
                # Move the requested backend to the front so it becomes the default
                self._valid_backends.remove(backend)
                self._valid_backends.insert(0, backend)

    def plugin(self):
        """Return the name of the resolver plugin."""
        return "ModuleTypeResolverAlpaka"

    def setModuleVariant(self, module):
        """Make sure an "@alpaka" module has a valid `alpaka.backend` setting.

        Modules whose type does not end in "@alpaka" are left untouched. For
        the others, a missing `alpaka` PSet, a missing `backend` parameter or
        an empty `backend` string is filled with the default backend.

        Raises:
          cms.EDMException (UnavailableAccelerator) if the module explicitly
          names a backend that is not among the valid backends.
        """
        if not module.type_().endswith("@alpaka"):
            return

        defaultBackend = self._valid_backends[0]

        if not hasattr(module, "alpaka"):
            module.alpaka = cms.untracked.PSet(
                backend = cms.untracked.string(defaultBackend)
            )
            return

        if not hasattr(module.alpaka, "backend"):
            module.alpaka.backend = cms.untracked.string(defaultBackend)
            return

        if module.alpaka.backend == "":
            # An empty string means "not chosen": fall back to the default
            module.alpaka.backend = defaultBackend
        elif module.alpaka.backend.value() not in self._valid_backends:
            raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "Module {} has the Alpaka backend set explicitly, but its accelerator is not available for the job because of the combination of the job configuration and accelerator availability on the machine. The following Alpaka backends are available for the job {}.".format(module.label_(), ", ".join(self._valid_backends)))
0045 
class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
    """ProcessAcceleratorAlpaka itself does not define or inspect
    availability of any accelerator devices. It merely sets up
    necessary Alpaka infrastructure based on the availability of
    accelerators that the concrete ProcessAccelerators (like
    ProcessAcceleratorCUDA) define.
    """
    def __init__(self):
        super(ProcessAcceleratorAlpaka, self).__init__()
        # Backend requested via setBackend(); None means no explicit request
        self._backend = None

    # User-facing interface
    def setBackend(self, backend):
        """Request `backend` as the default Alpaka backend.

        The value is only stored here; it is handed to
        ModuleTypeResolverAlpaka in moduleTypeResolver().
        """
        self._backend = backend

    # Framework-facing interface
    def moduleTypeResolver(self, accelerators):
        """Return a ModuleTypeResolverAlpaka for `accelerators` and the requested backend."""
        return ModuleTypeResolverAlpaka(accelerators, self._backend)

    def apply(self, process, accelerators):
        """Add or remove the per-backend AlpakaService in `process`.

        For each of the "cpu", "gpu-nvidia" and "gpu-amd" accelerators: when
        the accelerator is listed in `accelerators` and the matching service
        configuration can be imported, the service is added to `process`
        unless one is already present; otherwise any such service is removed
        from `process`.
        """
        # Propagate the AlpakaService messages through the MessageLogger
        if not hasattr(process.MessageLogger, "AlpakaService"):
            process.MessageLogger.AlpakaService = cms.untracked.PSet()

        # Check if the CPU backend is available
        serialSync = None
        if "cpu" in accelerators:
            try:
                from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync as serialSync
            except ImportError:
                # the service configuration cannot be imported: treat the
                # CPU backend as not available
                pass
        self._syncService(process, "AlpakaServiceSerialSync", serialSync)

        # Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
        cudaAsync = None
        if "gpu-nvidia" in accelerators:
            try:
                from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync as cudaAsync
            except ImportError:
                # the service configuration cannot be imported: treat CUDA
                # as not available
                pass
        self._syncService(process, "AlpakaServiceCudaAsync", cudaAsync)

        # Check if ROCm is available, and if the system has at least one usable AMD GPU
        rocmAsync = None
        if "gpu-amd" in accelerators:
            try:
                from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync as rocmAsync
            except ImportError:
                # the service configuration cannot be imported: treat ROCm
                # as not available
                pass
        self._syncService(process, "AlpakaServiceROCmAsync", rocmAsync)

    @staticmethod
    def _syncService(process, name, service):
        """Add `service` to `process`, or remove the attribute `name` when `service` is None.

        A service already present in `process` under `name` is left as it is
        when `service` is given.
        """
        if service is None:
            # the backend is not available, do not load its service
            if hasattr(process, name):
                delattr(process, name)
        elif not hasattr(process, name):
            # the backend is available, ensure its service is loaded
            process.add_(service)
0111 
0112 
# Register the import statement to use for ProcessAcceleratorAlpaka, so that
# this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(
    ProcessAcceleratorAlpaka,
    "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka",
)