Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
#include "CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiStripClustersCUDADevice::SiStripClustersCUDADevice(uint32_t maxClusters,
                                                     uint32_t maxStripsPerCluster,
                                                     cudaStream_t stream) {
  maxClusterSize_ = maxStripsPerCluster;

  clusterIndex_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
  clusterSize_ = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
  clusterADCs_ = cms::cuda::make_device_unique<uint8_t[]>(maxClusters * maxStripsPerCluster, stream);
  clusterDetId_ = cms::cuda::make_device_unique<stripgpu::detId_t[]>(maxClusters, stream);
  firstStrip_ = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(maxClusters, stream);
  trueCluster_ = cms::cuda::make_device_unique<bool[]>(maxClusters, stream);
  barycenter_ = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
  charge_ = cms::cuda::make_device_unique<float[]>(maxClusters, stream);

  auto view = cms::cuda::make_host_unique<DeviceView>(stream);
  view->clusterIndex_ = clusterIndex_.get();
  view->clusterSize_ = clusterSize_.get();
  view->clusterADCs_ = clusterADCs_.get();
  view->clusterDetId_ = clusterDetId_.get();
  view->firstStrip_ = firstStrip_.get();
  view->trueCluster_ = trueCluster_.get();
  view->barycenter_ = barycenter_.get();
  view->charge_ = charge_.get();
  view->maxClusterSize_ = maxStripsPerCluster;

  view_d = cms::cuda::make_device_unique<DeviceView>(stream);
  cms::cuda::copyAsync(view_d, view, stream);
#ifdef GPU_CHECK
  cudaCheck(cudaStreamSynchronize(stream));
#endif
}

SiStripClustersCUDAHost::SiStripClustersCUDAHost(const SiStripClustersCUDADevice& clusters_d, cudaStream_t stream) {
  nClusters_ = clusters_d.nClusters();
  maxClusterSize_ = clusters_d.maxClusterSize();
  clusterIndex_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream);
  clusterSize_ = cms::cuda::make_host_unique<uint32_t[]>(nClusters_, stream);
  clusterADCs_ = cms::cuda::make_host_unique<uint8_t[]>(nClusters_ * maxClusterSize_, stream);
  clusterDetId_ = cms::cuda::make_host_unique<stripgpu::detId_t[]>(nClusters_, stream);
  firstStrip_ = cms::cuda::make_host_unique<stripgpu::stripId_t[]>(nClusters_, stream);
  trueCluster_ = cms::cuda::make_host_unique<bool[]>(nClusters_, stream);
  barycenter_ = cms::cuda::make_host_unique<float[]>(nClusters_, stream);
  charge_ = cms::cuda::make_host_unique<float[]>(nClusters_, stream);

  cms::cuda::copyAsync(clusterIndex_, clusters_d.clusterIndex(), nClusters_, stream);
  cms::cuda::copyAsync(clusterSize_, clusters_d.clusterSize(), nClusters_, stream);
  cms::cuda::copyAsync(clusterADCs_, clusters_d.clusterADCs(), nClusters_ * maxClusterSize_, stream);
  cms::cuda::copyAsync(clusterDetId_, clusters_d.clusterDetId(), nClusters_, stream);
  cms::cuda::copyAsync(firstStrip_, clusters_d.firstStrip(), nClusters_, stream);
  cms::cuda::copyAsync(trueCluster_, clusters_d.trueCluster(), nClusters_, stream);
  cms::cuda::copyAsync(barycenter_, clusters_d.barycenter(), nClusters_, stream);
  cms::cuda::copyAsync(charge_, clusters_d.charge(), nClusters_, stream);
#ifdef GPU_CHECK
  cudaCheck(cudaStreamSynchronize(stream));
#endif
}