Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:25:54

0001 #include "KernelManagerHGCalCellPositions.h"
0002 #include "CondFormats/HGCalObjects/interface/HeterogeneousHGCalHEFCellPositionsConditions.h"
0003 
0004 HeterogeneousHGCalHEFCellPositionsConditions::HeterogeneousHGCalHEFCellPositionsConditions(
0005     cpos::HGCalPositionsMapping* cpuPos) {
0006   //HGCalPositions as defined in hgcal_conditions::positions
0007   this->sizes_ = calculate_memory_bytes_(cpuPos);
0008   this->chunk_ = allocate_memory_(this->sizes_);
0009   transfer_data_to_heterogeneous_pointers_(this->sizes_, cpuPos);
0010   transfer_data_to_heterogeneous_vars_(cpuPos);
0011 }
0012 
0013 size_t HeterogeneousHGCalHEFCellPositionsConditions::allocate_memory_(const std::vector<size_t>& sz) {
0014   size_t chunk = std::accumulate(sz.begin(), sz.end(), 0);  //total memory required in bytes
0015   cudaCheck(cudaMallocHost(&this->posmap_.x, chunk));
0016   return chunk;
0017 }
0018 
0019 void HeterogeneousHGCalHEFCellPositionsConditions::transfer_data_to_heterogeneous_pointers_(
0020     const std::vector<size_t>& sz, cpos::HGCalPositionsMapping* cpuPos) {
0021   //store cumulative sum in bytes and convert it to sizes in units of C++ typesHEF, i.e., number if items to be transferred to GPU
0022   std::vector<size_t> cumsum_sizes(sz.size() + 1, 0);  //starting with zero
0023   std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin() + 1);
0024   for (unsigned int i = 1; i < cumsum_sizes.size(); ++i)  //start at second element (the first is zero)
0025   {
0026     size_t types_size = 0;
0027     if (cpos::types[i - 1] == cpos::HeterogeneousHGCalPositionsType::Float)
0028       types_size = sizeof(float);
0029     else if (cpos::types[i - 1] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0030       types_size = sizeof(int32_t);
0031     else if (cpos::types[i - 1] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
0032       types_size = sizeof(uint32_t);
0033     else
0034       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0035           << "Wrong HeterogeneousHGCalPositionsMapping type";
0036     cumsum_sizes[i] /= types_size;
0037   }
0038 
0039   for (unsigned int j = 0; j < sz.size(); ++j) {
0040     //setting the pointers
0041     if (j != 0) {
0042       const unsigned int jm1 = j - 1;
0043       const size_t shift = cumsum_sizes[j] - cumsum_sizes[jm1];
0044       if (cpos::types[jm1] == cpos::HeterogeneousHGCalPositionsType::Float and
0045           cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Float)
0046         select_pointer_f_(&this->posmap_, j) = select_pointer_f_(&this->posmap_, jm1) + shift;
0047       else if (cpos::types[jm1] == cpos::HeterogeneousHGCalPositionsType::Float and
0048                cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0049         select_pointer_i_(&this->posmap_, j) =
0050             reinterpret_cast<int32_t*>(select_pointer_f_(&this->posmap_, jm1) + shift);
0051       else if (cpos::types[jm1] == cpos::HeterogeneousHGCalPositionsType::Int32_t and
0052                cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0053         select_pointer_i_(&this->posmap_, j) = select_pointer_i_(&this->posmap_, jm1) + shift;
0054       else if (cpos::types[jm1] == cpos::HeterogeneousHGCalPositionsType::Int32_t and
0055                cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
0056         select_pointer_u_(&this->posmap_, j) =
0057             reinterpret_cast<uint32_t*>(select_pointer_i_(&this->posmap_, jm1) + shift);
0058       else
0059         throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0060             << "Wrong HeterogeneousHGCalPositionsMapping type";
0061     }
0062 
0063     //copying the pointers' content
0064     if (j >=
0065         this->number_position_arrays)  //required due to the assymetry between cpos::HeterogeneousHGCalPositionsMapping and cpos::HGCalPositionsMapping
0066     {
0067       for (unsigned int i = cumsum_sizes[j]; i < cumsum_sizes[j + 1]; ++i) {
0068         unsigned int index = i - cumsum_sizes[j];
0069         if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Float) {
0070           select_pointer_f_(&this->posmap_, j)[index] =
0071               select_pointer_f_(cpuPos, j - this->number_position_arrays)[index];
0072         } else if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Int32_t) {
0073           select_pointer_i_(&this->posmap_, j)[index] =
0074               select_pointer_i_(cpuPos, j - this->number_position_arrays)[index];
0075         } else if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Uint32_t) {
0076           select_pointer_u_(&this->posmap_, j)[index] =
0077               select_pointer_u_(cpuPos, j - this->number_position_arrays)[index];
0078         } else
0079           throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0080               << "Wrong HeterogeneousHGCalPositions type";
0081       }
0082     }
0083   }
0084 }
0085 
0086 void HeterogeneousHGCalHEFCellPositionsConditions::transfer_data_to_heterogeneous_vars_(
0087     const cpos::HGCalPositionsMapping* cpuPos) {
0088   this->posmap_.waferSize = cpuPos->waferSize;
0089   this->posmap_.sensorSeparation = cpuPos->sensorSeparation;
0090   this->posmap_.firstLayer = cpuPos->firstLayer;
0091   this->posmap_.lastLayer = cpuPos->lastLayer;
0092   this->posmap_.waferMin = cpuPos->waferMin;
0093   this->posmap_.waferMax = cpuPos->waferMax;
0094   this->nelems_posmap_ = cpuPos->detid.size();
0095 }
0096 
0097 std::vector<size_t> HeterogeneousHGCalHEFCellPositionsConditions::calculate_memory_bytes_(
0098     cpos::HGCalPositionsMapping* cpuPos) {
0099   size_t npointers = cpos::types.size();
0100   std::vector<size_t> sizes(npointers);
0101   for (unsigned int i = 0; i < npointers; ++i) {
0102     const unsigned detid_index = 4;
0103     const unsigned nlayers_index = 3;
0104     if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float and (i == 0 or i == 1))
0105       sizes[i] = select_pointer_u_(cpuPos, detid_index)
0106                      .size();  //x and y position array will have the same size as the detid array
0107     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float and i == 2)
0108       sizes[i] = select_pointer_i_(cpuPos, nlayers_index).size();  //z position's size is equal to the #layers
0109     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float and i > 2)
0110       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0111           << "Wrong HeterogeneousHGCalPositions type (Float)";
0112     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0113       sizes[i] = select_pointer_i_(cpuPos, i - this->number_position_arrays).size();
0114     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
0115       sizes[i] = select_pointer_u_(cpuPos, detid_index).size();
0116   }
0117 
0118   std::vector<size_t> sizes_units(npointers);
0119   for (unsigned int i = 0; i < npointers; ++i) {
0120     if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float)
0121       sizes_units[i] = sizeof(float);
0122     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0123       sizes_units[i] = sizeof(int32_t);
0124     else if (cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
0125       sizes_units[i] = sizeof(uint32_t);
0126   }
0127 
0128   //element by element multiplication
0129   this->sizes_.resize(npointers);
0130   std::transform(sizes.begin(), sizes.end(), sizes_units.begin(), this->sizes_.begin(), std::multiplies<size_t>());
0131   return this->sizes_;
0132 }
0133 
0134 HeterogeneousHGCalHEFCellPositionsConditions::~HeterogeneousHGCalHEFCellPositionsConditions() {
0135   cudaCheck(cudaFreeHost(this->posmap_.x));
0136 }
0137 
0138 //I could use template specializations
0139 //try to use std::variant in the future to avoid similar functions with different return values
0140 float*& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_f_(
0141     cpos::HeterogeneousHGCalPositionsMapping* cpuObject, const unsigned int& item) const {
0142   switch (item) {
0143     case 0:
0144       return cpuObject->x;
0145     case 1:
0146       return cpuObject->y;
0147     case 2:
0148       return cpuObject->zLayer;
0149     default:
0150       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0151           << "select_pointer_f(heterogeneous): no item (typed " << item << ").";
0152       return cpuObject->x;
0153   }
0154 }
0155 
0156 std::vector<float>& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_f_(
0157     cpos::HGCalPositionsMapping* cpuObject, const unsigned int& item) {
0158   switch (item) {
0159     case 0:
0160       return cpuObject->zLayer;
0161     default:
0162       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0163           << "select_pointer_f(non-heterogeneous): no item (typed " << item << ").";
0164       return cpuObject->zLayer;
0165   }
0166 }
0167 
0168 int32_t*& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_i_(
0169     cpos::HeterogeneousHGCalPositionsMapping* cpuObject, const unsigned int& item) const {
0170   switch (item) {
0171     case 3:
0172       return cpuObject->nCellsLayer;
0173     case 4:
0174       return cpuObject->nCellsWaferUChunk;
0175     case 5:
0176       return cpuObject->nCellsHexagon;
0177     default:
0178       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0179           << "select_pointer_i(heterogeneous): no item (typed " << item << ").";
0180       return cpuObject->nCellsHexagon;
0181   }
0182 }
0183 
0184 std::vector<int32_t>& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_i_(
0185     cpos::HGCalPositionsMapping* cpuObject, const unsigned int& item) {
0186   switch (item) {
0187     case 1:
0188       return cpuObject->nCellsLayer;
0189     case 2:
0190       return cpuObject->nCellsWaferUChunk;
0191     case 3:
0192       return cpuObject->nCellsHexagon;
0193     default:
0194       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0195           << "select_pointer_i(non-heterogeneous): no item (typed " << item << ").";
0196       return cpuObject->nCellsHexagon;
0197   }
0198 }
0199 
0200 uint32_t*& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_u_(
0201     cpos::HeterogeneousHGCalPositionsMapping* cpuObject, const unsigned int& item) const {
0202   switch (item) {
0203     case 6:
0204       return cpuObject->detid;
0205     default:
0206       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0207           << "select_pointer_u(heterogeneous): no item (typed " << item << ").";
0208       return cpuObject->detid;
0209   }
0210 }
0211 
0212 std::vector<uint32_t>& HeterogeneousHGCalHEFCellPositionsConditions::select_pointer_u_(
0213     cpos::HGCalPositionsMapping* cpuObject, const unsigned int& item) {
0214   switch (item) {
0215     case 4:
0216       return cpuObject->detid;
0217     default:
0218       throw cms::Exception("HeterogeneousHGCalHEFCellPositionsConditions")
0219           << "select_pointer_u(non-heterogeneous): no item (typed " << item << ").";
0220       return cpuObject->detid;
0221   }
0222 }
0223 
0224 hgcal_conditions::HeterogeneousHEFCellPositionsConditionsESProduct const*
0225 HeterogeneousHGCalHEFCellPositionsConditions::getHeterogeneousConditionsESProductAsync(cudaStream_t stream) const {
0226   // cms::cuda::ESProduct<T> essentially holds an array of GPUData objects,
0227   // one per device. If the data have already been transferred to the
0228   // current device (or the transfer has been queued), the helper just
0229   // returns a reference to that GPUData object. Otherwise, i.e. data are
0230   // not yet on the current device, the helper calls the lambda to do the
0231   // necessary memory allocations and to queue the transfers.
0232   auto const& data = gpuData_.dataForCurrentDeviceAsync(stream, [this](GPUData& data, cudaStream_t stream) {
0233     // Allocate the payload object on pinned host memory.
0234     cudaCheck(cudaMallocHost(&data.host, sizeof(hgcal_conditions::HeterogeneousHEFCellPositionsConditionsESProduct)));
0235     // Allocate the payload array(s) on device memory.
0236     cudaCheck(cudaMalloc(&(data.host->posmap.x), this->chunk_));
0237     // Complete the host-side information on the payload
0238     data.host->posmap.waferSize = this->posmap_.waferSize;
0239     data.host->posmap.sensorSeparation = this->posmap_.sensorSeparation;
0240     data.host->posmap.firstLayer = this->posmap_.firstLayer;
0241     data.host->posmap.lastLayer = this->posmap_.lastLayer;
0242     data.host->posmap.waferMax = this->posmap_.waferMax;
0243     data.host->posmap.waferMin = this->posmap_.waferMin;
0244     data.host->nelems_posmap = this->nelems_posmap_;
0245 
0246     //(set the pointers of the positions' mapping)
0247     size_t sfloat = sizeof(float);
0248     size_t sint32 = sizeof(int32_t);
0249     for (unsigned int j = 0; j < this->sizes_.size() - 1; ++j) {
0250       if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Float and
0251           cpos::types[j + 1] == cpos::HeterogeneousHGCalPositionsType::Float)
0252         select_pointer_f_(&(data.host->posmap), j + 1) =
0253             select_pointer_f_(&(data.host->posmap), j) + (this->sizes_[j] / sfloat);
0254       else if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Float and
0255                cpos::types[j + 1] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0256         select_pointer_i_(&(data.host->posmap), j + 1) =
0257             reinterpret_cast<int32_t*>(select_pointer_f_(&(data.host->posmap), j) + (this->sizes_[j] / sfloat));
0258       else if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Int32_t and
0259                cpos::types[j + 1] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
0260         select_pointer_i_(&(data.host->posmap), j + 1) =
0261             select_pointer_i_(&(data.host->posmap), j) + (this->sizes_[j] / sint32);
0262       else if (cpos::types[j] == cpos::HeterogeneousHGCalPositionsType::Int32_t and
0263                cpos::types[j + 1] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
0264         select_pointer_u_(&(data.host->posmap), j + 1) =
0265             reinterpret_cast<uint32_t*>(select_pointer_i_(&(data.host->posmap), j) + (this->sizes_[j] / sint32));
0266     }
0267 
0268     // Allocate the payload object on the device memory.
0269     cudaCheck(cudaMalloc(&data.device, sizeof(hgcal_conditions::HeterogeneousHEFCellPositionsConditionsESProduct)));
0270 
0271     // Transfer the payload, first the array(s) ...
0272     //Important: The transfer does *not* start at posmap.x because the positions are not known in the CPU side!
0273     size_t non_position_memory_size_to_transfer =
0274         this->chunk_ - this->number_position_arrays * this->nelems_posmap_ *
0275                            sfloat;  //size in bytes occupied by the non-position information
0276     cudaCheck(cudaMemcpyAsync(data.host->posmap.zLayer,
0277                               this->posmap_.zLayer,
0278                               non_position_memory_size_to_transfer,
0279                               cudaMemcpyHostToDevice,
0280                               stream));
0281 
0282     // ... and then the payload object
0283     cudaCheck(cudaMemcpyAsync(data.device,
0284                               data.host,
0285                               sizeof(hgcal_conditions::HeterogeneousHEFCellPositionsConditionsESProduct),
0286                               cudaMemcpyHostToDevice,
0287                               stream));
0288 
0289     //Fill x and y positions in the GPU
0290     KernelManagerHGCalCellPositions km(this->nelems_posmap_);
0291     km.fill_positions(data.device);
0292   });  //gpuData_.dataForCurrentDeviceAsync
0293 
0294   // Returns the payload object on the memory of the current device
0295   return data.device;
0296 }
0297 
0298 // Destructor frees all member pointers
0299 HeterogeneousHGCalHEFCellPositionsConditions::GPUData::~GPUData() {
0300   if (host != nullptr) {
0301     cudaCheck(cudaFree(host->posmap.x));
0302     cudaCheck(cudaFreeHost(host));
0303   }
0304   cudaCheck(cudaFree(device));
0305 }