Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-10-25 09:50:29

0001 #include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
0002 #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
0003 #include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
0004 #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0006 
0007 #include <cstring>
0008 #include <fcntl.h>
0009 #include <sys/mman.h>
0010 #include <unistd.h>
0011 
0012 namespace tc = triton::client;
0013 
0014 template <typename IO>
0015 TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
0016     : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
0017 
0018 template <typename IO>
0019 void TritonMemResource<IO>::set() {
0020   for (auto& entry : data_->entries_) {
0021     TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
0022                           "unable to set shared memory (" + name_ + ")");
0023   }
0024 }
0025 
0026 template <typename IO>
0027 TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
0028     : TritonMemResource<IO>(data, name, size) {}
0029 
0030 template <>
0031 void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
0032   TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
0033                                                                 data_->entries_[entry].byteSizePerBatch_),
0034                         data_->name_ + " toServer(): unable to set data for batch entry " +
0035                             (data_->entries_.size() > 1 ? std::to_string(entry)
0036                              : data_->entries_[entry].byteSizePerBatch_
0037                                  ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
0038                                  : ""));
0039 }
0040 
0041 template <>
0042 void TritonOutputHeapResource::copyOutput() {
0043   size_t contentByteSize = 0;
0044   for (auto& entry : data_->entries_) {
0045     size_t contentByteSizeEntry(0);
0046     if (entry.totalByteSize_ > 0)
0047       TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
0048                             data_->name_ + " fromServer(): unable to get raw");
0049     contentByteSize += contentByteSizeEntry;
0050   }
0051   if (contentByteSize != data_->totalByteSize_) {
0052     throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
0053                                             << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
0054   }
0055 }
0056 
0057 //shared memory helpers based on:
0058 // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
0059 // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)
0060 
0061 template <typename IO>
0062 TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
0063     : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
0064   //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
0065   this->size_ = std::max<size_t>(this->size_, 1);
0066 
0067   //get shared memory region descriptor
0068   int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
0069   if (shm_fd == -1)
0070     throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
0071 
0072   //extend shared memory object
0073   int res = ftruncate(shm_fd, this->size_);
0074   if (res == -1)
0075     throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
0076                                                " to requested size: " + std::to_string(this->size_);
0077 
0078   //map to process address space
0079   constexpr size_t offset(0);
0080   this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
0081   if (this->addr_ == MAP_FAILED)
0082     throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
0083                                                this->name_;
0084 
0085   //close descriptor
0086   if (::close(shm_fd) == -1)
0087     throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
0088 
0089   TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
0090                         "unable to register shared memory region: " + this->name_);
0091 }
0092 
0093 template <typename IO>
0094 TritonCpuShmResource<IO>::~TritonCpuShmResource() {
0095   close();
0096 }
0097 
0098 template <typename IO>
0099 void TritonCpuShmResource<IO>::close() {
0100   if (this->closed_)
0101     return;
0102 
0103   TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
0104                         "unable to unregister shared memory region: " + this->name_);
0105 
0106   //unmap
0107   int tmp_fd = munmap(this->addr_, this->size_);
0108   if (tmp_fd == -1)
0109     throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
0110 
0111   //unlink
0112   int shm_fd = shm_unlink(this->name_.c_str());
0113   if (shm_fd == -1)
0114     throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
0115 
0116   this->closed_ = true;
0117 }
0118 
0119 template <>
0120 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
0121   if (sizeOrig_ > 0)
0122     std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
0123 }
0124 
0125 template <>
0126 void TritonOutputCpuShmResource::copyOutput() {
0127   for (auto& entry : data_->entries_) {
0128     entry.output_ = addr_ + entry.offset_;
0129   }
0130 }
0131 
0132 template class TritonHeapResource<tc::InferInput>;
0133 template class TritonCpuShmResource<tc::InferInput>;
0134 template class TritonHeapResource<tc::InferRequestedOutput>;
0135 template class TritonCpuShmResource<tc::InferRequestedOutput>;
0136 
0137 #ifdef TRITON_ENABLE_GPU
0138 template <typename IO>
0139 TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
0140     : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
0141   //todo: get server device id somehow?
0142   cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
0143   cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
0144   cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
0145   TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
0146                         "unable to register CUDA shared memory region: " + this->name_);
0147 }
0148 
0149 template <typename IO>
0150 TritonGpuShmResource<IO>::~TritonGpuShmResource() {
0151   close();
0152 }
0153 
0154 template <typename IO>
0155 void TritonGpuShmResource<IO>::close() {
0156   if (this->closed_)
0157     return;
0158   TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
0159                         "unable to unregister CUDA shared memory region: " + this->name_);
0160   cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
0161   this->closed_ = true;
0162 }
0163 
0164 template <>
0165 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
0166   cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
0167             data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
0168                 " bytes to GPU");
0169 }
0170 
0171 template <>
0172 void TritonOutputGpuShmResource::copyOutput() {
0173   //copy back from gpu, keep in scope
0174   auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
0175   cudaCheck(
0176       cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
0177       data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
0178   data_->holder_ = ptr;
0179   for (auto& entry : data_->entries_) {
0180     entry.output_ = ptr->data() + entry.offset_;
0181   }
0182 }
0183 
0184 template class TritonGpuShmResource<tc::InferInput>;
0185 template class TritonGpuShmResource<tc::InferRequestedOutput>;
0186 #endif