#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include <cstring>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

namespace tc = triton::client;
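
//This file implements the memory resources used by TritonData to exchange tensors with the server:
//TritonHeapResource copies through client-owned buffers (AppendRaw/RawData),
//TritonCpuShmResource shares a POSIX system shared memory region, and
//TritonGpuShmResource (when TRITON_ENABLE_GPU is defined) shares GPU memory via CUDA IPC.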

template <typename IO>
TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
    : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}

template <typename IO>
void TritonMemResource<IO>::set() {
  //associate the registered shared memory region with the underlying InferInput/InferRequestedOutput
  TRITON_THROW_IF_ERROR(data_->data_->SetSharedMemory(name_, data_->totalByteSize_, 0),
                        "unable to set shared memory (" + name_ + ")");
}

template <typename IO>
TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size) {}

template <>
void TritonInputHeapResource::copyInput(const void* values, size_t offset) {
  TRITON_THROW_IF_ERROR(data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->byteSizePerBatch_),
                        data_->name_ + " toServer(): unable to set data for batch entry " +
                            (data_->byteSizePerBatch_ ? std::to_string(offset / data_->byteSizePerBatch_) : ""));
}

template <>
const uint8_t* TritonOutputHeapResource::copyOutput() {
  size_t contentByteSize;
  const uint8_t* values;
  TRITON_THROW_IF_ERROR(data_->result_->RawData(data_->name_, &values, &contentByteSize),
                        data_->name_ + " fromServer(): unable to get raw");
  if (contentByteSize != data_->totalByteSize_) {
    throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
                                            << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
  }
  return values;
}

//shared memory helpers based on:
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/shm_utils.cc (cpu)
// https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_cudashm_client.cc (gpu)

template <typename IO>
TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size) {
  //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
  this->size_ = std::max<size_t>(this->size_, 1);

  //get shared memory region descriptor
  int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
  if (shm_fd == -1)
    throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;

  //extend shared memory object to the requested size
  int res = ftruncate(shm_fd, this->size_);
  if (res == -1)
    throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
                                               " to requested size: " + std::to_string(this->size_);

  //map to process address space
  constexpr size_t offset(0);
  this->addr_ = static_cast<uint8_t*>(mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset));
  if (this->addr_ == MAP_FAILED)
    throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
                                               this->name_;

  //close descriptor; it is no longer needed once the mapping exists
  if (::close(shm_fd) == -1)
    throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;

  //make the region known to the server under the same key
  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
                        "unable to register shared memory region: " + this->name_);
}

template <typename IO>
TritonCpuShmResource<IO>::~TritonCpuShmResource() {
  close();
}

template <typename IO>
void TritonCpuShmResource<IO>::close() {
  if (this->closed_)
    return;

  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
                        "unable to unregister shared memory region: " + this->name_);

  //unmap the region (munmap returns a status code, not a descriptor)
  if (munmap(this->addr_, this->size_) == -1)
    throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;

  //remove the shared memory object name
  if (shm_unlink(this->name_.c_str()) == -1)
    throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;

  this->closed_ = true;
}

template <>
void TritonInputCpuShmResource::copyInput(const void* values, size_t offset) {
  if (size_ > 0)
    std::memcpy(addr_ + offset, values, data_->byteSizePerBatch_);
}

template <>
const uint8_t* TritonOutputCpuShmResource::copyOutput() {
  //no copy needed: the server wrote the output directly into the shared region
  return addr_;
}

template class TritonHeapResource<tc::InferInput>;
template class TritonCpuShmResource<tc::InferInput>;
template class TritonHeapResource<tc::InferRequestedOutput>;
template class TritonCpuShmResource<tc::InferRequestedOutput>;
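
//A minimal sketch of the intended lifecycle, assuming a valid TritonData<tc::InferInput>* named data
//(the real call sites live in TritonData/TritonClient; the names and call order below are illustrative only):
//  TritonInputCpuShmResource shm(data, "key", nbytes);  //shm_open + ftruncate + mmap + register
//  shm.copyInput(hostBatch, 0);                         //memcpy one batch entry into the region
//  shm.set();                                           //point the InferInput at the registered region
//  shm.close();                                         //unregister + munmap + shm_unlink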

#ifdef TRITON_ENABLE_GPU
template <typename IO>
TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
  //todo: get server device id somehow?
  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
  //allocate device memory and obtain an IPC handle that the server can open
  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
                        "unable to register CUDA shared memory region: " + this->name_);
}
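
//For illustration: on the server side, Triton opens the transferred handle with cudaIpcOpenMemHandle()
//to map this same device allocation into its own process, so the cudaMemcpy in copyInput below
//writes into memory the server can read without an extra host round trip.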

template <typename IO>
TritonGpuShmResource<IO>::~TritonGpuShmResource() {
  close();
}

template <typename IO>
void TritonGpuShmResource<IO>::close() {
  if (this->closed_)
    return;
  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
                        "unable to unregister CUDA shared memory region: " + this->name_);
  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
  this->closed_ = true;
}

template <>
void TritonInputGpuShmResource::copyInput(const void* values, size_t offset) {
  cudaCheck(
      cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
      data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU");
}

template <>
const uint8_t* TritonOutputGpuShmResource::copyOutput() {
  //copy back from gpu; storing the shared_ptr in data_->holder_ keeps the host buffer in scope
  auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
  cudaCheck(
      cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
      data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
  data_->holder_ = ptr;
  return ptr->data();
}

template class TritonGpuShmResource<tc::InferInput>;
template class TritonGpuShmResource<tc::InferRequestedOutput>;
#endif