// File indexing completed on 2023-10-25 09:50:29
0001 #include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
0002 #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
0003 #include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
0004 #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0006
0007 #include <cstring>
0008 #include <fcntl.h>
0009 #include <sys/mman.h>
0010 #include <unistd.h>
0011
0012 namespace tc = triton::client;
0013
// Base for all memory resources used to exchange I/O data with the Triton server.
// Holds a non-owning pointer to the TritonData that owns this resource, the region
// name (also used as the shared-memory key in derived classes), and the requested
// byte size. addr_ stays null until a derived class maps/allocates the region.
template <typename IO>
TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
    : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
0017
0018 template <typename IO>
0019 void TritonMemResource<IO>::set() {
0020 for (auto& entry : data_->entries_) {
0021 TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
0022 "unable to set shared memory (" + name_ + ")");
0023 }
0024 }
0025
// Heap-based (non-shared-memory) resource: no region to create, so this simply
// forwards to the base constructor; data is moved via the client library instead.
template <typename IO>
TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size) {}
0029
// Heap input path: append the caller's bytes for one batch entry to the request
// payload via the client library (no shared memory involved).
// The error message identifies which piece failed: with multiple entries it reports
// the entry index; with a single entry it reports the batch position recovered from
// the byte offset (guarding against division by a zero byteSizePerBatch_).
template <>
void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
                                                                data_->entries_[entry].byteSizePerBatch_),
                        data_->name_ + " toServer(): unable to set data for batch entry " +
                            (data_->entries_.size() > 1 ? std::to_string(entry)
                             : data_->entries_[entry].byteSizePerBatch_
                                 ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
                                 : ""));
}
0040
0041 template <>
0042 void TritonOutputHeapResource::copyOutput() {
0043 size_t contentByteSize = 0;
0044 for (auto& entry : data_->entries_) {
0045 size_t contentByteSizeEntry(0);
0046 if (entry.totalByteSize_ > 0)
0047 TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
0048 data_->name_ + " fromServer(): unable to get raw");
0049 contentByteSize += contentByteSizeEntry;
0050 }
0051 if (contentByteSize != data_->totalByteSize_) {
0052 throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
0053 << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
0054 }
0055 }
0056
0057
0058
0059
0060
0061 template <typename IO>
0062 TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
0063 : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
0064
0065 this->size_ = std::max<size_t>(this->size_, 1);
0066
0067
0068 int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
0069 if (shm_fd == -1)
0070 throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
0071
0072
0073 int res = ftruncate(shm_fd, this->size_);
0074 if (res == -1)
0075 throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
0076 " to requested size: " + std::to_string(this->size_);
0077
0078
0079 constexpr size_t offset(0);
0080 this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
0081 if (this->addr_ == MAP_FAILED)
0082 throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
0083 this->name_;
0084
0085
0086 if (::close(shm_fd) == -1)
0087 throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
0088
0089 TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
0090 "unable to register shared memory region: " + this->name_);
0091 }
0092
// Releases the shared-memory region if close() was not already called explicitly.
// NOTE(review): close() can throw (unregister/munmap/unlink failures); a throw
// escaping a destructor terminates the program — confirm this is the intended
// failure mode, or whether errors here should be logged and swallowed instead.
template <typename IO>
TritonCpuShmResource<IO>::~TritonCpuShmResource() {
  close();
}
0097
0098 template <typename IO>
0099 void TritonCpuShmResource<IO>::close() {
0100 if (this->closed_)
0101 return;
0102
0103 TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
0104 "unable to unregister shared memory region: " + this->name_);
0105
0106
0107 int tmp_fd = munmap(this->addr_, this->size_);
0108 if (tmp_fd == -1)
0109 throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
0110
0111
0112 int shm_fd = shm_unlink(this->name_.c_str());
0113 if (shm_fd == -1)
0114 throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
0115
0116 this->closed_ = true;
0117 }
0118
0119 template <>
0120 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
0121 if (sizeOrig_ > 0)
0122 std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
0123 }
0124
0125 template <>
0126 void TritonOutputCpuShmResource::copyOutput() {
0127 for (auto& entry : data_->entries_) {
0128 entry.output_ = addr_ + entry.offset_;
0129 }
0130 }
0131
// explicit instantiations for the two Triton client I/O types (inputs and outputs)
template class TritonHeapResource<tc::InferInput>;
template class TritonCpuShmResource<tc::InferInput>;
template class TritonHeapResource<tc::InferRequestedOutput>;
template class TritonCpuShmResource<tc::InferRequestedOutput>;
0136
0137 #ifdef TRITON_ENABLE_GPU
// Allocates a GPU buffer and shares it with the Triton server via a CUDA IPC
// handle, so input/output data can stay on the device.
// NOTE(review): deviceId_ is hard-coded to 0 — presumably this assumes client and
// server use the same (first) GPU; confirm against the server's device assignment.
template <typename IO>
TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
    : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
  // select the device, allocate the buffer, and export an IPC handle for it
  cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
  cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
  cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
  // hand the IPC handle to the server so it can map the same device memory
  TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
                        "unable to register CUDA shared memory region: " + this->name_);
}
0148
// Releases the CUDA shared-memory region if close() was not already called.
// NOTE(review): as with the CPU variant, close() can throw from a destructor —
// confirm termination on failure is acceptable here.
template <typename IO>
TritonGpuShmResource<IO>::~TritonGpuShmResource() {
  close();
}
0153
// Tears down the CUDA shared-memory region: the server must unregister (stop
// using the IPC mapping) before the device buffer is freed. Idempotent via closed_.
template <typename IO>
void TritonGpuShmResource<IO>::close() {
  if (this->closed_)
    return;
  TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
                        "unable to unregister CUDA shared memory region: " + this->name_);
  cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
  this->closed_ = true;
}
0163
// GPU input path: copy the caller's host bytes for one batch entry into the shared
// device buffer at the given offset; the server then reads them directly on the GPU.
template <>
void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
  cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
            data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
                " bytes to GPU");
}
0170
0171 template <>
0172 void TritonOutputGpuShmResource::copyOutput() {
0173
0174 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
0175 cudaCheck(
0176 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
0177 data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
0178 data_->holder_ = ptr;
0179 for (auto& entry : data_->entries_) {
0180 entry.output_ = ptr->data() + entry.offset_;
0181 }
0182 }
0183
// explicit instantiations of the GPU resource for both Triton client I/O types
template class TritonGpuShmResource<tc::InferInput>;
template class TritonGpuShmResource<tc::InferRequestedOutput>;
0186 #endif