File indexing completed on 2024-04-06 12:15:48
0001 #include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
0002 #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
0003 #include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
0004 #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
0005 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
0006
0007 #include <cstring>
0008 #include <fcntl.h>
0009 #include <sys/mman.h>
0010 #include <unistd.h>
0011
0012 namespace tc = triton::client;
0013
0014 template <typename IO>
0015 TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string& name, size_t size)
0016 : data_(data), name_(name), size_(size), addr_(nullptr), closed_(false) {}
0017
0018 template <typename IO>
0019 void TritonMemResource<IO>::set() {
0020 for (auto& entry : data_->entries_) {
0021 TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
0022 "unable to set shared memory (" + name_ + ")",
0023 true);
0024 }
0025 }
0026
0027 template <typename IO>
0028 void TritonMemResource<IO>::closeSafe() {
0029 CMS_SA_ALLOW try { close(); } catch (TritonException& e) {
0030 e.convertToWarning();
0031 } catch (cms::Exception& e) {
0032 triton_utils::convertToWarning(e);
0033 } catch (std::exception& e) {
0034 edm::LogWarning("UnknownFailure") << e.what();
0035 } catch (...) {
0036 edm::LogWarning("UnknownFailure") << "An unknown exception was thrown";
0037 }
0038 }
0039
0040 template <typename IO>
0041 TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size)
0042 : TritonMemResource<IO>(data, name, size) {}
0043
0044 template <>
0045 void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
0046 TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
0047 data_->entries_[entry].byteSizePerBatch_),
0048 data_->name_ + " toServer(): unable to set data for batch entry " +
0049 (data_->entries_.size() > 1 ? std::to_string(entry)
0050 : data_->entries_[entry].byteSizePerBatch_
0051 ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
0052 : ""),
0053 false);
0054 }
0055
0056 template <>
0057 void TritonOutputHeapResource::copyOutput() {
0058 size_t contentByteSize = 0;
0059 for (auto& entry : data_->entries_) {
0060 size_t contentByteSizeEntry(0);
0061 if (entry.totalByteSize_ > 0)
0062 TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
0063 data_->name_ + " fromServer(): unable to get raw",
0064 false);
0065 contentByteSize += contentByteSizeEntry;
0066 }
0067 if (contentByteSize != data_->totalByteSize_) {
0068 throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
0069 << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
0070 }
0071 }
0072
0073
0074
0075
0076
0077 template <typename IO>
0078 TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
0079 : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
0080
0081 this->size_ = std::max<size_t>(this->size_, 1);
0082
0083
0084 int shm_fd = shm_open(this->name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
0085 if (shm_fd == -1)
0086 throw cms::Exception("TritonError") << "unable to get shared memory descriptor for key: " + this->name_;
0087
0088
0089 int res = ftruncate(shm_fd, this->size_);
0090 if (res == -1)
0091 throw cms::Exception("TritonError") << "unable to initialize shared memory key " + this->name_ +
0092 " to requested size: " + std::to_string(this->size_);
0093
0094
0095 constexpr size_t offset(0);
0096 this->addr_ = (uint8_t*)mmap(nullptr, this->size_, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset);
0097 if (this->addr_ == MAP_FAILED)
0098 throw cms::Exception("TritonError") << "unable to map to process address space for shared memory key: " +
0099 this->name_;
0100
0101
0102 if (::close(shm_fd) == -1)
0103 throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_;
0104
0105 TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_),
0106 "unable to register shared memory region: " + this->name_,
0107 true);
0108 }
0109
0110 template <typename IO>
0111 TritonCpuShmResource<IO>::~TritonCpuShmResource() {
0112 this->closeSafe();
0113 }
0114
0115 template <typename IO>
0116 void TritonCpuShmResource<IO>::close() {
0117 if (this->closed_)
0118 return;
0119
0120 TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_),
0121 "unable to unregister shared memory region: " + this->name_,
0122 true);
0123
0124
0125 int tmp_fd = munmap(this->addr_, this->size_);
0126 if (tmp_fd == -1)
0127 throw cms::Exception("TritonError") << "unable to munmap for shared memory key: " << this->name_;
0128
0129
0130 int shm_fd = shm_unlink(this->name_.c_str());
0131 if (shm_fd == -1)
0132 throw cms::Exception("TritonError") << "unable to unlink for shared memory key: " << this->name_;
0133
0134 this->closed_ = true;
0135 }
0136
0137 template <>
0138 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
0139 if (sizeOrig_ > 0)
0140 std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
0141 }
0142
0143 template <>
0144 void TritonOutputCpuShmResource::copyOutput() {
0145 for (auto& entry : data_->entries_) {
0146 entry.output_ = addr_ + entry.offset_;
0147 }
0148 }
0149
0150 template class TritonHeapResource<tc::InferInput>;
0151 template class TritonCpuShmResource<tc::InferInput>;
0152 template class TritonHeapResource<tc::InferRequestedOutput>;
0153 template class TritonCpuShmResource<tc::InferRequestedOutput>;
0154
0155 #ifdef TRITON_ENABLE_GPU
0156 template <typename IO>
0157 TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
0158 : TritonMemResource<IO>(data, name, size), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
0159
0160 cudaCheck(cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_));
0161 cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_);
0162 cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_);
0163 TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_),
0164 "unable to register CUDA shared memory region: " + this->name_,
0165 true);
0166 }
0167
0168 template <typename IO>
0169 TritonGpuShmResource<IO>::~TritonGpuShmResource() {
0170 this->closeSafe();
0171 }
0172
0173 template <typename IO>
0174 void TritonGpuShmResource<IO>::close() {
0175 if (this->closed_)
0176 return;
0177 TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_),
0178 "unable to unregister CUDA shared memory region: " + this->name_,
0179 true);
0180 cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_);
0181 this->closed_ = true;
0182 }
0183
0184 template <>
0185 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
0186 cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
0187 data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
0188 " bytes to GPU");
0189 }
0190
0191 template <>
0192 void TritonOutputGpuShmResource::copyOutput() {
0193
0194 auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
0195 cudaCheck(
0196 cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
0197 data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
0198 data_->holder_ = ptr;
0199 for (auto& entry : data_->entries_) {
0200 entry.output_ = ptr->data() + entry.offset_;
0201 }
0202 }
0203
0204 template class TritonGpuShmResource<tc::InferInput>;
0205 template class TritonGpuShmResource<tc::InferRequestedOutput>;
0206 #endif