#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"

#include "model_config.pb.h"
#include "model_config.h"

#include <sstream>

namespace ni = nvidia::inferenceserver;
namespace tc = triton::client;

//dims: kept constant; represents the model's config.pbtxt parameters (converted from google::protobuf::RepeatedField to std::vector)
//fullShape: if batching is enabled, the first entry is the batch size; values can be modified
//shape: view into fullShape, excluding the batch size entry
template <typename IO>
TritonData<IO>::TritonData(const std::string& name,
                           const TritonData<IO>::TensorMetadata& model_info,
                           TritonClient* client,
                           const std::string& pid)
    : name_(name),
      client_(client),
      useShm_(client_->useSharedMemory()),
      //ensure unique name for shared memory region
      shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
      dims_(model_info.shape().begin(), model_info.shape().end()),
      noBatch_(client_->noBatch()),
      batchSize_(0),
      fullShape_(dims_),
      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
      variableDims_(anyNeg(shape_)),
      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
      dname_(model_info.datatype()),
      dtype_(ni::ProtocolStringToDataType(dname_)),
      byteSize_(ni::GetDataTypeByteSize(dtype_)),
      totalByteSize_(0) {
  //create input or output object
  IO* iotmp;
  createObject(&iotmp);
  data_.reset(iotmp);
}
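
//illustrative example (hypothetical values): for a model served with batching enabled whose
//metadata reports shape [-1, -1, 4] (first entry is the batch slot), the members become:
//  dims_      = {-1, -1, 4}
//  fullShape_ = {-1, -1, 4}  (entry 0 is overwritten later by setBatchSize())
//  shape_     = view of {-1, 4}, so variableDims_ is true and productDims_ is -1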

template <>
void TritonInputData::createObject(tc::InferInput** ioptr) {
  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
}

template <>
void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
  tc::InferRequestedOutput::Create(ioptr, name_);
  //another specialization for output: can't use shared memory if output size is not known
  useShm_ &= !variableDims_;
}

template <>
std::string TritonInputData::xput() const {
  return "input";
}

template <>
std::string TritonOutputData::xput() const {
  return "output";
}

template <typename IO>
tc::InferenceServerGrpcClient* TritonData<IO>::client() {
  return client_->client();
}

//setters
template <typename IO>
void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
  for (unsigned i = 0; i < newShape.size(); ++i) {
    setShape(i, newShape[i]);
  }
}

template <typename IO>
void TritonData<IO>::setShape(unsigned loc, int64_t val) {
  unsigned locFull = fullLoc(loc);

  //check boundary
  if (locFull >= fullShape_.size())
    throw cms::Exception("TritonDataError")
        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";

  if (val != fullShape_[locFull]) {
    if (dims_[locFull] == -1)
      fullShape_[locFull] = val;
    else
      throw cms::Exception("TritonDataError")
          << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
  }
}
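
//usage sketch (hypothetical values, continuing the shape example above): callers address
//dimensions by their index in shape_, i.e. excluding the batch entry:
//  input.setShape(0, nParticles);  //allowed: dims_ entry is -1 (variable)
//  input.setShape(1, 5);           //throws: this dimension is fixed at 4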

template <typename IO>
void TritonData<IO>::setBatchSize(unsigned bsize) {
  batchSize_ = bsize;
  if (!noBatch_)
    fullShape_[0] = batchSize_;
}

template <typename IO>
void TritonData<IO>::computeSizes() {
  sizeShape_ = sizeShape();
  byteSizePerBatch_ = byteSize_ * sizeShape_;
  totalByteSize_ = byteSizePerBatch_ * batchSize_;
}

template <typename IO>
void TritonData<IO>::resetSizes() {
  sizeShape_ = 0;
  byteSizePerBatch_ = 0;
  totalByteSize_ = 0;
}

//create a memory resource if none exists;
//otherwise, reuse the memory resource, resizing it if necessary
template <typename IO>
void TritonData<IO>::updateMem(size_t size) {
  if (!memResource_ or size > memResource_->size()) {
    if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size);
    }
#ifdef TRITON_ENABLE_GPU
    else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
      //avoid unnecessarily throwing in destructor
      if (memResource_)
        memResource_->close();
      //need to destroy before constructing new instance because shared memory key will be reused
      memResource_.reset();
      memResource_ = std::make_shared<TritonGpuShmResource<IO>>(this, shmName_, size);
    }
#endif
    //for remote/heap, size increases don't matter
    else if (!memResource_)
      memResource_ = std::make_shared<TritonHeapResource<IO>>(this, shmName_, size);
  }
}
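
//summary of the selection above (a reading aid, not new behavior):
//  LocalCPU server + shared memory usable -> TritonCpuShmResource, recreated whenever it must grow
//  LocalGPU server + shared memory usable -> TritonGpuShmResource (only in GPU-enabled builds)
//  otherwise (remote server, or shm disabled) -> TritonHeapResource, created once and never resized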

//io accessors
template <>
template <typename DT>
TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
  //automatically creates a vector for each batch entry (if batch size known)
  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
  if (reserve and !anyNeg(shape_)) {
    computeSizes();
    for (auto& vec : *ptr) {
      vec.reserve(sizeShape_);
    }
  }
  return ptr;
}

template <>
template <typename DT>
void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " toServer() was already called for this event";

  const auto& data_in = *ptr;

  //check batch size
  if (data_in.size() != batchSize_) {
    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                            << " but specified batch size is " << batchSize_;
  }

  //shape must be specified for variable dims or if batch size changes
  data_->SetShape(fullShape_);

  //check type
  checkType<DT>();

  computeSizes();
  updateMem(totalByteSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
  }
  memResource_->set();

  //keep input data in scope
  holder_ = ptr;
  done_ = true;
}
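
//input-side usage sketch from a calling module (hypothetical tensor name; see the SonicTriton
//README for the full producer interface):
//  auto& input = client->input().at("input_tensor");
//  input.setShape(0, nParticles);         //fix any variable dimensions for this event
//  auto data = input.allocate<float>(true);  //one vector per batch entry
//  for (auto& vec : *data) { /* fill features */ }
//  input.toServer(data);                  //validates batch size and type, then copies into the memory resource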

//sets up shared memory for outputs, if possible
template <>
void TritonOutputData::prepare() {
  computeSizes();
  updateMem(totalByteSize_);
  memResource_->set();
}

template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const {
  //shouldn't be called twice
  if (done_)
    throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";

  if (!result_) {
    throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
  }

  //check type
  checkType<DT>();

  const uint8_t* r0 = memResource_->copyOutput();
  const DT* r1 = reinterpret_cast<const DT*>(r0);

  TritonOutput<DT> dataOut;
  dataOut.reserve(batchSize_);
  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
    auto offset = i0 * sizeShape_;
    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
  }

  done_ = true;
  return dataOut;
}
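
//output-side usage sketch (hypothetical tensor name), run after the inference call completes:
//  const auto& output = client->output().at("output_tensor");
//  const auto& scores = output.fromServer<float>();  //batchSize_ entries of sizeShape_ values each
//  for (unsigned i = 0; i < scores.size(); ++i) { /* use scores[i][j] */ }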

template <>
void TritonInputData::reset() {
  done_ = false;
  holder_.reset();
  data_->Reset();
  //reset shape
  if (variableDims_) {
    for (unsigned i = 0; i < shape_.size(); ++i) {
      unsigned locFull = fullLoc(i);
      fullShape_[locFull] = dims_[locFull];
    }
  }
  resetSizes();
}

template <>
void TritonOutputData::reset() {
  done_ = false;
  result_.reset();
  holder_.reset();
  resetSizes();
}

//explicit template instantiation definitions
template class TritonData<tc::InferInput>;
template class TritonData<tc::InferRequestedOutput>;

template TritonInputContainer<char> TritonInputData::allocate(bool reserve);
template TritonInputContainer<uint8_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<uint16_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<uint32_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<uint64_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<int8_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<int16_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<int32_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<int64_t> TritonInputData::allocate(bool reserve);
template TritonInputContainer<float> TritonInputData::allocate(bool reserve);
template TritonInputContainer<double> TritonInputData::allocate(bool reserve);

template void TritonInputData::toServer(TritonInputContainer<char> data_in);
template void TritonInputData::toServer(TritonInputContainer<uint8_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<uint16_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<uint32_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<uint64_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<int8_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<int16_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<int32_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<int64_t> data_in);
template void TritonInputData::toServer(TritonInputContainer<float> data_in);
template void TritonInputData::toServer(TritonInputContainer<double> data_in);

template TritonOutput<char> TritonOutputData::fromServer() const;
template TritonOutput<uint8_t> TritonOutputData::fromServer() const;
template TritonOutput<uint16_t> TritonOutputData::fromServer() const;
template TritonOutput<uint32_t> TritonOutputData::fromServer() const;
template TritonOutput<uint64_t> TritonOutputData::fromServer() const;
template TritonOutput<int8_t> TritonOutputData::fromServer() const;
template TritonOutput<int16_t> TritonOutputData::fromServer() const;
template TritonOutput<int32_t> TritonOutputData::fromServer() const;
template TritonOutput<int64_t> TritonOutputData::fromServer() const;
template TritonOutput<float> TritonOutputData::fromServer() const;
template TritonOutput<double> TritonOutputData::fromServer() const;