Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-09-07 04:36:40

0001 
0002 /**
0003  * Repack a set of read requests from the ROOT layer to be optimized for the
0004  * storage layer.
0005  * 
0006  * The basic technique employed is to coalesce nearby, but not adjacent reads
0007  * into one larger read in the request to the storage system.  We will be
0008  * purposely over-reading from storage.
0009  *
0010  * The read-coalescing is done because the vector reads are typically
0011  * unrolled server-side in a "dumb" fashion, with OS read-ahead disabled.
0012  * The coalescing actually decreases the number of requests sent to disk;
0013  * important, as ROOT I/O is typically latency bound.
0014  *
0015  * The complexity here is in the fact that we must have buffer space to hold
0016  * the extra bytes from the storage system, even though they're going to be
0017  * discarded.
0018  * 
0019  * The approach is to reuse the ROOT buffer as temporary holding space, plus
0020  * a small, fixed-size "spare buffer".  So, in the worst-case, we will use
0021  * about 256KB of extra buffer space.  The read-coalesce algorithm is greedy,
0022  * so we can't provide an a-priori estimate on how many extra I/O transactions
0023  * will be sent to the storage (compared to vector-reads with no coalescing).
0024  * Tests currently indicate that this approach usually causes zero to one
0025  * additional I/O transaction to occur.
0026  */
0027 
0028 #include <vector>
0029 
0030 #include "Utilities/StorageFactory/interface/IOPosBuffer.h"
0031 #include "FWCore/Utilities/interface/propagate_const.h"
0032 
0033 class ReadRepacker {
0034 public:
0035   using IOSize = edm::storage::IOSize;
0036   using IOPosBuffer = edm::storage::IOPosBuffer;
0037   // Returns the number of input buffers it was able to pack into the IO operation.
0038   int pack(long long int *pos,   // An array of file offsets to read.
0039            int *len,             // An array of lengths to read.
0040            int nbuf,             // Size of the pos and len array.
0041            char *buf,            // Temporary buffer to hold I/O result.
0042            IOSize buffer_size);  // Size of the temporary buffer.
0043 
0044   void unpack(
0045       char *buf);  // Buffer to unpack the I/O results into.  Not the temporayr buffer and result buffer may overlap.
0046 
0047   std::vector<IOPosBuffer> &iov() { return m_iov; }  // Returns the IO vector, optimized for storage.
0048 
0049   IOSize bufferUsed() const { return m_buffer_used; }  // Returns the total amount of space in the temp buffer used.
0050   IOSize extraBytes() const {
0051     return m_extra_bytes;
0052   }  // Returns the number of extra bytes to be issued to the I/O system
0053   // Note that (buffer_used - extra_bytes) should equal the number of "real" bytes serviced.
0054   IOSize realBytesProcessed() const {
0055     return m_buffer_used - m_extra_bytes;
0056   }  // Return the number of bytes of the input request that would be processed by the IO vector
0057 
0058   // Two reads distanced by less than READ_COALESCE_SIZE will turn into one
0059   // large read.
0060   static constexpr IOSize TEMPORARY_BUFFER_SIZE = 256 * 1024;
0061 
0062   // A read larger than BIG_READ_SIZE will not be coalesced.
0063   static constexpr IOSize READ_COALESCE_SIZE = 32 * 1024;
0064 
0065   // The size of the temporary holding buffer for read-coalescing.
0066   static constexpr IOSize BIG_READ_SIZE = 256 * 1024;
0067 
0068 private:
0069   int packInternal(long long int *pos,
0070                    int *len,
0071                    int nbuf,
0072                    char *buf,
0073                    IOSize buffer_size);  // Heart of the implementation of Pack; because we pack up to 2 buffers,
0074                                          // its easier to break the method into two.
0075 
0076   void reset(unsigned int nbuf);  // Reset all the internal counters and arrays.  Resize arrays to be about nbuf long.
0077 
0078   std::vector<int> m_idx_to_iopb;  // Mapping from idx in the input array to the iopb in the IO vector
0079   std::vector<int>
0080       m_idx_to_iopb_offset;        // Mapping from idx in the input array to the data offset in the results of the iopb.
0081   std::vector<IOPosBuffer> m_iov;  // Vector of IO for the storage system to perform.
0082   edm::propagate_const<int *> m_len;  // Pointed to the array of read sizes.
0083   IOSize m_buffer_used;               // Bytes in the temporary buffer used.
0084   IOSize m_extra_bytes;               // Number of bytes read from storage that will be discarded.
0085   std::vector<char> m_spare_buffer;  // The spare buffer; allocated if we cannot fit the I/O results into the ROOT buffer.
0086 };