File indexing completed on 2024-10-30 00:11:28
0001 #ifndef libminifloat_h
0002 #define libminifloat_h
0003 #include "FWCore/Utilities/interface/thread_safety_macros.h"
0004 #include "FWCore/Utilities/interface/bit_cast.h"
0005 #include <cstdint>
0006 #include <cassert>
0007 #include <algorithm>
0008
0009
0010 class MiniFloatConverter {
0011 public:
0012 MiniFloatConverter();
0013 inline static float float16to32(uint16_t h) {
0014 uint32_t i32 = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10];
0015 return edm::bit_cast<float>(i32);
0016 }
0017 inline static uint16_t float32to16(float x) { return float32to16round(x); }
0018
0019 inline static uint16_t float32to16crop(float x) {
0020 uint32_t i32 = edm::bit_cast<uint32_t>(x);
0021 return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
0022 }
0023
0024 inline static uint16_t float32to16round(float x) {
0025 uint32_t i32 = edm::bit_cast<uint32_t>(x);
0026 uint8_t shift = shifttable[(i32 >> 23) & 0x1ff];
0027 if (shift == 13) {
0028 uint16_t base2 = (i32 & 0x007fffff) >> 12;
0029 uint16_t base = base2 >> 1;
0030 if (((base2 & 1) != 0) && (base < 1023))
0031 base++;
0032 return basetable[(i32 >> 23) & 0x1ff] + base;
0033 } else {
0034 return basetable[(i32 >> 23) & 0x1ff] + ((i32 & 0x007fffff) >> shifttable[(i32 >> 23) & 0x1ff]);
0035 }
0036 }
0037 template <int bits>
0038 inline static float reduceMantissaToNbits(const float &f) {
0039 static_assert(bits <= 23, "max mantissa size is 23 bits");
0040 constexpr uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
0041 uint32_t i32 = edm::bit_cast<uint32_t>(f);
0042 i32 &= mask;
0043 return edm::bit_cast<float>(i32);
0044 }
0045 inline static float reduceMantissaToNbits(const float &f, int bits) {
0046 uint32_t mask = (0xFFFFFFFF >> (23 - bits)) << (23 - bits);
0047 uint32_t i32 = edm::bit_cast<uint32_t>(f);
0048 i32 &= mask;
0049 return edm::bit_cast<float>(i32);
0050 }
0051
0052 class ReduceMantissaToNbitsRounding {
0053 public:
0054 #ifdef CMS_UNDEFINED_SANITIZER
0055
0056 __attribute__((no_sanitize("shift")))
0057 #endif
0058 ReduceMantissaToNbitsRounding(int bits)
0059 : shift(23 - bits), mask((0xFFFFFFFF >> (shift)) << (shift)), test(1 << (shift - 1)), maxn((1 << bits) - 2) {
0060 assert(bits <= 23);
0061 }
0062 float operator()(float f) const {
0063 constexpr uint32_t low23 = (0x007FFFFF);
0064 constexpr uint32_t hi9 = (0xFF800000);
0065 uint32_t i32 = edm::bit_cast<uint32_t>(f);
0066 if (i32 & test) {
0067 uint32_t mantissa = (i32 & low23) >> shift;
0068 if (mantissa < maxn)
0069 mantissa++;
0070 i32 = (i32 & hi9) | (mantissa << shift);
0071 } else {
0072 i32 &= mask;
0073 }
0074 return edm::bit_cast<float>(i32);
0075 }
0076
0077 private:
0078 const int shift;
0079 const uint32_t mask, test, maxn;
0080 };
0081
0082 template <int bits>
0083 inline static float reduceMantissaToNbitsRounding(const float &f) {
0084 static const ReduceMantissaToNbitsRounding reducer(bits);
0085 return reducer(f);
0086 }
0087
0088 inline static float reduceMantissaToNbitsRounding(float f, int bits) {
0089 return ReduceMantissaToNbitsRounding(bits)(f);
0090 }
0091
0092 template <typename InItr, typename OutItr>
0093 static void reduceMantissaToNbitsRounding(int bits, InItr begin, InItr end, OutItr out) {
0094 std::transform(begin, end, out, ReduceMantissaToNbitsRounding(bits));
0095 }
0096
0097 inline static float max() {
0098 constexpr uint32_t i32 = 0x477fe000;
0099 return edm::bit_cast<float>(i32);
0100 }
0101
0102
0103 inline static float max32RoundedToMax16() {
0104
0105
0106 constexpr uint32_t i32 = (0x8f << 23) - 1;
0107 return edm::bit_cast<float>(i32);
0108 }
0109
0110 inline static float min() {
0111 constexpr uint32_t i32 = 0x38800000;
0112 return edm::bit_cast<float>(i32);
0113 }
0114
0115
0116 inline static float min32RoundedToMin16() {
0117
0118
0119 constexpr uint32_t i32 = (0x71 << 23);
0120 return edm::bit_cast<float>(i32);
0121 }
0122
0123 inline static float denorm_min() {
0124 constexpr uint32_t i32 = 0x33800000;
0125 return edm::bit_cast<float>(i32);
0126 }
0127
0128 inline static bool isdenorm(uint16_t h) {
0129
0130 return ((h >> 10) & 0x1f) == 0 && (h & 0x3ff) != 0;
0131 }
0132
0133 private:
0134 CMS_THREAD_SAFE static uint32_t mantissatable[2048];
0135 CMS_THREAD_SAFE static uint32_t exponenttable[64];
0136 CMS_THREAD_SAFE static uint16_t offsettable[64];
0137 CMS_THREAD_SAFE static uint16_t basetable[512];
0138 CMS_THREAD_SAFE static uint8_t shifttable[512];
0139 static void filltables();
0140 };
0141 #endif