File indexing completed on 2023-03-17 10:50:39
#include <cppunit/extensions/HelperMacros.h>
#include <cstdint>
#include <cstring>
#include <iostream>

#include "DataFormats/Math/interface/libminifloat.h"
#include "FWCore/Utilities/interface/isFinite.h"
0006
0007 class testMiniFloat : public CppUnit::TestFixture {
0008 CPPUNIT_TEST_SUITE(testMiniFloat);
0009
0010 CPPUNIT_TEST(testIsDenorm);
0011 CPPUNIT_TEST(testMax);
0012 CPPUNIT_TEST(testMax32RoundedToMax16);
0013 CPPUNIT_TEST(testMin);
0014 CPPUNIT_TEST(testMin32RoundedToMin16);
0015 CPPUNIT_TEST(testDenormMin);
0016
0017 CPPUNIT_TEST_SUITE_END();
0018
0019 public:
0020 void setUp() {}
0021 void tearDown() {}
0022
0023 void testIsDenorm();
0024 void testMax();
0025 void testMax32RoundedToMax16();
0026 void testMin();
0027 void testMin32RoundedToMin16();
0028 void testDenormMin();
0029
0030 private:
0031 };
0032
0033 CPPUNIT_TEST_SUITE_REGISTRATION(testMiniFloat);
0034
0035 void testMiniFloat::testIsDenorm() {
0036
0037 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(1));
0038 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(1 | (1 << 15)));
0039 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(0x3ff));
0040 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(0x3ff) | (1 << 15));
0041
0042
0043 CPPUNIT_ASSERT(!MiniFloatConverter::isdenorm(0));
0044 CPPUNIT_ASSERT(!MiniFloatConverter::isdenorm(0x400));
0045 CPPUNIT_ASSERT(!MiniFloatConverter::isdenorm(0x400 | (1 << 15)));
0046 }
0047
0048 void testMiniFloat::testMax() {
0049
0050
0051 const uint16_t minifloatmax = (0x1e << 10) | 0x3ff;
0052 CPPUNIT_ASSERT(MiniFloatConverter::max() == MiniFloatConverter::float16to32(minifloatmax));
0053
0054
0055 const uint16_t minifloatinf = minifloatmax + 1;
0056 CPPUNIT_ASSERT(edm::isNotFinite(MiniFloatConverter::float16to32(minifloatinf)));
0057 }
0058
0059 void testMiniFloat::testMax32RoundedToMax16() {
0060
0061 CPPUNIT_ASSERT(MiniFloatConverter::float16to32(MiniFloatConverter::float32to16(
0062 MiniFloatConverter::max32RoundedToMax16())) == MiniFloatConverter::max());
0063
0064
0065 union {
0066 float flt;
0067 uint32_t i32;
0068 } conv;
0069 conv.flt = MiniFloatConverter::max32RoundedToMax16();
0070 conv.i32 += 1;
0071 const float max32PlusUlp32RoundedTo16 = MiniFloatConverter::float16to32(MiniFloatConverter::float32to16(conv.flt));
0072 CPPUNIT_ASSERT(edm::isNotFinite(max32PlusUlp32RoundedTo16));
0073 }
0074
0075 void testMiniFloat::testMin() {
0076
0077 CPPUNIT_ASSERT(MiniFloatConverter::min() == MiniFloatConverter::float16to32(1 << 10));
0078
0079
0080 const uint16_t minifloat_denorm = MiniFloatConverter::float32to16(MiniFloatConverter::min()) - 1;
0081 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(minifloat_denorm));
0082
0083
0084 union {
0085 float flt;
0086 uint32_t i32;
0087 } conv;
0088 conv.flt = MiniFloatConverter::min();
0089 conv.i32 -= 1;
0090 const uint16_t min32MinusUlp32CroppedTo16 = MiniFloatConverter::float32to16crop(conv.flt);
0091 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(min32MinusUlp32CroppedTo16));
0092 const uint16_t min32MinusUlp32RoundedTo16 = MiniFloatConverter::float32to16round(conv.flt);
0093 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(min32MinusUlp32RoundedTo16));
0094 }
0095
0096 void testMiniFloat::testMin32RoundedToMin16() {
0097
0098 CPPUNIT_ASSERT(MiniFloatConverter::float16to32(MiniFloatConverter::float32to16(
0099 MiniFloatConverter::min32RoundedToMin16())) == MiniFloatConverter::min());
0100
0101
0102 union {
0103 float flt;
0104 uint32_t i32;
0105 } conv;
0106 conv.flt = MiniFloatConverter::min32RoundedToMin16();
0107 conv.i32 -= 1;
0108 const uint16_t min32MinusUlp32RoundedTo16 = MiniFloatConverter::float32to16(conv.flt);
0109 CPPUNIT_ASSERT(MiniFloatConverter::isdenorm(min32MinusUlp32RoundedTo16));
0110 }
0111
0112 void testMiniFloat::testDenormMin() {
0113
0114 CPPUNIT_ASSERT(MiniFloatConverter::denorm_min() == MiniFloatConverter::float16to32(1));
0115
0116
0117 CPPUNIT_ASSERT(
0118 MiniFloatConverter::float16to32(MiniFloatConverter::float32to16(MiniFloatConverter::denorm_min()) - 1) == 0.f);
0119
0120
0121 union {
0122 float flt;
0123 uint32_t i32;
0124 } conv;
0125 conv.flt = MiniFloatConverter::denorm_min();
0126 conv.i32 -= 1;
0127 const float min32MinusUlp32RoundedTo16 =
0128 MiniFloatConverter::float16to32(MiniFloatConverter::float32to16round(conv.flt));
0129 CPPUNIT_ASSERT(min32MinusUlp32RoundedTo16 == 0.f);
0130 const float min32MinusUlp32CroppedTo16 =
0131 MiniFloatConverter::float16to32(MiniFloatConverter::float32to16crop(conv.flt));
0132 CPPUNIT_ASSERT(min32MinusUlp32CroppedTo16 == 0.f);
0133 }