@@ -23,6 +23,35 @@ limitations under the License.
2323namespace tensorflow {
2424namespace {
2525
26+ TEST (Bfloat16Test, ZeroRepresentations) {
27+ ASSERT_EQ (bfloat16{0 .0f }, bfloat16{0 .0f });
28+ ASSERT_EQ (bfloat16{-0 .0f }, bfloat16{0 .0f });
29+ ASSERT_EQ (bfloat16{-0 .0f }, bfloat16{-0 .0f });
30+ ASSERT_EQ (bfloat16{0 .0f }.value , 0x0000 );
31+ ASSERT_EQ (bfloat16{-0 .0f }.value , 0x8000 );
32+ }
33+
34+ TEST (Bfloat16Test, FlushDenormalsToZero) {
35+ for (float denorm = -std::numeric_limits<float >::denorm_min ();
36+ denorm < std::numeric_limits<float >::denorm_min ();
37+ denorm = std::nextafterf (denorm, 1 .0f )) {
38+ bfloat16 bf_trunc = bfloat16::truncate_to_bfloat16 (denorm);
39+ ASSERT_EQ (float {bf_trunc}, 0 .0f );
40+ if (std::signbit (denorm)) {
41+ ASSERT_EQ (bf_trunc.value , 0x8000 ) << denorm;
42+ } else {
43+ ASSERT_EQ (bf_trunc.value , 0x0000 ) << denorm;
44+ }
45+ bfloat16 bf_round = bfloat16::round_to_bfloat16 (denorm);
46+ ASSERT_EQ (float {bf_round}, 0 .0f );
47+ if (std::signbit (denorm)) {
48+ ASSERT_EQ (bf_round.value , 0x8000 ) << denorm;
49+ } else {
50+ ASSERT_EQ (bf_round.value , 0x0000 ) << denorm;
51+ }
52+ }
53+ }
54+
2655TEST (Bfloat16Test, DefaultValueIsZero) {
2756 EXPECT_EQ (0 .0f , static_cast <float >(bfloat16 ()));
2857}
@@ -65,6 +94,7 @@ TEST_P(Bfloat16Test, TruncateTest) {
6594 EXPECT_TRUE (std::isnan (float (truncated)) || std::isinf (float (truncated)));
6695 return ;
6796 }
97+
6898 EXPECT_EQ (GetParam ().expected_truncation , float (truncated));
6999
70100 bfloat16 rounded = bfloat16::round_to_bfloat16 ((GetParam ().input ));
@@ -114,14 +144,16 @@ INSTANTIATE_TEST_SUITE_P(
114144 BinaryToFloat (0 , 0b10000000 , 0b1001000 , 0b1000000000000000 ),
115145 BinaryToFloat (0 , 0b10000000 , 0b1001000 , 0b0000000000000000 ),
116146 BinaryToFloat (0 , 0b10000000 , 0b1001000 , 0b0000000000000000 )},
147+ // The following two floats are denormals and will be flushed
148+ // to zero.
117149 Bfloat16TestParam{
118150 BinaryToFloat (0 , 0b00000000 , 0b1001000 , 0b1000000000000000 ),
119- BinaryToFloat (0 , 0b00000000 , 0b1001000 , 0b0000000000000000 ),
120- BinaryToFloat (0 , 0b00000000 , 0b1001000 , 0b0000000000000000 )},
151+ BinaryToFloat (0 , 0b00000000 , 0b0000000 , 0b0000000000000000 ),
152+ BinaryToFloat (0 , 0b00000000 , 0b0000000 , 0b0000000000000000 )},
121153 Bfloat16TestParam{
122154 BinaryToFloat (0 , 0b00000000 , 0b1111111 , 0b1100000000000000 ),
123- BinaryToFloat (0 , 0b00000000 , 0b1111111 , 0b0000000000000000 ),
124- BinaryToFloat (0 , 0b00000001 , 0b0000000 , 0b0000000000000000 )}));
155+ BinaryToFloat (0 , 0b00000000 , 0b0000000 , 0b0000000000000000 ),
156+ BinaryToFloat (0 , 0b00000000 , 0b0000000 , 0b0000000000000000 )}));
125157
126158TEST (Bfloat16Test, Conversion) {
127159 float a[100 ];
0 commit comments