Skip to content

Commit ed6c309

Browse files
committed
[APFloat] Fix truncation of certain subnormal numbers
Certain subnormals would be incorrectly rounded away from zero.
Fixes #55838.
Differential Revision: https://reviews.llvm.org/D127140
1 parent d897a14 commit ed6c309

File tree

3 files changed

+53
-10
lines changed

3 files changed

+53
-10
lines changed

llvm/lib/Support/APFloat.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -2213,15 +2213,22 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
22132213
// when truncating from PowerPC double-double to double format), the
22142214
// right shift could lose result mantissa bits. Adjust exponent instead
22152215
// of performing excessive shift.
2216+
// Also do a similar trick in case shifting denormal would produce zero
2217+
// significand as this case isn't handled correctly by normalize.
22162218
if (shift < 0 && isFiniteNonZero()) {
2217-
int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2219+
int omsb = significandMSB() + 1;
2220+
int exponentChange = omsb - fromSemantics.precision;
22182221
if (exponent + exponentChange < toSemantics.minExponent)
22192222
exponentChange = toSemantics.minExponent - exponent;
22202223
if (exponentChange < shift)
22212224
exponentChange = shift;
22222225
if (exponentChange < 0) {
22232226
shift -= exponentChange;
22242227
exponent += exponentChange;
2228+
} else if (omsb <= -shift) {
2229+
exponentChange = omsb + shift - 1; // leave at least one bit set
2230+
shift -= exponentChange;
2231+
exponent += exponentChange;
22252232
}
22262233
}
22272234

llvm/test/Transforms/InstSimplify/ConstProp/cast.ll

+3-9
Original file line numberDiff line numberDiff line change
@@ -79,21 +79,17 @@ define float @trunc_denorm_lost_fraction0() {
7979
ret float %b
8080
}
8181

82-
; FIXME: This should be 0.0.
83-
8482
define float @trunc_denorm_lost_fraction1() {
8583
; CHECK-LABEL: @trunc_denorm_lost_fraction1(
86-
; CHECK-NEXT: ret float 0x36A0000000000000
84+
; CHECK-NEXT: ret float 0.000000e+00
8785
;
8886
%b = fptrunc double 0x0000000010000001 to float
8987
ret float %b
9088
}
9189

92-
; FIXME: This should be 0.0.
93-
9490
define float @trunc_denorm_lost_fraction2() {
9591
; CHECK-LABEL: @trunc_denorm_lost_fraction2(
96-
; CHECK-NEXT: ret float 0x36A0000000000000
92+
; CHECK-NEXT: ret float 0.000000e+00
9793
;
9894
%b = fptrunc double 0x000000001fffffff to float
9995
ret float %b
@@ -107,11 +103,9 @@ define float @trunc_denorm_lost_fraction3() {
107103
ret float %b
108104
}
109105

110-
; FIXME: This should be -0.0.
111-
112106
define float @trunc_denorm_lost_fraction4() {
113107
; CHECK-LABEL: @trunc_denorm_lost_fraction4(
114-
; CHECK-NEXT: ret float 0xB6A0000000000000
108+
; CHECK-NEXT: ret float -0.000000e+00
115109
;
116110
%b = fptrunc double 0x8000000010000001 to float
117111
ret float %b

llvm/unittests/ADT/APFloatTest.cpp

+42
Original file line numberDiff line numberDiff line change
@@ -1859,6 +1859,48 @@ TEST(APFloatTest, convert) {
18591859
EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
18601860
EXPECT_TRUE(losesInfo);
18611861
EXPECT_EQ(status, APFloat::opOK);
1862+
1863+
// Test that subnormals are handled correctly in double to float conversion
1864+
test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022");
1865+
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
1866+
EXPECT_EQ(0.0f, test.convertToFloat());
1867+
EXPECT_TRUE(losesInfo);
1868+
1869+
test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022");
1870+
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
1871+
EXPECT_EQ(0.0f, test.convertToFloat());
1872+
EXPECT_TRUE(losesInfo);
1873+
1874+
test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022");
1875+
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
1876+
EXPECT_EQ(0.0f, test.convertToFloat());
1877+
EXPECT_TRUE(losesInfo);
1878+
1879+
test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022");
1880+
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
1881+
EXPECT_EQ(0.0f, test.convertToFloat());
1882+
EXPECT_TRUE(losesInfo);
1883+
1884+
test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022");
1885+
test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
1886+
EXPECT_EQ(0.0f, test.convertToFloat());
1887+
EXPECT_TRUE(losesInfo);
1888+
1889+
// Test subnormal conversion to bfloat
1890+
test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
1891+
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
1892+
EXPECT_EQ(0.0f, test.convertToFloat());
1893+
EXPECT_TRUE(losesInfo);
1894+
1895+
test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126");
1896+
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
1897+
EXPECT_EQ(0x01, test.bitcastToAPInt());
1898+
EXPECT_FALSE(losesInfo);
1899+
1900+
test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
1901+
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
1902+
EXPECT_EQ(0x01, test.bitcastToAPInt());
1903+
EXPECT_TRUE(losesInfo);
18621904
}
18631905

18641906
TEST(APFloatTest, PPCDoubleDouble) {

0 commit comments

Comments
 (0)