FLANG
binary-floating-point.h
1//===-- include/flang/Decimal/binary-floating-point.h -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
10#define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
11
12// Access and manipulate the fields of an IEEE-754 binary
13// floating-point value via a generalized template.
14
15#include "flang/Common/api-attrs.h"
16#include "flang/Common/real.h"
17#include "flang/Common/uint128.h"
18#include "flang/Runtime/freestanding-tools.h"
19#include <cinttypes>
20#include <climits>
21#include <cstring>
22#include <type_traits>
23
24namespace Fortran::decimal {
25
// Fortran rounding modes, tagged with their two-letter abbreviations.
// Enumerator values are consecutive and start at zero.
enum FortranRounding {
  // RN and RP
  RoundNearest = 0,
  // RU
  RoundUp = 1,
  // RD
  RoundDown = 2,
  // RZ - no rounding
  RoundToZero = 3,
  // RC: like RN, but ties go away from 0
  RoundCompatible = 4,
};
33
34template <int BINARY_PRECISION> class BinaryFloatingPointNumber {
35public:
36 RT_OFFLOAD_VAR_GROUP_BEGIN
37 static constexpr common::RealCharacteristics realChars{BINARY_PRECISION};
38 static constexpr int binaryPrecision{BINARY_PRECISION};
39 static constexpr int bits{realChars.bits};
40 static constexpr int isImplicitMSB{realChars.isImplicitMSB};
41 static constexpr int significandBits{realChars.significandBits};
42 static constexpr int exponentBits{realChars.exponentBits};
43 static constexpr int exponentBias{realChars.exponentBias};
44 static constexpr int maxExponent{realChars.maxExponent};
45 static constexpr int decimalPrecision{realChars.decimalPrecision};
46 static constexpr int decimalRange{realChars.decimalRange};
47 static constexpr int maxDecimalConversionDigits{
48 realChars.maxDecimalConversionDigits};
49
50 using RawType = common::HostUnsignedIntType<bits>;
51 static_assert(CHAR_BIT * sizeof(RawType) >= bits);
52 static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
53
54 constexpr RT_API_ATTRS BinaryFloatingPointNumber() {} // zero
55 RT_OFFLOAD_VAR_GROUP_END
56 constexpr BinaryFloatingPointNumber(
57 const BinaryFloatingPointNumber &that) = default;
58 constexpr BinaryFloatingPointNumber(
59 BinaryFloatingPointNumber &&that) = default;
60 constexpr BinaryFloatingPointNumber &operator=(
61 const BinaryFloatingPointNumber &that) = default;
62 constexpr BinaryFloatingPointNumber &operator=(
63 BinaryFloatingPointNumber &&that) = default;
64 constexpr explicit RT_API_ATTRS BinaryFloatingPointNumber(RawType raw)
65 : raw_{raw} {}
66
67 RT_API_ATTRS RawType raw() const { return raw_; }
68
69 template <typename A>
70 explicit constexpr RT_API_ATTRS BinaryFloatingPointNumber(A x) {
71 static_assert(sizeof raw_ <= sizeof x);
72 runtime::memcpy(reinterpret_cast<void *>(&raw_),
73 reinterpret_cast<const void *>(&x), sizeof raw_);
74 }
75
76 constexpr RT_API_ATTRS int BiasedExponent() const {
77 return static_cast<int>(
78 (raw_ >> significandBits) & ((1 << exponentBits) - 1));
79 }
80 constexpr RT_API_ATTRS int UnbiasedExponent() const {
81 int biased{BiasedExponent()};
82 return biased - exponentBias + (biased == 0);
83 }
84 constexpr RT_API_ATTRS RawType Significand() const {
85 return raw_ & significandMask;
86 }
87 constexpr RT_API_ATTRS RawType Fraction() const {
88 RawType sig{Significand()};
89 if (isImplicitMSB && BiasedExponent() > 0) {
90 sig |= RawType{1} << significandBits;
91 }
92 return sig;
93 }
94
95 constexpr RT_API_ATTRS bool IsZero() const {
96 return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0;
97 }
98 constexpr RT_API_ATTRS bool IsNaN() const {
99 auto expo{BiasedExponent()};
100 auto sig{Significand()};
101 if constexpr (bits == 80) { // x87
102 if (expo == maxExponent) {
103 return sig != (significandMask >> 1) + 1;
104 } else {
105 return expo != 0 && !(sig & (RawType{1} << (significandBits - 1)));
106 ;
107 }
108 } else {
109 return expo == maxExponent && sig != 0;
110 }
111 }
112 constexpr RT_API_ATTRS bool IsInfinite() const {
113 if constexpr (bits == 80) { // x87
114 return BiasedExponent() == maxExponent &&
115 Significand() == ((significandMask >> 1) + 1);
116 } else {
117 return BiasedExponent() == maxExponent && Significand() == 0;
118 }
119 }
120 constexpr RT_API_ATTRS bool IsMaximalFiniteMagnitude() const {
121 return BiasedExponent() == maxExponent - 1 &&
122 Significand() == significandMask;
123 }
124 constexpr RT_API_ATTRS bool IsNegative() const {
125 return ((raw_ >> (bits - 1)) & 1) != 0;
126 }
127
128 constexpr RT_API_ATTRS void Negate() { raw_ ^= RawType{1} << (bits - 1); }
129
130 // For calculating the nearest neighbors of a floating-point value
131 constexpr RT_API_ATTRS void Previous() {
132 RemoveExplicitMSB();
133 --raw_;
134 InsertExplicitMSB();
135 }
136 constexpr RT_API_ATTRS void Next() {
137 RemoveExplicitMSB();
138 ++raw_;
139 InsertExplicitMSB();
140 }
141
142 static constexpr RT_API_ATTRS BinaryFloatingPointNumber Infinity(
143 bool isNegative) {
144 RawType result{RawType{maxExponent} << significandBits};
145 if (isNegative) {
146 result |= RawType{1} << (bits - 1);
147 }
148 return BinaryFloatingPointNumber{result};
149 }
150
151 // Returns true when the result is exact
152 constexpr RT_API_ATTRS bool RoundToBits(
153 int keepBits, enum FortranRounding mode) {
154 if (IsNaN() || IsInfinite() || keepBits >= binaryPrecision) {
155 return true;
156 }
157 int lostBits{keepBits < binaryPrecision ? binaryPrecision - keepBits : 0};
158 RawType lostMask{static_cast<RawType>((RawType{1} << lostBits) - 1)};
159 if (RawType lost{static_cast<RawType>(raw_ & lostMask)}; lost != 0) {
160 bool increase{false};
161 switch (mode) {
162 case RoundNearest:
163 if (lost >> (lostBits - 1) != 0) { // >= tie
164 if ((lost & (lostMask >> 1)) != 0) {
165 increase = true; // > tie
166 } else {
167 increase = ((raw_ >> lostBits) & 1) != 0; // tie to even
168 }
169 }
170 break;
171 case RoundUp:
172 increase = !IsNegative();
173 break;
174 case RoundDown:
175 increase = IsNegative();
176 break;
177 case RoundToZero:
178 break;
179 case RoundCompatible:
180 increase = lost >> (lostBits - 1) != 0; // >= tie
181 break;
182 }
183 if (increase) {
184 raw_ |= lostMask;
185 Next();
186 }
187 return false; // inexact
188 } else {
189 return true; // exact
190 }
191 }
192
193private:
194 constexpr RT_API_ATTRS void RemoveExplicitMSB() {
195 if constexpr (!isImplicitMSB) {
196 raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1);
197 }
198 }
199 constexpr RT_API_ATTRS void InsertExplicitMSB() {
200 if constexpr (!isImplicitMSB) {
201 constexpr RawType mask{significandMask >> 1};
202 raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1);
203 if (BiasedExponent() > 0) {
204 raw_ |= RawType{1} << (significandBits - 1);
205 }
206 }
207 }
208
209 RawType raw_{0};
210};
211} // namespace Fortran::decimal
212#endif