FLANG
real.h
1//===-- include/flang/Common/real.h -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_COMMON_REAL_H_
10#define FORTRAN_COMMON_REAL_H_
11
12// Characteristics of IEEE-754 & related binary floating-point numbers.
13// The various representations are distinguished by their binary precisions
14// (number of explicit significand bits and any implicit MSB in the fraction).
15
16#include <cinttypes>
17
18namespace Fortran::common {
19
// Total storage width, in bits, of the representation that has the given
// binary precision.  Yields -1 when the precision does not correspond to
// any of the formats listed below.
static constexpr int BitsForBinaryPrecision(int binaryPrecision) {
  // 16-bit formats, both with an implicit bit:
  //   8: IEEE single (truncated): 1+8+7
  //  11: IEEE half precision: 1+5+10
  if (binaryPrecision == 8 || binaryPrecision == 11) {
    return 16;
  }
  // IEEE single precision: 1+8+23 with implicit bit
  if (binaryPrecision == 24) {
    return 32;
  }
  // IEEE double precision: 1+11+52 with implicit bit
  if (binaryPrecision == 53) {
    return 64;
  }
  // x87 extended precision: 1+15+64, no implicit bit
  if (binaryPrecision == 64) {
    return 80;
  }
  // 128-bit formats:
  //  106: "double-double": 2*(1+11+52 with implicit bit)
  //  113: IEEE quad precision: 1+15+112 with implicit bit
  if (binaryPrecision == 106 || binaryPrecision == 113) {
    return 128;
  }
  return -1;
}
41
// Upper bound on the number of significant decimal digits in the fraction
// of an exact conversion, for each supported format.  Each bound was
// computed by converting the value that has the minimum exponent (biased
// to 1) and all fractional bits set.  Yields -1 for any precision that is
// not in the table.
static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {
  // Parallel tables: knownPrecisions[j] pairs with digitBounds[j].
  constexpr int knownPrecisions[]{
      8, // IEEE single (truncated): 1+8+7 with implicit bit
      11, // IEEE half precision: 1+5+10 with implicit bit
      24, // IEEE single precision: 1+8+23 with implicit bit
      53, // IEEE double precision: 1+11+52 with implicit bit
      64, // x87 extended precision: 1+15+64, no implicit bit
      106, // "double-double": 2*(1+11+52 with implicit bit)
      113, // IEEE quad precision: 1+15+112 with implicit bit
  };
  constexpr int digitBounds[]{
      96,
      21,
      112,
      767,
      11514,
      2 * 767, // double-double: twice the IEEE double bound
      11563,
  };
  static_assert(sizeof(knownPrecisions) == sizeof(digitBounds),
      "precision and digit tables must have the same length");
  constexpr int entries{
      static_cast<int>(sizeof(knownPrecisions) / sizeof(knownPrecisions[0]))};
  for (int j{0}; j < entries; ++j) {
    if (knownPrecisions[j] == binaryPrecision) {
      return digitBounds[j];
    }
  }
  return -1;
}
65
// Number of hexadecimal digits needed to cover binaryPrecision bits, i.e.
// the precision divided by four and rounded up.  A negative argument is
// handed back unchanged.
static constexpr int MaxHexadecimalConversionDigits(int binaryPrecision) {
  if (binaryPrecision < 0) {
    return binaryPrecision;
  }
  // Adding 3 before the truncating division rounds up to a whole digit.
  return (binaryPrecision + 3) / 4;
}
69
// Maps a binary precision to the corresponding real kind value, or -1 when
// no kind is assigned to that precision.
static constexpr int RealKindForPrecision(int binaryPrecision) {
  int kind{-1}; // remains -1 unless one of the cases below matches
  switch (binaryPrecision) {
  case 8: // IEEE single (truncated): 1+8+7 with implicit bit
    kind = 3;
    break;
  case 11: // IEEE half precision: 1+5+10 with implicit bit
    kind = 2;
    break;
  case 24: // IEEE single precision: 1+8+23 with implicit bit
    kind = 4;
    break;
  case 53: // IEEE double precision: 1+11+52 with implicit bit
    kind = 8;
    break;
  case 64: // x87 extended precision: 1+15+64, no implicit bit
    kind = 10;
    break;
  // TODO: case 106: return kind for double/double
  case 113: // IEEE quad precision: 1+15+112 with implicit bit
    kind = 16;
    break;
  default:
    break;
  }
  return kind;
}
89
// Maps a real kind value to the binary precision of its representation, or
// -1 when the kind is not one of those listed.
static constexpr int PrecisionOfRealKind(int kind) {
  // TODO: case kind for double/double: return 106;
  return kind == 2 ? 11 // IEEE half precision: 1+5+10 with implicit bit
      : kind == 3  ? 8 // IEEE single (truncated): 1+8+7 with implicit bit
      : kind == 4  ? 24 // IEEE single precision: 1+8+23 with implicit bit
      : kind == 8  ? 53 // IEEE double precision: 1+11+52 with implicit bit
      : kind == 10 ? 64 // x87 extended precision: 1+15+64, no implicit bit
      : kind == 16 ? 113 // IEEE quad precision: 1+15+112 with implicit bit
                   : -1;
}
109
110// RealCharacteristics is constexpr, but also useful when constructed
111// with a non-constant precision argument.
112class RealCharacteristics {
113public:
114 explicit constexpr RealCharacteristics(int p) : binaryPrecision{p} {}
115
116 int binaryPrecision;
117 int bits{BitsForBinaryPrecision(binaryPrecision)};
118 bool isImplicitMSB{binaryPrecision != 64 /*x87*/};
119 int significandBits{binaryPrecision - isImplicitMSB};
120 int exponentBits{bits - significandBits - 1 /*sign*/};
121 int maxExponent{(1 << exponentBits) - 1};
122 int exponentBias{maxExponent / 2};
123 int decimalPrecision{LogBaseTwoToLogBaseTen(binaryPrecision - 1)};
124 int decimalRange{LogBaseTwoToLogBaseTen(exponentBias - 1)};
125 // Number of significant decimal digits in the fraction of the
126 // exact conversion of the least nonzero subnormal.
127 int maxDecimalConversionDigits{MaxDecimalConversionDigits(binaryPrecision)};
128 int maxHexadecimalConversionDigits{
129 MaxHexadecimalConversionDigits(binaryPrecision)};
130
131private:
132 // Converts bit widths to whole decimal digits
133 static constexpr int LogBaseTwoToLogBaseTen(int logb2) {
134 constexpr std::int64_t LogBaseTenOfTwoTimesTenToThe12th{301029995664};
135 constexpr std::int64_t TenToThe12th{1000000000000};
136 std::int64_t logb10{
137 (logb2 * LogBaseTenOfTwoTimesTenToThe12th) / TenToThe12th};
138 return static_cast<int>(logb10);
139 }
140};
141
142} // namespace Fortran::common
143#endif // FORTRAN_COMMON_REAL_H_
Definition bit-population-count.h:20