doxygen/Common_2real_8h_source.html

//===-- include/flang/Common/real.h -----------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#ifndef FORTRAN_COMMON_REAL_H_

#define FORTRAN_COMMON_REAL_H_


// Characteristics of IEEE-754 & related binary floating-point numbers.

// The various representations are distinguished by their binary precisions

// (number of explicit significand bits and any implicit MSB in the fraction).


#include <cinttypes>


namespace Fortran::common {


// Total representation size in bits for each type

static constexpr int BitsForBinaryPrecision(int binaryPrecision) {

  switch (binaryPrecision) {

  case 8: // IEEE single (truncated): 1+8+7 with implicit bit

    return 16;

  case 11: // IEEE half precision: 1+5+10 with implicit bit

    return 16;

  case 24: // IEEE single precision: 1+8+23 with implicit bit

    return 32;

  case 53: // IEEE double precision: 1+11+52 with implicit bit

    return 64;

  case 64: // x87 extended precision: 1+15+64, no implicit bit

    return 80;

  case 106: // "double-double": 2*(1+11+52 with implicit bit)

    return 128;

  case 113: // IEEE quad precision: 1+15+112 with implicit bit

    return 128;

  default:

    return -1;

  }

}


// Maximum number of significant decimal digits in the fraction of an

// exact conversion in each type; computed by converting the value

// with the minimum exponent (biased to 1) and all fractional bits set.

static constexpr int MaxDecimalConversionDigits(int binaryPrecision) {

  switch (binaryPrecision) {

  case 8: // IEEE single (truncated): 1+8+7 with implicit bit

    return 96;

  case 11: // IEEE half precision: 1+5+10 with implicit bit

    return 21;

  case 24: // IEEE single precision: 1+8+23 with implicit bit

    return 112;

  case 53: // IEEE double precision: 1+11+52 with implicit bit

    return 767;

  case 64: // x87 extended precision: 1+15+64, no implicit bit

    return 11514;

  case 106: // "double-double": 2*(1+11+52 with implicit bit)

    return 2 * 767;

  case 113: // IEEE quad precision: 1+15+112 with implicit bit

    return 11563;

  default:

    return -1;

  }

}


static constexpr int MaxHexadecimalConversionDigits(int binaryPrecision) {

  return binaryPrecision >= 0 ? (binaryPrecision + 3) / 4 : binaryPrecision;

}


static constexpr int RealKindForPrecision(int binaryPrecision) {

  switch (binaryPrecision) {

  case 8: // IEEE single (truncated): 1+8+7 with implicit bit

    return 3;

  case 11: // IEEE half precision: 1+5+10 with implicit bit

    return 2;

  case 24: // IEEE single precision: 1+8+23 with implicit bit

    return 4;

  case 53: // IEEE double precision: 1+11+52 with implicit bit

    return 8;

  case 64: // x87 extended precision: 1+15+64, no implicit bit

    return 10;

  // TODO: case 106: return kind for double/double

  case 113: // IEEE quad precision: 1+15+112 with implicit bit

    return 16;

  default:

    return -1;

  }

}


static constexpr int PrecisionOfRealKind(int kind) {

  switch (kind) {

  case 2: // IEEE half precision: 1+5+10 with implicit bit

    return 11;

  case 3: // IEEE single (truncated): 1+8+7 with implicit bit

    return 8;

  case 4: // IEEE single precision: 1+8+23 with implicit bit

    return 24;

  case 8: // IEEE double precision: 1+11+52 with implicit bit

    return 53;

  case 10: // x87 extended precision: 1+15+64, no implicit bit

    return 64;

  // TODO: case kind for double/double: return 106;

  case 16: // IEEE quad precision: 1+15+112 with implicit bit

    return 113;

  default:

    return -1;

  }

}


// RealCharacteristics is constexpr, but also useful when constructed

// with a non-constant precision argument.


class RealCharacteristics {

public:

  explicit constexpr RealCharacteristics(int p) : binaryPrecision{p} {}


  int binaryPrecision;

  int bits{BitsForBinaryPrecision(binaryPrecision)};

  bool isImplicitMSB{binaryPrecision != 64 /*x87*/};

  int significandBits{binaryPrecision - isImplicitMSB};

  int exponentBits{bits - significandBits - 1 /*sign*/};

  int maxExponent{(1 << exponentBits) - 1};

  int exponentBias{maxExponent / 2};

  int decimalPrecision{LogBaseTwoToLogBaseTen(binaryPrecision - 1)};

  int decimalRange{LogBaseTwoToLogBaseTen(exponentBias - 1)};

  // Number of significant decimal digits in the fraction of the

  // exact conversion of the least nonzero subnormal.

  int maxDecimalConversionDigits{MaxDecimalConversionDigits(binaryPrecision)};

  int maxHexadecimalConversionDigits{

      MaxHexadecimalConversionDigits(binaryPrecision)};


private:

  // Converts bit widths to whole decimal digits

  static constexpr int LogBaseTwoToLogBaseTen(int logb2) {

    constexpr std::int64_t LogBaseTenOfTwoTimesTenToThe12th{301029995664};

    constexpr std::int64_t TenToThe12th{1000000000000};

    std::int64_t logb10{

        (logb2 * LogBaseTenOfTwoTimesTenToThe12th) / TenToThe12th};

    return static_cast<int>(logb10);

  }

};


} // namespace Fortran::common

#endif // FORTRAN_COMMON_REAL_H_

Fortran::common
Definition bit-population-count.h:20