FLANG
reduction-templates.h
1//===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Generic function templates used by various reduction transformation
10// intrinsic functions (SUM, PRODUCT, &c.)
11//
12// * Partial reductions (i.e., those with DIM= arguments that are not
13// required to be 1 by the rank of the argument) return arrays that
14// are dynamically allocated in a caller-supplied descriptor.
15// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16// return integer vectors of some kind, not scalars; a caller-supplied
17// descriptor is used
18// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19// length results, dynamically allocated in a caller-supplied descriptor
20
21#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
23
24#include "numeric-templates.h"
25#include "terminator.h"
26#include "tools.h"
27#include "flang/Runtime/cpp-type.h"
28#include "flang/Runtime/descriptor.h"
29#include <algorithm>
30
31namespace Fortran::runtime {
32
33// Reductions are implemented with *accumulators*, which are instances of
34// classes that incrementally build up the result (or an element thereof) during
35// a traversal of the unmasked elements of an array. Each accumulator class
36// supports a constructor (which captures a reference to the array), an
37// AccumulateAt() member function that applies supplied subscripts to the
38// array and does something with a scalar element, and a GetResult()
39// member function that copies a final result into its destination.
40
41// Total reduction of the array argument to a scalar (or to a vector in the
42// cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
43// cases where the argument has rank 1 and DIM=, if present, must be 1.
44template <typename TYPE, typename ACCUMULATOR>
45inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
46 const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
47 Terminator &terminator) {
48 if (dim < 0 || dim > 1) {
49 terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
50 intrinsic, dim, x.rank());
51 }
52 SubscriptValue xAt[maxRank];
53 x.GetLowerBounds(xAt);
54 if (mask) {
55 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
56 if (mask->rank() > 0) {
57 SubscriptValue maskAt[maxRank];
58 mask->GetLowerBounds(maskAt);
59 for (auto elements{x.Elements()}; elements--;
60 x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
61 if (IsLogicalElementTrue(*mask, maskAt)) {
62 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
63 break;
64 }
65 }
66 }
67 return;
68 } else if (!IsLogicalScalarTrue(*mask)) {
69 // scalar MASK=.FALSE.: return identity value
70 return;
71 }
72 }
73 // No MASK=, or scalar MASK=.TRUE.
74 for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
75 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
76 break; // cut short, result is known
77 }
78 }
79}
80
81template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
82inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
83 const char *source, int line, int dim, const Descriptor *mask,
84 ACCUMULATOR &&accumulator, const char *intrinsic,
85 bool allowUnsignedForInteger = false) {
86 Terminator terminator{source, line};
87 RUNTIME_CHECK(terminator,
88 TypeCode(CAT, KIND) == x.type() ||
89 (CAT == TypeCategory::Integer && allowUnsignedForInteger &&
90 TypeCode(TypeCategory::Unsigned, KIND) == x.type()));
91 using CppType = CppTypeFor<CAT, KIND>;
92 DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
93 if constexpr (std::is_void_v<CppType>) {
94 // Result is returned from accumulator, as in REDUCE() for derived type
95#ifdef _MSC_VER // work around MSVC spurious error
96 accumulator.GetResult();
97#else
98 accumulator.template GetResult<CppType>();
99#endif
100 } else {
101 CppType result;
102#ifdef _MSC_VER // work around MSVC spurious error
103 accumulator.GetResult(&result);
104#else
105 accumulator.template GetResult<CppType>(&result);
106#endif
107 return result;
108 }
109}
110
111// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
112// of the array is [2,3,5], the shape of the result is [2,5] and
113// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
114// lower bounds other than one. This utility subroutine creates an
115// array of subscripts [j,_,k] for result subscripts [j,k] so that the
116// elements of array(j,:,k) can be reduced.
117inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
118 const Descriptor &descriptor, int zeroBasedDim,
119 const SubscriptValue from[]) {
120 descriptor.GetLowerBounds(at);
121 int rank{descriptor.rank()};
122 int j{0};
123 for (; j < zeroBasedDim; ++j) {
124 at[j] += from[j] - 1 /*lower bound*/;
125 }
126 for (++j; j < rank; ++j) {
127 at[j] += from[j - 1] - 1;
128 }
129}
130
131template <typename TYPE, typename ACCUMULATOR>
132inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
133 int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
134 ACCUMULATOR &accumulator) {
135 SubscriptValue xAt[maxRank];
136 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
137 const auto &dim{x.GetDimension(zeroBasedDim)};
138 SubscriptValue at{dim.LowerBound()};
139 for (auto n{dim.Extent()}; n-- > 0; ++at) {
140 xAt[zeroBasedDim] = at;
141 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
142 break;
143 }
144 }
145#ifdef _MSC_VER // work around MSVC spurious error
146 accumulator.GetResult(result, zeroBasedDim);
147#else
148 accumulator.template GetResult<TYPE>(result, zeroBasedDim);
149#endif
150}
151
152template <typename TYPE, typename ACCUMULATOR>
153inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
154 int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
155 TYPE *result, ACCUMULATOR &accumulator) {
156 SubscriptValue xAt[maxRank], maskAt[maxRank];
157 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
158 GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
159 const auto &xDim{x.GetDimension(zeroBasedDim)};
160 SubscriptValue xPos{xDim.LowerBound()};
161 const auto &maskDim{mask.GetDimension(zeroBasedDim)};
162 SubscriptValue maskPos{maskDim.LowerBound()};
163 for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
164 ++xPos, ++maskPos) {
165 maskAt[zeroBasedDim] = maskPos;
166 if (IsLogicalElementTrue(mask, maskAt)) {
167 xAt[zeroBasedDim] = xPos;
168 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
169 break;
170 }
171 }
172 }
173#ifdef _MSC_VER // work around MSVC spurious error
174 accumulator.GetResult(result, zeroBasedDim);
175#else
176 accumulator.template GetResult<TYPE>(result, zeroBasedDim);
177#endif
178}
179
180// Partial reductions with DIM=
181
182template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
183inline RT_API_ATTRS void PartialReduction(Descriptor &result,
184 const Descriptor &x, std::size_t resultElementSize, int dim,
185 const Descriptor *mask, Terminator &terminator, const char *intrinsic,
186 ACCUMULATOR &accumulator) {
187 CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
188 intrinsic, TypeCode{CAT, KIND});
189 SubscriptValue at[maxRank];
190 result.GetLowerBounds(at);
191 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
192 using CppType = CppTypeFor<CAT, KIND>;
193 if (mask) {
194 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
195 if (mask->rank() > 0) {
196 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
197 accumulator.Reinitialize();
198 ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
199 x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
200 }
201 return;
202 } else if (!IsLogicalScalarTrue(*mask)) {
203 // scalar MASK=.FALSE.
204 accumulator.Reinitialize();
205 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
206 accumulator.GetResult(result.Element<CppType>(at));
207 }
208 return;
209 }
210 }
211 // No MASK= or scalar MASK=.TRUE.
212 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
213 accumulator.Reinitialize();
214 ReduceDimToScalar<CppType, ACCUMULATOR>(
215 x, dim - 1, at, result.Element<CppType>(at), accumulator);
216 }
217}
218
219template <template <typename> class ACCUM>
221 template <int KIND> struct Functor {
222 static constexpr int Intermediate{
223 std::max(KIND, 4)}; // use at least "int" for intermediate results
224 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
225 int dim, const Descriptor *mask, Terminator &terminator,
226 const char *intrinsic) const {
227 using Accumulator =
228 ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
229 Accumulator accumulator{x};
230 // Element size of the destination descriptor is the same
231 // as the element size of the source.
232 PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
233 x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
234 }
235 };
236};
237
238template <template <typename> class INTEGER_ACCUM>
239inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
240 const Descriptor &x, int dim, int kind, const Descriptor *mask,
241 const char *intrinsic, Terminator &terminator) {
242 ApplyIntegerKind<
244 kind, terminator, result, x, dim, mask, terminator, intrinsic);
245}
246
247template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
249 template <int KIND> struct Functor {
250 static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
251 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
252 int dim, const Descriptor *mask, Terminator &terminator,
253 const char *intrinsic) const {
254 using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
255 Accumulator accumulator{x};
256 // Element size of the destination descriptor is the same
257 // as the element size of the source.
258 PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
259 mask, terminator, intrinsic, accumulator);
260 }
261 };
262};
263
264template <template <typename> class INTEGER_ACCUM,
265 template <typename> class REAL_ACCUM,
266 template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
267inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
268 const Descriptor &x, int dim, const char *source, int line,
269 const Descriptor *mask, const char *intrinsic) {
270 Terminator terminator{source, line};
271 auto catKind{x.type().GetCategoryAndKind()};
272 RUNTIME_CHECK(terminator, catKind.has_value());
273 switch (catKind->first) {
274 case TypeCategory::Integer:
275 PartialIntegerReduction<INTEGER_ACCUM>(
276 result, x, dim, catKind->second, mask, intrinsic, terminator);
277 break;
278 case TypeCategory::Real:
279 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
280 REAL_ACCUM, MIN_REAL_KIND>::template Functor,
281 void>(catKind->second, terminator, result, x, dim, mask, terminator,
282 intrinsic);
283 break;
284 case TypeCategory::Complex:
285 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
286 COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
287 void>(catKind->second, terminator, result, x, dim, mask, terminator,
288 intrinsic);
289 break;
290 default:
291 terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
292 }
293}
294
295template <typename ACCUMULATOR> struct LocationResultHelper {
296 template <int KIND> struct Functor {
297 RT_API_ATTRS void operator()(
298 ACCUMULATOR &accumulator, const Descriptor &result) const {
299 accumulator.GetResult(
300 result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
301 }
302 };
303};
304
305template <typename ACCUMULATOR> struct PartialLocationHelper {
306 template <int KIND> struct Functor {
307 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
308 int dim, const Descriptor *mask, Terminator &terminator,
309 const char *intrinsic, ACCUMULATOR &accumulator) const {
310 // Element size of the destination descriptor is the size
311 // of {TypeCategory::Integer, KIND}.
312 PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
313 Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
314 terminator, intrinsic, accumulator);
315 }
316 };
317};
318
319// NORM2 templates
320
321RT_VAR_GROUP_BEGIN
322
323// Use at least double precision for accumulators.
324// Don't use __float128, it doesn't work with abs() or sqrt() yet.
325static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind{
326#if HAS_LDBL128 || HAS_FLOAT128
327 16
328#elif HAS_FLOAT80
329 10
330#else
331 8
332#endif
333};
334
335RT_VAR_GROUP_END
336
337template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
338inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
339 int dim, const Descriptor *mask, const char *intrinsic,
340 Terminator &terminator) {
341 using Type = CppTypeFor<CAT, KIND>;
342 ACCUMULATOR accumulator{x};
343 if (dim == 0 || x.rank() == 1) {
344 // Total reduction
345
346 // Element size of the destination descriptor is the same
347 // as the element size of the source.
348 result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
349 CFI_attribute_allocatable);
350 if (int stat{result.Allocate()}) {
351 terminator.Crash(
352 "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
353 }
354 DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
355 accumulator.GetResult(result.OffsetElement<Type>());
356 } else {
357 // Partial reduction
358
359 // Element size of the destination descriptor is the same
360 // as the element size of the source.
361 PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
362 mask, terminator, intrinsic, accumulator);
363 }
364}
365
366// The data type used by Norm2Accumulator.
367template <int KIND>
368using Norm2AccumType =
369 CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>;
370
371template <int KIND> class Norm2Accumulator {
372public:
373 using Type = CppTypeFor<TypeCategory::Real, KIND>;
374 using AccumType = Norm2AccumType<KIND>;
375 explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
376 : array_{array} {}
377 RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
378 template <typename A>
379 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
380 // m * sqrt(1 + sum((others(:)/m)**2))
381 *p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_));
382 }
383 RT_API_ATTRS bool Accumulate(Type x) {
384 auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
385 if (!max_) {
386 max_ = absX;
387 } else if (absX > max_) {
388 auto t{max_ / absX}; // < 1.0
389 auto tsq{t * t};
390 sum_ *= tsq; // scale sum to reflect change to the max
391 sum_ += tsq; // include a term for the previous max
392 max_ = absX;
393 } else { // absX <= max_
394 auto t{absX / max_};
395 sum_ += t * t;
396 }
397 return true;
398 }
399 template <typename A>
400 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
401 return Accumulate(*array_.Element<A>(at));
402 }
403
404private:
405 const Descriptor &array_;
406 AccumType max_{0}; // value (m) with largest magnitude
407 AccumType sum_{0}; // sum((others(:)/m)**2)
408};
409
410template <int KIND> struct Norm2Helper {
411 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
412 const Descriptor *mask, Terminator &terminator) const {
413 DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
414 result, x, dim, mask, "NORM2", terminator);
415 }
416};
417
418} // namespace Fortran::runtime
419#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
Definition: dot-product.cpp:27
Definition: descriptor.h:138
Definition: reduction-templates.h:371
Definition: terminator.h:23
Definition: numeric-templates.h:126
Definition: reduction-templates.h:296
Definition: reduction-templates.h:295
Definition: reduction-templates.h:410
Definition: reduction-templates.h:248
Definition: reduction-templates.h:220
Definition: reduction-templates.h:306
Definition: reduction-templates.h:305
Definition: numeric-templates.h:185