FLANG
CUDAIntrinsicCall.h
//==-- Builder/CUDAIntrinsicCall.h - lowering of CUDA intrinsics ---*-C++-*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_LOWER_CUDAINTRINSICCALL_H
10#define FORTRAN_LOWER_CUDAINTRINSICCALL_H
11
12#include "flang/Optimizer/Builder/IntrinsicCall.h"
13#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
14
15namespace fir {
16
17struct CUDAIntrinsicLibrary : IntrinsicLibrary {
18
19 // Constructors.
20 explicit CUDAIntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
21 : IntrinsicLibrary(builder, loc) {}
22 CUDAIntrinsicLibrary() = delete;
23 CUDAIntrinsicLibrary(const CUDAIntrinsicLibrary &) = delete;
24
25 // CUDA intrinsic handlers.
26 mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
27 fir::ExtendedValue genAtomicAddR2(mlir::Type,
29 template <int extent>
30 fir::ExtendedValue genAtomicAddVector(mlir::Type,
32 fir::ExtendedValue genAtomicAddVector4x4(mlir::Type,
34 mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
35 fir::ExtendedValue genAtomicCas(mlir::Type,
37 mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
38 fir::ExtendedValue genAtomicExch(mlir::Type,
40 mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
41 mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
42 mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
43 mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
44 mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
45 fir::ExtendedValue genAtomicXor(mlir::Type,
47 mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
48 mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
49 void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
50 mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
51 mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
52 mlir::Value genClusterBlockIndex(mlir::Type, llvm::ArrayRef<mlir::Value>);
53 mlir::Value genClusterDimBlocks(mlir::Type, llvm::ArrayRef<mlir::Value>);
54 void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
55 template <const char *fctName, int extent>
56 fir::ExtendedValue genLDXXFunc(mlir::Type,
58 mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
59 mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
60 template <typename OpTy>
61 mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
62 void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
63 mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
64 mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
65 mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
66 void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
67 mlir::Value genThisCluster(mlir::Type, llvm::ArrayRef<mlir::Value>);
68 mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
69 mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
70 mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
71 template <mlir::NVVM::MemScopeKind scope>
72 void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
73 void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
74 void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
75 void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
76 void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
77 void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
78 void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
79 void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
80 void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
81 void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
82 void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
83 void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
84 void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
85 void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
86 void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
87 void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
88 void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
89 void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
90 void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
91 template <mlir::NVVM::VoteSyncKind kind>
92 mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
93};
94
95const IntrinsicHandler *findCUDAIntrinsicHandler(llvm::StringRef name);
96
97} // namespace fir
98
99#endif // FORTRAN_LOWER_CUDAINTRINSICCALL_H
Definition BoxValue.h:478
Definition FIRBuilder.h:55
Definition FIRType.h:92
Definition AbstractConverter.h:34
Definition IntrinsicCall.h:567