FLANG
CUDAIntrinsicCall.h
1//==-- Builder/CUDAIntrinsicCall.h - lowering of CUDA intrinsics ---*-C++-*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_LOWER_CUDAINTRINSICCALL_H
10#define FORTRAN_LOWER_CUDAINTRINSICCALL_H
11
12#include "flang/Optimizer/Builder/IntrinsicCall.h"
13#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
14
15namespace fir {
16
// CUDAIntrinsicLibrary derives from IntrinsicLibrary and declares the member
// functions used as handlers for CUDA intrinsics. Only declarations appear in
// this header; no handler body is visible here.
//
// Handlers are declared in two shapes:
//   - mlir::Value / fir::ExtendedValue returning, taking a result mlir::Type
//     followed by an llvm::ArrayRef of arguments;
//   - void returning, taking only an llvm::ArrayRef<fir::ExtendedValue>.
//
// NOTE(review): in this copy the embedded listing numbers skip values (e.g.
// 27 -> 29, 30 -> 32, 53 -> 55) and several declarations stop at `mlir::Type,`
// or start without a return type. The wrapped continuation lines appear to be
// missing; restore them from the upstream header before relying on this text.
17struct CUDAIntrinsicLibrary : IntrinsicLibrary {
18
19 // Constructors.
  // The only usable constructor forwards `builder` and `loc` unchanged to the
  // IntrinsicLibrary base; default and copy construction are deleted.
20 explicit CUDAIntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
21 : IntrinsicLibrary(builder, loc) {}
22 CUDAIntrinsicLibrary() = delete;
23 CUDAIntrinsicLibrary(const CUDAIntrinsicLibrary &) = delete;
24
25 // CUDA intrinsic handlers.
  // Atomic handlers. NOTE(review): the fir::ExtendedValue declarations below
  // are cut off after `mlir::Type,` in this copy (second parameter missing).
26 mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
27 fir::ExtendedValue genAtomicAddR2(mlir::Type,
  // Member template parameterized on an integer `extent`; its meaning is not
  // shown in this header.
29 template <int extent>
30 fir::ExtendedValue genAtomicAddVector(mlir::Type,
32 fir::ExtendedValue genAtomicAddVector4x4(mlir::Type,
34 mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
35 fir::ExtendedValue genAtomicCas(mlir::Type,
37 mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
38 fir::ExtendedValue genAtomicExch(mlir::Type,
40 mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
41 mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
42 mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
43 mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
44 mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
45 fir::ExtendedValue genAtomicXor(mlir::Type,
  // Barrier and cluster handlers.
47 mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
48 mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
49 void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
50 mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
51 mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
52 mlir::Value genClusterBlockIndex(mlir::Type, llvm::ArrayRef<mlir::Value>);
53 mlir::Value genClusterDimBlocks(mlir::Type, llvm::ArrayRef<mlir::Value>);
  // Stream handlers. NOTE(review): several of these declarations have lost
  // their return-type line and/or trailing parameter line in this copy.
55 genCUDASetDefaultStream(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
57 genCUDASetDefaultStreamArray(mlir::Type,
60 genCUDAGetDefaultStreamArg(mlir::Type,
62 mlir::Value genCUDAGetDefaultStreamNull(mlir::Type,
65 genCUDAStreamSynchronize(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
66 mlir::Value genCUDAStreamSynchronizeNull(mlir::Type,
68 fir::ExtendedValue genCUDAStreamDestroy(mlir::Type,
70 void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
  // Member template taking a `const char *` non-type parameter (`fctName`)
  // and an integer `extent`; how they are used is not visible here.
71 template <const char *fctName, int extent>
72 fir::ExtendedValue genLDXXFunc(mlir::Type,
74 mlir::Value genMatchAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
75 mlir::Value genMatchAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
  // Member template parameterized on a type `OpTy`.
76 template <typename OpTy>
77 mlir::Value genNVVMTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
78 void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
79 mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
80 mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
81 mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
82 void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
83 mlir::Value genThisCluster(mlir::Type, llvm::ArrayRef<mlir::Value>);
84 mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
85 mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
86 mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
  // Member template parameterized on an mlir::NVVM::MemScopeKind value.
87 template <mlir::NVVM::MemScopeKind scope>
88 void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
  // TMA bulk handlers: all return void and take only the argument list.
89 void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
90 void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
91 void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
92 void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
93 void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
94 void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
95 void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
96 void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
97 void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
98 void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
99 void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
100 void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
101 void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
102 void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
103 void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
104 void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
105 void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
106 void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
  // Member template parameterized on an mlir::NVVM::VoteSyncKind value.
107 template <mlir::NVVM::VoteSyncKind kind>
108 mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
109};
110
// Takes an intrinsic `name` and returns a pointer to a const IntrinsicHandler.
// Only the declaration is visible here.
// NOTE(review): presumably returns nullptr when no CUDA handler matches
// `name` -- confirm against the definition before relying on it.
111const IntrinsicHandler *findCUDAIntrinsicHandler(llvm::StringRef name);
112
113} // namespace fir
114
115#endif // FORTRAN_LOWER_CUDAINTRINSICCALL_H
Definition BoxValue.h:478
Definition FIRBuilder.h:56
Definition FIRType.h:92
Definition AbstractConverter.h:37
Definition IntrinsicCall.h:543