Remove support for building with LLVM 10

We no longer support building with older branches of OE-Core/meta-clang
so remove LLVM 10 specific configurations and patches.

Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
Anuj Mittal 2022-04-05 10:34:33 +08:00
parent 7bb49b1ae1
commit c993e8e815
28 changed files with 2 additions and 14764 deletions

View File

@ -21,10 +21,8 @@ PREFERRED_PROVIDER_libva-utils = "libva-intel-utils"
PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native"
PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils"
PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \
bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}"
PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \
bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}"
PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
XSERVER_X86_ASPEED_AST = "xf86-video-ast \
"

View File

@ -1,111 +0,0 @@
From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001
From: Aleksander Us <aleksander.us@intel.com>
Date: Mon, 26 Aug 2019 15:47:41 +0300
Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in
SplitBlockPredecessors.
In case when BB is header of some loop and predecessor is latch of
this loop, metadata was not attached to newly created basic block.
This led to loss of loop metadata for other passes.
Upstream-Status: Submitted [https://reviews.llvm.org/D66892]
https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++----
test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++
2 files changed, 52 insertions(+), 7 deletions(-)
create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5fa371377c8..3a90ae061fb 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
+ bool IsBBHeader = LI && LI->isLoopHeader(BB);
+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr;
// Splitting the predecessors of a loop header creates a preheader block.
- if (LI && LI->isLoopHeader(BB))
+ if (IsBBHeader)
// Using the loop start line number prevents debuggers stepping into the
// loop body for this instruction.
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+ BI->setDebugLoc(BBLoop->getStartLoc());
else
BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ for (BasicBlock *Pred : Preds) {
+ Instruction *PI = Pred->getTerminator();
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ assert(!isa<IndirectBrInst>(PI) &&
"Cannot split an edge from an IndirectBrInst");
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from a CallBrInst");
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst");
+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) {
+ // Update loop metadata if it exists.
+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) {
+ BI->setMetadata(LLVMContext::MD_loop, LoopMD);
+ PI->setMetadata(LLVMContext::MD_loop, nullptr);
+ }
+ }
+ PI->replaceUsesOfWith(BB, NewBB);
}
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll
new file mode 100644
index 00000000000..c15c92fe3ae
--- /dev/null
+++ b/test/Transforms/LoopSimplify/loop_metadata.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-simplify < %s | FileCheck %s
+
+; CHECK: for.cond.loopexit:
+; CHECK: br label %for.cond, !llvm.loop !0
+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit
+
+define void @foo() {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond1, %entry
+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ]
+ %cmp = icmp ult i32 %j, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %dummy1 = add i32 1, 1
+ %add = add nuw nsw i32 %j, 1
+ br label %for.cond1
+
+for.cond1: ; preds = %for.body1, %for.body
+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ]
+ %cmp1 = icmp ult i32 %i.0, 8
+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0
+
+for.body1: ; preds = %for.cond1
+ %dummy2 = add i32 1, 1
+ %inc = add nuw nsw i32 %i.0, 1
+ br label %for.cond1
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.full"}
--
2.18.0

View File

@ -1,146 +0,0 @@
From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
From: Aleksander Us <aleksander.us@intel.com>
Date: Mon, 26 Aug 2019 15:45:47 +0300
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
LFTR when possible.
SCEV analysis cannot properly cache instruction with poison flags
(for example, add nsw outside of loop will not be reused by expander).
This can lead to generating of additional instructions by SCEV expander.
Example IR:
...
%maxval = add nuw nsw i32 %a1, %a2
...
for.body:
...
%cmp22 = icmp ult i32 %ivadd, %maxval
br i1 %cmp22, label %for.body, label %for.end
...
SCEV expander will generate copy of %maxval in preheader but without
nuw/nsw flags. This can be avoided by explicit check that iv count
value gives the same SCEV expressions as calculated by LFTR.
Upstream-Status: Submitted [https://reviews.llvm.org/D66890]
https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
test/Transforms/IndVarSimplify/udiv.ll | 1 +
4 files changed, 38 insertions(+), 7 deletions(-)
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index f9fc698a4a9..5e04dac8aa6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
if (UsePostInc)
IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
+ // If computed limit is equal to old limit then do not use SCEV expander
+ // because it can lost NUW/NSW flags and create extra instructions.
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
+ Value *Limit = Cmp->getOperand(0);
+ if (!L->isLoopInvariant(Limit))
+ Limit = Cmp->getOperand(1);
+ if (SE->getSCEV(Limit) == IVLimit)
+ return Limit;
+ }
+
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
Type *LimitTy = ExitCount->getType()->isPointerTy() ?
IndVar->getType() : ExitCount->getType();
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
}
}
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
new file mode 100644
index 00000000000..abd1cbb6c51
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
@@ -0,0 +1,23 @@
+; RUN: opt -indvars -S %s | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
+
+; CHECK: for.body.preheader:
+; CHECK-NOT: add
+; CHECK: for.body:
+
+define void @foo(i32 %a1, i32 %a2) {
+entry:
+ %maxval = add nuw nsw i32 %a1, %a2
+ %cmp = icmp slt i32 %maxval, 1
+ br i1 %cmp, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
+ %add31 = add nuw nsw i32 %j.02, 1
+ %cmp22 = icmp slt i32 %add31, %maxval
+ br i1 %cmp22, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 14ae9738696..509d662b767 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: outer.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
; CHECK-NEXT: br label [[OUTER:%.*]]
; CHECK: outer:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
; CHECK: inner:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
; CHECK: outer.inc.loopexit:
; CHECK-NEXT: br label [[OUTER_INC]]
; CHECK: outer.inc:
; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
index b3f2c2a6a66..3530343ef4a 100644
--- a/test/Transforms/IndVarSimplify/udiv.ll
+++ b/test/Transforms/IndVarSimplify/udiv.ll
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
; CHECK-LABEL: @foo(
; CHECK: for.body.preheader:
; CHECK-NOT: udiv
+; CHECK: for.body:
define void @foo(double* %p, i64 %n) nounwind {
entry:
--
2.18.0

View File

@ -1,51 +0,0 @@
From 661021749a168c423d69d0ba7cdfa16fed860836 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 21 Aug 2019 14:35:31 +0800
Subject: [PATCH 1/3] llvm-spirv: skip building tests
Some of these need clang to be built and since we're building this in-tree,
that leads to problems when compiling libcxx, compiler-rt which aren't built
in-tree.
Instead of using SPIRV_SKIP_CLANG_BUILD to skip clang build and adding this to
all components, disable the building of tests altogether.
Upstream-Status: Inappropriate
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
CMakeLists.txt | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 92c50370..80999c98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,13 +25,6 @@ if(LLVM_SPIRV_BUILD_EXTERNAL)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
- if(LLVM_SPIRV_INCLUDE_TESTS)
- set(LLVM_TEST_COMPONENTS
- llvm-as
- llvm-dis
- )
- endif(LLVM_SPIRV_INCLUDE_TESTS)
-
find_package(LLVM 10.0.0 REQUIRED
COMPONENTS
Analysis
@@ -63,9 +56,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
add_subdirectory(lib/SPIRV)
add_subdirectory(tools/llvm-spirv)
-if(LLVM_SPIRV_INCLUDE_TESTS)
- add_subdirectory(test)
-endif(LLVM_SPIRV_INCLUDE_TESTS)
install(
FILES
--
2.17.1

View File

@ -1,812 +0,0 @@
From 3f544cfe44ee5f113a3fb554aca2cf5d64996062 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 7 Apr 2021 16:38:38 +0800
Subject: [PATCH 2/7] Add cl_khr_extended_subgroup extensions.
Added extensions and their function declarations into
the standard header.
Patch by Piotr Fusik!
Tags: #clang
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a]
Signed-off-by: Anastasia Stulova <anastasia.stulova@arm.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
.../include/clang/Basic/OpenCLExtensions.def | 7 +
clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++
clang/test/SemaOpenCL/extension-version.cl | 83 +++
3 files changed, 758 insertions(+)
diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
index 608f78a13eef..d1574164f9b2 100644
--- a/clang/include/clang/Basic/OpenCLExtensions.def
+++ b/clang/include/clang/Basic/OpenCLExtensions.def
@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U)
// Clang Extensions.
OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 93a946cec5b1..67d900eb1c3d 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_subgroups cl_intel_subgroups
+#if defined(cl_khr_subgroup_extended_types)
+char __ovld __conv sub_group_broadcast( char value, uint index );
+char2 __ovld __conv sub_group_broadcast( char2 value, uint index );
+char3 __ovld __conv sub_group_broadcast( char3 value, uint index );
+char4 __ovld __conv sub_group_broadcast( char4 value, uint index );
+char8 __ovld __conv sub_group_broadcast( char8 value, uint index );
+char16 __ovld __conv sub_group_broadcast( char16 value, uint index );
+
+uchar __ovld __conv sub_group_broadcast( uchar value, uint index );
+uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index );
+uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index );
+uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index );
+uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index );
+uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index );
+
+short __ovld __conv sub_group_broadcast( short value, uint index );
+short2 __ovld __conv sub_group_broadcast( short2 value, uint index );
+short3 __ovld __conv sub_group_broadcast( short3 value, uint index );
+short4 __ovld __conv sub_group_broadcast( short4 value, uint index );
+short8 __ovld __conv sub_group_broadcast( short8 value, uint index );
+short16 __ovld __conv sub_group_broadcast( short16 value, uint index );
+
+ushort __ovld __conv sub_group_broadcast( ushort value, uint index );
+ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index );
+ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index );
+ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index );
+ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index );
+ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index );
+
+// scalar int broadcast is part of cl_khr_subgroups
+int2 __ovld __conv sub_group_broadcast( int2 value, uint index );
+int3 __ovld __conv sub_group_broadcast( int3 value, uint index );
+int4 __ovld __conv sub_group_broadcast( int4 value, uint index );
+int8 __ovld __conv sub_group_broadcast( int8 value, uint index );
+int16 __ovld __conv sub_group_broadcast( int16 value, uint index );
+
+// scalar uint broadcast is part of cl_khr_subgroups
+uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index );
+uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index );
+uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index );
+uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index );
+uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index );
+
+// scalar long broadcast is part of cl_khr_subgroups
+long2 __ovld __conv sub_group_broadcast( long2 value, uint index );
+long3 __ovld __conv sub_group_broadcast( long3 value, uint index );
+long4 __ovld __conv sub_group_broadcast( long4 value, uint index );
+long8 __ovld __conv sub_group_broadcast( long8 value, uint index );
+long16 __ovld __conv sub_group_broadcast( long16 value, uint index );
+
+// scalar ulong broadcast is part of cl_khr_subgroups
+ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index );
+ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index );
+ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index );
+ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index );
+ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index );
+
+// scalar float broadcast is part of cl_khr_subgroups
+float2 __ovld __conv sub_group_broadcast( float2 value, uint index );
+float3 __ovld __conv sub_group_broadcast( float3 value, uint index );
+float4 __ovld __conv sub_group_broadcast( float4 value, uint index );
+float8 __ovld __conv sub_group_broadcast( float8 value, uint index );
+float16 __ovld __conv sub_group_broadcast( float16 value, uint index );
+
+char __ovld __conv sub_group_reduce_add( char value );
+uchar __ovld __conv sub_group_reduce_add( uchar value );
+short __ovld __conv sub_group_reduce_add( short value );
+ushort __ovld __conv sub_group_reduce_add( ushort value );
+
+char __ovld __conv sub_group_reduce_min( char value );
+uchar __ovld __conv sub_group_reduce_min( uchar value );
+short __ovld __conv sub_group_reduce_min( short value );
+ushort __ovld __conv sub_group_reduce_min( ushort value );
+
+char __ovld __conv sub_group_reduce_max( char value );
+uchar __ovld __conv sub_group_reduce_max( uchar value );
+short __ovld __conv sub_group_reduce_max( short value );
+ushort __ovld __conv sub_group_reduce_max( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_add( char value );
+uchar __ovld __conv sub_group_scan_inclusive_add( uchar value );
+short __ovld __conv sub_group_scan_inclusive_add( short value );
+ushort __ovld __conv sub_group_scan_inclusive_add( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_min( char value );
+uchar __ovld __conv sub_group_scan_inclusive_min( uchar value );
+short __ovld __conv sub_group_scan_inclusive_min( short value );
+ushort __ovld __conv sub_group_scan_inclusive_min( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_max( char value );
+uchar __ovld __conv sub_group_scan_inclusive_max( uchar value );
+short __ovld __conv sub_group_scan_inclusive_max( short value );
+ushort __ovld __conv sub_group_scan_inclusive_max( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_add( char value );
+uchar __ovld __conv sub_group_scan_exclusive_add( uchar value );
+short __ovld __conv sub_group_scan_exclusive_add( short value );
+ushort __ovld __conv sub_group_scan_exclusive_add( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_min( char value );
+uchar __ovld __conv sub_group_scan_exclusive_min( uchar value );
+short __ovld __conv sub_group_scan_exclusive_min( short value );
+ushort __ovld __conv sub_group_scan_exclusive_min( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_max( char value );
+uchar __ovld __conv sub_group_scan_exclusive_max( uchar value );
+short __ovld __conv sub_group_scan_exclusive_max( short value );
+ushort __ovld __conv sub_group_scan_exclusive_max( ushort value );
+
+#if defined(cl_khr_fp16)
+// scalar half broadcast is part of cl_khr_subgroups
+half2 __ovld __conv sub_group_broadcast( half2 value, uint index );
+half3 __ovld __conv sub_group_broadcast( half3 value, uint index );
+half4 __ovld __conv sub_group_broadcast( half4 value, uint index );
+half8 __ovld __conv sub_group_broadcast( half8 value, uint index );
+half16 __ovld __conv sub_group_broadcast( half16 value, uint index );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+// scalar double broadcast is part of cl_khr_subgroups
+double2 __ovld __conv sub_group_broadcast( double2 value, uint index );
+double3 __ovld __conv sub_group_broadcast( double3 value, uint index );
+double4 __ovld __conv sub_group_broadcast( double4 value, uint index );
+double8 __ovld __conv sub_group_broadcast( double8 value, uint index );
+double16 __ovld __conv sub_group_broadcast( double16 value, uint index );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_extended_types
+
+#if defined(cl_khr_subgroup_non_uniform_vote)
+int __ovld sub_group_elect(void);
+int __ovld sub_group_non_uniform_all( int predicate );
+int __ovld sub_group_non_uniform_any( int predicate );
+
+int __ovld sub_group_non_uniform_all_equal( char value );
+int __ovld sub_group_non_uniform_all_equal( uchar value );
+int __ovld sub_group_non_uniform_all_equal( short value );
+int __ovld sub_group_non_uniform_all_equal( ushort value );
+int __ovld sub_group_non_uniform_all_equal( int value );
+int __ovld sub_group_non_uniform_all_equal( uint value );
+int __ovld sub_group_non_uniform_all_equal( long value );
+int __ovld sub_group_non_uniform_all_equal( ulong value );
+int __ovld sub_group_non_uniform_all_equal( float value );
+
+#if defined(cl_khr_fp16)
+int __ovld sub_group_non_uniform_all_equal( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+int __ovld sub_group_non_uniform_all_equal( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_non_uniform_vote
+
+#if defined(cl_khr_subgroup_ballot)
+char __ovld sub_group_non_uniform_broadcast( char value, uint index );
+char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index );
+char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index );
+char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index );
+char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index );
+char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index );
+
+uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index );
+uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index );
+uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index );
+uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index );
+uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index );
+uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index );
+
+short __ovld sub_group_non_uniform_broadcast( short value, uint index );
+short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index );
+short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index );
+short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index );
+short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index );
+short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index );
+
+ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index );
+ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index );
+ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index );
+ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index );
+ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index );
+ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index );
+
+int __ovld sub_group_non_uniform_broadcast( int value, uint index );
+int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index );
+int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index );
+int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index );
+int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index );
+int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index );
+
+uint __ovld sub_group_non_uniform_broadcast( uint value, uint index );
+uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index );
+uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index );
+uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index );
+uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index );
+uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index );
+
+long __ovld sub_group_non_uniform_broadcast( long value, uint index );
+long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index );
+long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index );
+long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index );
+long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index );
+long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index );
+
+ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index );
+ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index );
+ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index );
+ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index );
+ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index );
+ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index );
+
+float __ovld sub_group_non_uniform_broadcast( float value, uint index );
+float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index );
+float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index );
+float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index );
+float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index );
+float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index );
+
+char __ovld sub_group_broadcast_first( char value );
+uchar __ovld sub_group_broadcast_first( uchar value );
+short __ovld sub_group_broadcast_first( short value );
+ushort __ovld sub_group_broadcast_first( ushort value );
+int __ovld sub_group_broadcast_first( int value );
+uint __ovld sub_group_broadcast_first( uint value );
+long __ovld sub_group_broadcast_first( long value );
+ulong __ovld sub_group_broadcast_first( ulong value );
+float __ovld sub_group_broadcast_first( float value );
+
+uint4 __ovld sub_group_ballot( int predicate );
+int __ovld __cnfn sub_group_inverse_ballot( uint4 value );
+int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index );
+uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value );
+
+uint __ovld sub_group_ballot_inclusive_scan( uint4 value );
+uint __ovld sub_group_ballot_exclusive_scan( uint4 value );
+uint __ovld sub_group_ballot_find_lsb( uint4 value );
+uint __ovld sub_group_ballot_find_msb( uint4 value );
+
+uint4 __ovld __cnfn get_sub_group_eq_mask(void);
+uint4 __ovld __cnfn get_sub_group_ge_mask(void);
+uint4 __ovld __cnfn get_sub_group_gt_mask(void);
+uint4 __ovld __cnfn get_sub_group_le_mask(void);
+uint4 __ovld __cnfn get_sub_group_lt_mask(void);
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_non_uniform_broadcast( half value, uint index );
+half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index );
+half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index );
+half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index );
+half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index );
+half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index );
+
+half __ovld sub_group_broadcast_first( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_non_uniform_broadcast( double value, uint index );
+double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index );
+double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index );
+double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index );
+double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index );
+double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index );
+
+double __ovld sub_group_broadcast_first( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_ballot
+
+#if defined(cl_khr_subgroup_non_uniform_arithmetic)
+char __ovld sub_group_non_uniform_reduce_add( char value );
+uchar __ovld sub_group_non_uniform_reduce_add( uchar value );
+short __ovld sub_group_non_uniform_reduce_add( short value );
+ushort __ovld sub_group_non_uniform_reduce_add( ushort value );
+int __ovld sub_group_non_uniform_reduce_add( int value );
+uint __ovld sub_group_non_uniform_reduce_add( uint value );
+long __ovld sub_group_non_uniform_reduce_add( long value );
+ulong __ovld sub_group_non_uniform_reduce_add( ulong value );
+float __ovld sub_group_non_uniform_reduce_add( float value );
+
+char __ovld sub_group_non_uniform_reduce_mul( char value );
+uchar __ovld sub_group_non_uniform_reduce_mul( uchar value );
+short __ovld sub_group_non_uniform_reduce_mul( short value );
+ushort __ovld sub_group_non_uniform_reduce_mul( ushort value );
+int __ovld sub_group_non_uniform_reduce_mul( int value );
+uint __ovld sub_group_non_uniform_reduce_mul( uint value );
+long __ovld sub_group_non_uniform_reduce_mul( long value );
+ulong __ovld sub_group_non_uniform_reduce_mul( ulong value );
+float __ovld sub_group_non_uniform_reduce_mul( float value );
+
+char __ovld sub_group_non_uniform_reduce_min( char value );
+uchar __ovld sub_group_non_uniform_reduce_min( uchar value );
+short __ovld sub_group_non_uniform_reduce_min( short value );
+ushort __ovld sub_group_non_uniform_reduce_min( ushort value );
+int __ovld sub_group_non_uniform_reduce_min( int value );
+uint __ovld sub_group_non_uniform_reduce_min( uint value );
+long __ovld sub_group_non_uniform_reduce_min( long value );
+ulong __ovld sub_group_non_uniform_reduce_min( ulong value );
+float __ovld sub_group_non_uniform_reduce_min( float value );
+
+char __ovld sub_group_non_uniform_reduce_max( char value );
+uchar __ovld sub_group_non_uniform_reduce_max( uchar value );
+short __ovld sub_group_non_uniform_reduce_max( short value );
+ushort __ovld sub_group_non_uniform_reduce_max( ushort value );
+int __ovld sub_group_non_uniform_reduce_max( int value );
+uint __ovld sub_group_non_uniform_reduce_max( uint value );
+long __ovld sub_group_non_uniform_reduce_max( long value );
+ulong __ovld sub_group_non_uniform_reduce_max( ulong value );
+float __ovld sub_group_non_uniform_reduce_max( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_add( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_add( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_add( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_add( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_add( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_mul( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_mul( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_mul( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_mul( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_mul( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_min( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_min( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_min( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_min( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_min( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_max( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_max( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_max( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_max( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_max( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_add( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_add( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_add( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_add( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_add( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_mul( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_mul( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_mul( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_mul( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_mul( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_min( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_min( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_min( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_min( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_min( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_max( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_max( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_max( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_max( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_max( float value );
+
+char __ovld sub_group_non_uniform_reduce_and( char value );
+uchar __ovld sub_group_non_uniform_reduce_and( uchar value );
+short __ovld sub_group_non_uniform_reduce_and( short value );
+ushort __ovld sub_group_non_uniform_reduce_and( ushort value );
+int __ovld sub_group_non_uniform_reduce_and( int value );
+uint __ovld sub_group_non_uniform_reduce_and( uint value );
+long __ovld sub_group_non_uniform_reduce_and( long value );
+ulong __ovld sub_group_non_uniform_reduce_and( ulong value );
+
+char __ovld sub_group_non_uniform_reduce_or( char value );
+uchar __ovld sub_group_non_uniform_reduce_or( uchar value );
+short __ovld sub_group_non_uniform_reduce_or( short value );
+ushort __ovld sub_group_non_uniform_reduce_or( ushort value );
+int __ovld sub_group_non_uniform_reduce_or( int value );
+uint __ovld sub_group_non_uniform_reduce_or( uint value );
+long __ovld sub_group_non_uniform_reduce_or( long value );
+ulong __ovld sub_group_non_uniform_reduce_or( ulong value );
+
+char __ovld sub_group_non_uniform_reduce_xor( char value );
+uchar __ovld sub_group_non_uniform_reduce_xor( uchar value );
+short __ovld sub_group_non_uniform_reduce_xor( short value );
+ushort __ovld sub_group_non_uniform_reduce_xor( ushort value );
+int __ovld sub_group_non_uniform_reduce_xor( int value );
+uint __ovld sub_group_non_uniform_reduce_xor( uint value );
+long __ovld sub_group_non_uniform_reduce_xor( long value );
+ulong __ovld sub_group_non_uniform_reduce_xor( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_and( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_and( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_and( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_and( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_or( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_or( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_or( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_or( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_xor( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_xor( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_xor( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_xor( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_and( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_and( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_and( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_and( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_or( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_or( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_or( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_or( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_xor( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_xor( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_xor( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_xor( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value );
+
+int __ovld sub_group_non_uniform_reduce_logical_and( int predicate );
+int __ovld sub_group_non_uniform_reduce_logical_or( int predicate );
+int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate );
+
+int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate );
+int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate );
+int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate );
+
+int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate );
+int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate );
+int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_non_uniform_reduce_add( half value );
+half __ovld sub_group_non_uniform_reduce_mul( half value );
+half __ovld sub_group_non_uniform_reduce_min( half value );
+half __ovld sub_group_non_uniform_reduce_max( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_add( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_mul( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_min( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_max( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_add( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_mul( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_min( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_max( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_non_uniform_reduce_add( double value );
+double __ovld sub_group_non_uniform_reduce_mul( double value );
+double __ovld sub_group_non_uniform_reduce_min( double value );
+double __ovld sub_group_non_uniform_reduce_max( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_add( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_mul( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_min( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_max( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_add( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_mul( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_min( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_max( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_non_uniform_arithmetic
+
+#if defined(cl_khr_subgroup_shuffle)
+char __ovld sub_group_shuffle( char value, uint index );
+uchar __ovld sub_group_shuffle( uchar value, uint index );
+short __ovld sub_group_shuffle( short value, uint index );
+ushort __ovld sub_group_shuffle( ushort value, uint index );
+int __ovld sub_group_shuffle( int value, uint index );
+uint __ovld sub_group_shuffle( uint value, uint index );
+long __ovld sub_group_shuffle( long value, uint index );
+ulong __ovld sub_group_shuffle( ulong value, uint index );
+float __ovld sub_group_shuffle( float value, uint index );
+
+char __ovld sub_group_shuffle_xor( char value, uint mask );
+uchar __ovld sub_group_shuffle_xor( uchar value, uint mask );
+short __ovld sub_group_shuffle_xor( short value, uint mask );
+ushort __ovld sub_group_shuffle_xor( ushort value, uint mask );
+int __ovld sub_group_shuffle_xor( int value, uint mask );
+uint __ovld sub_group_shuffle_xor( uint value, uint mask );
+long __ovld sub_group_shuffle_xor( long value, uint mask );
+ulong __ovld sub_group_shuffle_xor( ulong value, uint mask );
+float __ovld sub_group_shuffle_xor( float value, uint mask );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_shuffle( half value, uint index );
+half __ovld sub_group_shuffle_xor( half value, uint mask );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_shuffle( double value, uint index );
+double __ovld sub_group_shuffle_xor( double value, uint mask );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_shuffle
+
+#if defined(cl_khr_subgroup_shuffle_relative)
+char __ovld sub_group_shuffle_up( char value, uint delta );
+uchar __ovld sub_group_shuffle_up( uchar value, uint delta );
+short __ovld sub_group_shuffle_up( short value, uint delta );
+ushort __ovld sub_group_shuffle_up( ushort value, uint delta );
+int __ovld sub_group_shuffle_up( int value, uint delta );
+uint __ovld sub_group_shuffle_up( uint value, uint delta );
+long __ovld sub_group_shuffle_up( long value, uint delta );
+ulong __ovld sub_group_shuffle_up( ulong value, uint delta );
+float __ovld sub_group_shuffle_up( float value, uint delta );
+
+char __ovld sub_group_shuffle_down( char value, uint delta );
+uchar __ovld sub_group_shuffle_down( uchar value, uint delta );
+short __ovld sub_group_shuffle_down( short value, uint delta );
+ushort __ovld sub_group_shuffle_down( ushort value, uint delta );
+int __ovld sub_group_shuffle_down( int value, uint delta );
+uint __ovld sub_group_shuffle_down( uint value, uint delta );
+long __ovld sub_group_shuffle_down( long value, uint delta );
+ulong __ovld sub_group_shuffle_down( ulong value, uint delta );
+float __ovld sub_group_shuffle_down( float value, uint delta );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_shuffle_up( half value, uint delta );
+half __ovld sub_group_shuffle_down( half value, uint delta );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_shuffle_up( double value, uint delta );
+double __ovld sub_group_shuffle_down( double value, uint delta );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_shuffle_relative
+
+#if defined(cl_khr_subgroup_clustered_reduce)
+char __ovld sub_group_clustered_reduce_add( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_add( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_add( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_add( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_add( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_min( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_min( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_min( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_min( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_min( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_max( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_max( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_max( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_max( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_max( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_and( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_and( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_and( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_and( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_or( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_or( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_or( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_or( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize );
+
+int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize );
+int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize );
+int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_clustered_reduce_add( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_min( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_max( half value, uint clustersize );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_clustered_reduce_add( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_min( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_clustered_reduce
+
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl
index 0e6bbb7d3bcd..86c78143a0eb 100644
--- a/clang/test/SemaOpenCL/extension-version.cl
+++ b/clang/test/SemaOpenCL/extension-version.cl
@@ -333,3 +333,86 @@
#endif
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_extended_types
+#error "Missing cl_khr_subgroup_extended_types"
+#endif
+#else
+#ifdef cl_khr_subgroup_extended_types
+#error "Incorrect cl_khr_subgroup_extended_types define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_non_uniform_vote
+#error "Missing cl_khr_subgroup_non_uniform_vote"
+#endif
+#else
+#ifdef cl_khr_subgroup_non_uniform_vote
+#error "Incorrect cl_khr_subgroup_non_uniform_vote define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_ballot
+#error "Missing cl_khr_subgroup_ballot"
+#endif
+#else
+#ifdef cl_khr_subgroup_ballot
+#error "Incorrect cl_khr_subgroup_ballot define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_non_uniform_arithmetic
+#error "Missing cl_khr_subgroup_non_uniform_arithmetic"
+#endif
+#else
+#ifdef cl_khr_subgroup_non_uniform_arithmetic
+#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_shuffle
+#error "Missing cl_khr_subgroup_shuffle"
+#endif
+#else
+#ifdef cl_khr_subgroup_shuffle
+#error "Incorrect cl_khr_subgroup_shuffle define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_shuffle_relative
+#error "Missing cl_khr_subgroup_shuffle_relative"
+#endif
+#else
+#ifdef cl_khr_subgroup_shuffle_relative
+#error "Incorrect cl_khr_subgroup_shuffle_relative define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_clustered_reduce
+#error "Missing cl_khr_subgroup_clustered_reduce"
+#endif
+#else
+#ifdef cl_khr_subgroup_clustered_reduce
+#error "Incorrect cl_khr_subgroup_clustered_reduce define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
--
2.17.1

View File

@ -1,33 +0,0 @@
From 331e323ae2633a8999a660314022491d670c442c Mon Sep 17 00:00:00 2001
From: Andrea Bocci <andrea.bocci@cern.ch>
Date: Sun, 15 Mar 2020 17:35:44 +0100
Subject: [PATCH 2/3] Fix building in-tree with cmake -DLLVM_LINK_LLVM_DYLIB=ON
Building in-tree with LLVM 11.0 master with the LLVM_LINK_LLVM_DYLIB
cmake flag fails to link with the LLVMSPIRVLib library.
Add an explicit dependency to force the correct build order and linking.
Signed-off-by: Andrea Bocci <andrea.bocci@cern.ch>
Upstream-Status: Backport
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
---
tools/llvm-spirv/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/llvm-spirv/CMakeLists.txt b/tools/llvm-spirv/CMakeLists.txt
index 9aa96d9c..501c0daf 100644
--- a/tools/llvm-spirv/CMakeLists.txt
+++ b/tools/llvm-spirv/CMakeLists.txt
@@ -14,7 +14,7 @@ add_llvm_tool(llvm-spirv
NO_INSTALL_RPATH
)
-if (LLVM_SPIRV_BUILD_EXTERNAL)
+if (LLVM_SPIRV_BUILD_EXTERNAL OR LLVM_LINK_LLVM_DYLIB)
target_link_libraries(llvm-spirv PRIVATE LLVMSPIRVLib)
endif()
--
2.17.1

View File

@ -1,982 +0,0 @@
From fbc9996d6490a5d4720b85b47f38335e7fdc99d9 Mon Sep 17 00:00:00 2001
From: haonanya <haonan.yang@intel.com>
Date: Mon, 19 Jul 2021 10:14:20 +0800
Subject: [PATCH 3/3] Add support for cl_ext_float_atomics in SPIRVWriter
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch]
Signed-off-by: haonanya <haonan.yang@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++--
lib/SPIRV/SPIRVToOCL.h | 3 +
lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 -
lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +-
test/AtomicFAddEXTForOCL.ll | 64 +++++++++++++++
test/AtomicFAddExt.ll | 111 ++++++++-----------------
test/AtomicFMaxEXT.ll | 113 +++++++-------------------
test/AtomicFMaxEXTForOCL.ll | 64 +++++++++++++++
test/AtomicFMinEXT.ll | 113 +++++++-------------------
test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++
test/InvalidAtomicBuiltins.cl | 8 --
13 files changed, 417 insertions(+), 260 deletions(-)
create mode 100644 test/AtomicFAddEXTForOCL.ll
create mode 100644 test/AtomicFMaxEXTForOCL.ll
create mode 100644 test/AtomicFMinEXTForOCL.ll
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
index e30aa5be..b676a009 100644
--- a/lib/SPIRV/OCL20ToSPIRV.cpp
+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 ||
DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) {
- // Compute atomic builtins do not support floating types.
- if (CI.getType()->isFloatingPointTy() &&
- isComputeAtomicOCLBuiltin(DemangledName))
- return;
+ // Compute "atom" prefixed builtins do not support floating types.
+ if (CI.getType()->isFloatingPointTy()) {
+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0)
+ return;
+ // handle functions which are "atomic_" prefixed.
+ StringRef Stem = DemangledName;
+ Stem = Stem.drop_front(strlen("atomic_"));
+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor,
+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit,
+ // fetch_and_explicit} should be identified as function call
+ bool IsFunctionCall = llvm::StringSwitch<bool>(Stem)
+ .Case("add", true)
+ .Case("sub", true)
+ .Case("inc", true)
+ .Case("dec", true)
+ .Case("cmpxchg", true)
+ .Case("min", true)
+ .Case("max", true)
+ .Case("or", true)
+ .Case("xor", true)
+ .Case("and", true)
+ .Case("fetch_or", true)
+ .Case("fetch_and", true)
+ .Case("fetch_xor", true)
+ .Case("fetch_or_explicit", true)
+ .Case("fetch_xor_explicit", true)
+ .Case("fetch_and_explicit", true)
+ .Default(false);
+ if (IsFunctionCall)
+ return;
+ if (F->arg_size() != 2) {
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
+ .Case("exchange", true)
+ .Case("fetch_add", true)
+ .Case("fetch_sub", true)
+ .Case("fetch_min", true)
+ .Case("fetch_max", true)
+ .Case("load", true)
+ .Case("store", true)
+ .Default(false);
+ if (IsFunctionCall)
+ return;
+ }
+ if (F->arg_size() != 3 && F->arg_size() != 4) {
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
+ .Case("exchange_explicit", true)
+ .Case("fetch_add_explicit", true)
+ .Case("fetch_sub_explicit", true)
+ .Case("fetch_min_explicit", true)
+ .Case("fetch_max_explicit", true)
+ .Case("load_explicit", true)
+ .Case("store_explicit", true)
+ .Default(false);
+ if (IsFunctionCall)
+ return;
+ }
+ }
auto PCI = &CI;
if (DemangledName == kOCLBuiltinName::AtomicInit) {
@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
mutateCallInstSPIRV(
M, CI,
- [=](CallInst *CI, std::vector<Value *> &Args) {
+ [=](CallInst *CI, std::vector<Value *> &Args) -> std::string {
Info.PostProc(Args);
// Order of args in OCL20:
// object, 0-2 other args, 1-2 order, scope
@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
std::rotate(Args.begin() + 2, Args.begin() + OrderIdx,
Args.end() - Offset);
}
- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
+ llvm::Type* AtomicBuiltinsReturnType =
+ CI->getCalledFunction()->getReturnType();
+ auto IsFPType = [](llvm::Type *ReturnType) {
+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() ||
+ ReturnType->isDoubleTy();
+ };
+ auto SPIRVFunctionName =
+ getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
+ if (!IsFPType(AtomicBuiltinsReturnType))
+ return SPIRVFunctionName;
+ // Translate FP-typed atomic builtins.
+ return llvm::StringSwitch<std::string>(SPIRVFunctionName)
+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT");
},
&Attrs);
}
diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h
index ddeec0b6..006fb0b1 100644
--- a/lib/SPIRV/SPIRVToOCL.h
+++ b/lib/SPIRV/SPIRVToOCL.h
@@ -178,6 +178,9 @@ public:
/// using separate maps for OpenCL 1.2 and OpenCL 2.0
virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0;
+ // Transform FP atomic opcode to corresponding OpenCL function name
+ virtual std::string mapFPAtomicName(Op OC) = 0;
+
private:
/// Transform uniform group opcode to corresponding OpenCL function name,
/// example: GroupIAdd(Reduce) => group_iadd => work_group_reduce_add |
diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp
index afddd596..d7f00de3 100644
--- a/lib/SPIRV/SPIRVToOCL12.cpp
+++ b/lib/SPIRV/SPIRVToOCL12.cpp
@@ -104,6 +104,9 @@ public:
/// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
std::string mapAtomicName(Op OC, Type *Ty);
+ // Transform FP atomic opcode to corresponding OpenCL function name
+ std::string mapFPAtomicName(Op OC) override;
+
static char ID;
};
@@ -338,6 +341,21 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) {
return NewCI;
}
+std::string SPIRVToOCL12::mapFPAtomicName(Op OC) {
+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
+ "AtomicF{Add/Min/Max}EXT!");
+ switch (OC) {
+ case OpAtomicFAddEXT:
+ return "atomic_add";
+ case OpAtomicFMinEXT:
+ return "atomic_min";
+ case OpAtomicFMaxEXT:
+ return "atomic_max";
+ default:
+ llvm_unreachable("Unsupported opcode!");
+ }
+}
+
Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
return mutateCallInstOCL(
@@ -351,6 +369,9 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) {
std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix
: kOCLBuiltinName::AtomicPrefix;
+ // Map fp atomic instructions to regular OpenCL built-ins.
+ if (isFPAtomicOpCode(OC))
+ return mapFPAtomicName(OC);
return Prefix += OCL12SPIRVBuiltinMap::rmap(OC);
}
diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp
index d829ff42..01d088e9 100644
--- a/lib/SPIRV/SPIRVToOCL20.cpp
+++ b/lib/SPIRV/SPIRVToOCL20.cpp
@@ -82,6 +82,9 @@ public:
/// compare_exchange_strong/weak_explicit
Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override;
+ // Transform FP atomic opcode to corresponding OpenCL function name
+ std::string mapFPAtomicName(Op OC) override;
+
static char ID;
};
@@ -144,11 +147,29 @@ void SPIRVToOCL20::visitCallSPIRVControlBarrier(CallInst *CI) {
&Attrs);
}
+std::string SPIRVToOCL20::mapFPAtomicName(Op OC) {
+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
+ "AtomicF{Add/Min/Max}EXT!");
+ switch (OC) {
+ case OpAtomicFAddEXT:
+ return "atomic_fetch_add_explicit";
+ case OpAtomicFMinEXT:
+ return "atomic_fetch_min_explicit";
+ case OpAtomicFMaxEXT:
+ return "atomic_fetch_max_explicit";
+ default:
+ llvm_unreachable("Unsupported opcode!");
+ }
+}
+
Instruction *SPIRVToOCL20::mutateAtomicName(CallInst *CI, Op OC) {
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
return mutateCallInstOCL(
M, CI,
[=](CallInst *, std::vector<Value *> &Args) {
+ // Map fp atomic instructions to regular OpenCL built-ins.
+ if (isFPAtomicOpCode(OC))
+ return mapFPAtomicName(OC);
return OCLSPIRVBuiltinMap::rmap(OC);
},
&Attrs);
@@ -215,7 +236,12 @@ CallInst *SPIRVToOCL20::mutateCommonAtomicArguments(CallInst *CI, Op OC) {
}
}
auto Ptr = findFirstPtr(Args);
- auto Name = OCLSPIRVBuiltinMap::rmap(OC);
+ std::string Name;
+ // Map fp atomic instructions to regular OpenCL built-ins.
+ if (isFPAtomicOpCode(OC))
+ Name = mapFPAtomicName(OC);
+ else
+ Name = OCLSPIRVBuiltinMap::rmap(OC);
auto NumOrder = getSPIRVAtomicBuiltinNumMemoryOrderArgs(OC);
auto ScopeIdx = Ptr + 1;
auto OrderIdx = Ptr + 2;
diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
index 13f93fbe..7b707993 100644
--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
+++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
@@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT");
add(CapabilityAtomicFloat32MinMaxEXT, "AtomicFloat32MinMaxEXT");
add(CapabilityAtomicFloat64MinMaxEXT, "AtomicFloat64MinMaxEXT");
- add(CapabilityAtomicFloat16MinMaxEXT, "AtomicFloat16MinMaxEXT");
add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL");
add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL");
add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL");
diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
index feec70f6..8e595e83 100644
--- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h
+++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
@@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() {
}
SPIRV_DEF_NAMEMAP(Op, OpCodeNameMap)
+inline bool isFPAtomicOpCode(Op OpCode) {
+ return OpCode == OpAtomicFAddEXT || OpCode == OpAtomicFMinEXT ||
+ OpCode == OpAtomicFMaxEXT;
+}
+
inline bool isAtomicOpCode(Op OpCode) {
static_assert(OpAtomicLoad < OpAtomicXor, "");
return ((unsigned)OpCode >= OpAtomicLoad &&
(unsigned)OpCode <= OpAtomicXor) ||
- OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear;
+ OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear ||
+ isFPAtomicOpCode(OpCode);
}
inline bool isBinaryOpCode(Op OpCode) {
return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) ||
diff --git a/test/AtomicFAddEXTForOCL.ll b/test/AtomicFAddEXTForOCL.ll
new file mode 100644
index 00000000..fb146fb9
--- /dev/null
+++ b/test/AtomicFAddEXTForOCL.ll
@@ -0,0 +1,64 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv
+; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
+
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+; CHECK-SPIRV: Capability AtomicFloat32AddEXT
+; CHECK-SPIRV: Capability AtomicFloat64AddEXT
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add"
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
+
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
+ %call = tail call spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
+ %call = tail call spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
+ ret void
+}
+; Function Attrs: convergent
+declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
diff --git a/test/AtomicFAddExt.ll b/test/AtomicFAddExt.ll
index 011dd8a7..42bdfeea 100644
--- a/test/AtomicFAddExt.ll
+++ b/test/AtomicFAddExt.ll
@@ -4,20 +4,16 @@
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-target triple = "spir64-unknown-unknown-sycldevice"
-
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-
-$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
-$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown-sycldevice"
; CHECK-SPIRV: Capability AtomicFloat32AddEXT
; CHECK-SPIRV: Capability AtomicFloat64AddEXT
@@ -25,62 +21,43 @@ $_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_3
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
-; Function Attrs: convergent norecurse mustprogress
-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func float @_Z14AtomicFloatIncRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
- %5 = extractelement <3 x i64> %4, i64 0
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2
- %add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00
- %sext.i = shl i64 %5, 32
- %conv5.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
- store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
- ret void
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_add[[:alnum:]]+ff]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
+ %call3.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
+ ret float %call3.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
-; Function Attrs: convergent norecurse mustprogress
-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func double @_Z15AtomicDoubleIncRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
- %5 = extractelement <3 x i64> %4, i64 0
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2
- %add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00
- %sext.i = shl i64 %5, 32
- %conv5.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
- store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
- ret void
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_add[[:alnum:]]+dd]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
+ %call3.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
+ ret double %call3.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
-attributes #0 = { convergent norecurse }
-attributes #1 = { convergent }
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { convergent nounwind }
!llvm.module.flags = !{!0}
@@ -91,29 +68,5 @@ attributes #2 = { convergent nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 2}
!2 = !{i32 4, i32 100000}
-!3 = !{!"clang version 12.0.0"}
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
-!5 = !{!6, !8, !10, !12}
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!14 = !{!15, !15, i64 0}
-!15 = !{!"float", !16, i64 0}
-!16 = !{!"omnipotent char", !17, i64 0}
-!17 = !{!"Simple C++ TBAA"}
-!18 = !{!19, !21, !23, !25}
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!27 = !{!28, !28, i64 0}
-!28 = !{!"double", !16, i64 0}
+!3 = !{!"clang version 13.0.0"}
+
diff --git a/test/AtomicFMaxEXT.ll b/test/AtomicFMaxEXT.ll
index 1b81e53b..1c2eec93 100644
--- a/test/AtomicFMaxEXT.ll
+++ b/test/AtomicFMaxEXT.ll
@@ -4,20 +4,16 @@
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-target triple = "spir64-unknown-unknown-sycldevice"
-
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-
-$_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
-$_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown-sycldevice"
; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
@@ -25,65 +21,42 @@ $_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
-; Function Attrs: convergent norecurse
-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func float @_Z14AtomicFloatMaxRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
- %5 = extractelement <3 x i64> %4, i64 0
- %conv.i = trunc i64 %5 to i32
- %conv3.i = sitofp i32 %conv.i to float
- %add.i = fadd float %conv3.i, 1.000000e+00
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
- %sext.i = shl i64 %5, 32
- %conv6.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
- ret void
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_max[[:alnum:]]+ff]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
+ ret float %call.i.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
declare dso_local spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
-; Function Attrs: convergent norecurse
-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func double @_Z15AtomicDoubleMaxRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
- %5 = extractelement <3 x i64> %4, i64 0
- %conv.i = trunc i64 %5 to i32
- %conv3.i = sitofp i32 %conv.i to double
- %add.i = fadd double %conv3.i, 1.000000e+00
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
- %sext.i = shl i64 %5, 32
- %conv6.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
- ret void
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_max[[:alnum:]]+dd]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
+ ret double %call.i.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
declare dso_local spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { convergent nounwind }
@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 2}
!2 = !{i32 4, i32 100000}
-!3 = !{!"clang version 12.0.0"}
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
-!5 = !{!6, !8, !10, !12}
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!14 = !{!15, !15, i64 0}
-!15 = !{!"float", !16, i64 0}
-!16 = !{!"omnipotent char", !17, i64 0}
-!17 = !{!"Simple C++ TBAA"}
-!18 = !{!19, !21, !23, !25}
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!27 = !{!28, !28, i64 0}
-!28 = !{!"double", !16, i64 0}
+!3 = !{!"clang version 13.0.0"}
+
diff --git a/test/AtomicFMaxEXTForOCL.ll b/test/AtomicFMaxEXTForOCL.ll
new file mode 100644
index 00000000..1f2530d9
--- /dev/null
+++ b/test/AtomicFMaxEXTForOCL.ll
@@ -0,0 +1,64 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
+; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
+
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
+ %call = tail call spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
+ %call = tail call spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
diff --git a/test/AtomicFMinEXT.ll b/test/AtomicFMinEXT.ll
index 98c98b8e..9e40a669 100644
--- a/test/AtomicFMinEXT.ll
+++ b/test/AtomicFMinEXT.ll
@@ -4,20 +4,16 @@
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-target triple = "spir64-unknown-unknown-sycldevice"
-
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
-
-$_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
-$_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown-sycldevice"
; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
@@ -25,65 +21,42 @@ $_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
-; Function Attrs: convergent norecurse
-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func float @_Z14AtomicFloatMinRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
- %5 = extractelement <3 x i64> %4, i64 0
- %conv.i = trunc i64 %5 to i32
- %conv3.i = sitofp i32 %conv.i to float
- %add.i = fadd float %conv3.i, 1.000000e+00
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
- %sext.i = shl i64 %5, 32
- %conv6.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
- ret void
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_min[[:alnum:]]+ff]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
+ ret float %call.i.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
declare dso_local spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
-; Function Attrs: convergent norecurse
-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func double @_Z15AtomicDoubleMinRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
entry:
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
- %1 = load i64, i64* %0, align 8
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
- %3 = load i64, i64* %2, align 8
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
- %5 = extractelement <3 x i64> %4, i64 0
- %conv.i = trunc i64 %5 to i32
- %conv3.i = sitofp i32 %conv.i to double
- %add.i = fadd double %conv3.i, 1.000000e+00
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
- %sext.i = shl i64 %5, 32
- %conv6.i = ashr exact i64 %sext.i, 32
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
- ret void
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_min[[:alnum:]]+dd]]({{.*}})
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
+ ret double %call.i.i.i
}
; Function Attrs: convergent
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
declare dso_local spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { convergent nounwind }
@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 2}
!2 = !{i32 4, i32 100000}
-!3 = !{!"clang version 12.0.0 (https://github.com/otcshare/llvm.git 67add71766d55d6a8d8d894822f583d6365a3b7d)"}
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
-!5 = !{!6, !8, !10, !12}
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!14 = !{!15, !15, i64 0}
-!15 = !{!"float", !16, i64 0}
-!16 = !{!"omnipotent char", !17, i64 0}
-!17 = !{!"Simple C++ TBAA"}
-!18 = !{!19, !21, !23, !25}
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
-!27 = !{!28, !28, i64 0}
-!28 = !{!"double", !16, i64 0}
+!3 = !{!"clang version 13.0.0"}
+
diff --git a/test/AtomicFMinEXTForOCL.ll b/test/AtomicFMinEXTForOCL.ll
new file mode 100644
index 00000000..6196b0f8
--- /dev/null
+++ b/test/AtomicFMinEXTForOCL.ll
@@ -0,0 +1,64 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
+; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
+
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
+ %call = tail call spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
+entry:
+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
+ %call = tail call spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
+ ret void
+}
+
+; Function Attrs: convergent
+declare spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
+
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
diff --git a/test/InvalidAtomicBuiltins.cl b/test/InvalidAtomicBuiltins.cl
index b8ec5b89..2182f070 100644
--- a/test/InvalidAtomicBuiltins.cl
+++ b/test/InvalidAtomicBuiltins.cl
@@ -41,13 +41,9 @@ float __attribute__((overloadable)) atomic_fetch_xor(volatile generic atomic_flo
double __attribute__((overloadable)) atomic_fetch_and(volatile generic atomic_double *object, double operand, memory_order order);
double __attribute__((overloadable)) atomic_fetch_max(volatile generic atomic_double *object, double operand, memory_order order);
double __attribute__((overloadable)) atomic_fetch_min(volatile generic atomic_double *object, double operand, memory_order order);
-float __attribute__((overloadable)) atomic_fetch_add_explicit(volatile generic atomic_float *object, float operand, memory_order order);
-float __attribute__((overloadable)) atomic_fetch_sub_explicit(volatile generic atomic_float *object, float operand, memory_order order);
float __attribute__((overloadable)) atomic_fetch_or_explicit(volatile generic atomic_float *object, float operand, memory_order order);
float __attribute__((overloadable)) atomic_fetch_xor_explicit(volatile generic atomic_float *object, float operand, memory_order order);
double __attribute__((overloadable)) atomic_fetch_and_explicit(volatile generic atomic_double *object, double operand, memory_order order);
-double __attribute__((overloadable)) atomic_fetch_max_explicit(volatile generic atomic_double *object, double operand, memory_order order);
-double __attribute__((overloadable)) atomic_fetch_min_explicit(volatile generic atomic_double *object, double operand, memory_order order);
__kernel void test_atomic_fn(volatile __global float *p,
volatile __global double *pp,
@@ -86,11 +82,7 @@ __kernel void test_atomic_fn(volatile __global float *p,
d = atomic_fetch_and(pp, val, order);
d = atomic_fetch_min(pp, val, order);
d = atomic_fetch_max(pp, val, order);
- f = atomic_fetch_add_explicit(p, val, order);
- f = atomic_fetch_sub_explicit(p, val, order);
f = atomic_fetch_or_explicit(p, val, order);
f = atomic_fetch_xor_explicit(p, val, order);
d = atomic_fetch_and_explicit(pp, val, order);
- d = atomic_fetch_min_explicit(pp, val, order);
- d = atomic_fetch_max_explicit(pp, val, order);
}
--
2.17.1

View File

@ -1,35 +0,0 @@
From cfb18b75e8a353bc7486f337541476a36994b063 Mon Sep 17 00:00:00 2001
From: juanrod2 <>
Date: Tue, 22 Dec 2020 08:33:08 +0800
Subject: [PATCH 3/7] Memory leak fix for Managed Static Mutex
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Cleaning a mutex inside ManagedStatic llvm class.
---
llvm/lib/Support/ManagedStatic.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp
index 053493f72fb5..6571580ccecf 100644
--- a/llvm/lib/Support/ManagedStatic.cpp
+++ b/llvm/lib/Support/ManagedStatic.cpp
@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const {
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
- std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
+ getManagedStaticMutex()->lock();
while (StaticList)
StaticList->destroy();
+
+ getManagedStaticMutex()->unlock();
+ delete ManagedStaticMutex;
+ ManagedStaticMutex = nullptr;
}
--
2.17.1

View File

@ -1,49 +0,0 @@
From b794037bf1f90a93efa4c542855ad569cb13b4c5 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou@intel.com>
Date: Mon, 19 Oct 2020 14:43:38 +0800
Subject: [PATCH 4/7] Remove repo name in LLVM IR
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch]
Signed-off-by: Feng Zou <feng.zou@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 1b6519b4b7c4..8fd6b23bb345 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -33,17 +33,18 @@ function(get_source_info path revision repository)
else()
set(remote "origin")
endif()
- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
- WORKING_DIRECTORY ${path}
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output
- ERROR_QUIET)
- if(git_result EQUAL 0)
- string(STRIP "${git_output}" git_output)
- set(${repository} ${git_output} PARENT_SCOPE)
- else()
- set(${repository} ${path} PARENT_SCOPE)
- endif()
+ # Do not show repo name in IR
+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
+ # WORKING_DIRECTORY ${path}
+ # RESULT_VARIABLE git_result
+ # OUTPUT_VARIABLE git_output
+ # ERROR_QUIET)
+ # if(git_result EQUAL 0)
+ # string(STRIP "${git_output}" git_output)
+ # set(${repository} ${git_output} PARENT_SCOPE)
+ # else()
+ # set(${repository} ${path} PARENT_SCOPE)
+ # endif()
endif()
endif()
endfunction()
--
2.17.1

View File

@ -1,47 +0,0 @@
From 3dd4766499d25e5978a5d90001f18e657e875da0 Mon Sep 17 00:00:00 2001
From: haonanya <haonan.yang@intel.com>
Date: Thu, 12 Aug 2021 15:48:34 +0800
Subject: [PATCH 5/7] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR
doesn't require image support
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0003-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch]
Signed-off-by: haonanya <haonan.yang@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
clang/lib/Frontend/InitPreprocessor.cpp | 3 ---
clang/test/Preprocessor/predefined-macros.c | 4 ----
2 files changed, 7 deletions(-)
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index aefd208e6cd3..b4a84636673a 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1108,9 +1108,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \
Builder.defineMacro(#Ext);
#include "clang/Basic/OpenCLExtensions.def"
-
- if (TI.getTriple().isSPIR())
- Builder.defineMacro("__IMAGE_SUPPORT__");
}
if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) {
diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c
index b088a37ba665..39a222d02faf 100644
--- a/clang/test/Preprocessor/predefined-macros.c
+++ b/clang/test/Preprocessor/predefined-macros.c
@@ -184,10 +184,6 @@
// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
-// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \
-// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR
-// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1
-
// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP
// CHECK-HIP-NOT: #define __CUDA_ARCH__
--
2.17.1

View File

@ -1,53 +0,0 @@
From 2c53abd0008bbecfcfe871c6060f4bbf1c94c74a Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 1 Apr 2021 18:41:44 +0200
Subject: [PATCH 6/7] Avoid calling ParseCommandLineOptions in BackendUtil if
possible
Calling `ParseCommandLineOptions` should only be called from `main` as the
CommandLine setup code isn't thread-safe. As BackendUtil is part of the
generic Clang FrontendAction logic, a process which has several threads executing
Clang FrontendActions will randomly crash in the unsafe setup code.
This patch avoids calling the function unless either the debug-pass option or
limit-float-precision option is set. Without these two options set the
`ParseCommandLineOptions` call doesn't do anything beside parsing
the command line `clang` which doesn't set any options.
See also D99652 where LLDB received a workaround for this crash.
Reviewed By: JDevlieghere
Differential Revision: https://reviews.llvm.org/D99740
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0004-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch]
Signed-off-by: Raphael Isemann <teemperor@gmail.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 0bfcab88a3a9..db8fd4166d7a 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -743,7 +743,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
BackendArgs.push_back("-limit-float-precision");
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
}
+ // Check for the default "clang" invocation that won't set any cl::opt values.
+ // Skip trying to parse the command line invocation to avoid the issues
+ // described below.
+ if (BackendArgs.size() == 1)
+ return;
BackendArgs.push_back(nullptr);
+ // FIXME: The command line parser below is not thread-safe and shares a global
+ // state, so this call might crash or overwrite the options of another Clang
+ // instance in the same process.
llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
BackendArgs.data());
}
--
2.17.1

View File

@ -1,377 +0,0 @@
From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001
From: haonanya <haonan.yang@intel.com>
Date: Fri, 13 Aug 2021 10:00:02 +0800
Subject: [PATCH 7/7] support cl_ext_float_atomics
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch]
Signed-off-by: haonanya <haonan.yang@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
clang/lib/Headers/opencl-c-base.h | 25 ++++
clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++
clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++
3 files changed, 329 insertions(+)
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 2cc688ccc3da..86bbee12fdf8 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -14,6 +14,31 @@
#define CL_VERSION_3_0 300
#endif
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+// For SPIR all extensions are supported.
+#if defined(__SPIR__)
+#define cl_ext_float_atomics 1
+#ifdef cl_khr_fp16
+#define __opencl_c_ext_fp16_global_atomic_load_store 1
+#define __opencl_c_ext_fp16_local_atomic_load_store 1
+#define __opencl_c_ext_fp16_global_atomic_add 1
+#define __opencl_c_ext_fp16_local_atomic_add 1
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
+#endif
+#ifdef __opencl_c_fp64
+#define __opencl_c_ext_fp64_global_atomic_add 1
+#define __opencl_c_ext_fp64_local_atomic_add 1
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
+#define __opencl_c_ext_fp64_local_atomic_min_max 1
+#endif
+#define __opencl_c_ext_fp32_global_atomic_add 1
+#define __opencl_c_ext_fp32_local_atomic_add 1
+#define __opencl_c_ext_fp32_global_atomic_min_max 1
+#define __opencl_c_ext_fp32_local_atomic_min_max 1
+#endif // defined(__SPIR__)
+#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+
// Define features for 2.0 for header backward compatibility
#ifndef __opencl_c_int64
#define __opencl_c_int64 1
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 67d900eb1c3d..b463e702d95e 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit(
// defined(cl_khr_int64_extended_atomics)
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
+#if defined(cl_ext_float_atomics)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_max(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_max(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_max(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_max(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_sub(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_sub(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_sub(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_sub(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+
+#endif // cl_ext_float_atomics
+
// atomic_store()
#if defined(__opencl_c_atomic_scope_device) && \
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
index 2716076acdcf..7f720cf28142 100644
--- a/clang/test/Headers/opencl-c-header.cl
+++ b/clang/test/Headers/opencl-c-header.cl
@@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#pragma OPENCL EXTENSION cl_intel_planar_yuv : enable
// CHECK-MOD: Reading modules
+
+// For SPIR all extensions are supported.
+#if defined(__SPIR__)
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+
+#if __opencl_c_ext_fp16_global_atomic_load_store != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store"
+#endif
+#if __opencl_c_ext_fp16_local_atomic_load_store != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store"
+#endif
+#if __opencl_c_ext_fp16_global_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add"
+#endif
+#if __opencl_c_ext_fp32_global_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add"
+#endif
+#if __opencl_c_ext_fp64_global_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add"
+#endif
+#if __opencl_c_ext_fp16_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp32_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp64_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp16_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp32_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp64_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp16_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp32_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp64_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
+#endif
+#else
+
+#ifdef __opencl_c_ext_fp16_global_atomic_load_store
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined"
+#endif
+#ifdef __opencl_c_ext_fp16_local_atomic_load_store
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined"
+#endif
+#ifdef __opencl_c_ext_fp16_global_atomic_add
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp32_global_atomic_add
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp64_global_atomic_add
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp16_local_atomic_add
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp32_local_atomic_add
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp64_local_atomic_add
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined"
+#endif
+#ifdef __opencl_c_ext_fp16_global_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined"
+#endif
+#ifdef __opencl_c_ext_fp32_global_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined"
+#endif
+#ifdef __opencl_c_ext_fp64_global_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined"
+#endif
+#ifdef __opencl_c_ext_fp16_local_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined"
+#endif
+#ifdef __opencl_c_ext_fp32_local_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined"
+#endif
+#ifdef __opencl_c_ext_fp64_local_atomic_min_max
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
+#endif
+
+#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+
+#endif // defined(__SPIR__)
--
2.17.1

View File

@ -1,96 +0,0 @@
From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 11:53:27 +0800
Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in
#1767
It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994.
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 63ddb59fce68..822da2183269 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
}
}
}
-
+#ifndef NDEBUG
// Checked that NewNodes are only used by other NewNodes.
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
SDNode *N = NewNodes[i];
@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
UI != UE; ++UI)
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
+#endif
}
/// This is the main entry point for the type legalizer. This does a top-down
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index faae14444d51..b908c5c58e9f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -155,7 +155,9 @@ private:
const SDValue &getSDValue(TableId &Id) {
RemapId(Id);
assert(Id && "TableId should be non-zero");
- return IdToValueMap[Id];
+ auto I = IdToValueMap.find(Id);
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
+ return I->second;
}
public:
@@ -172,24 +174,29 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
+ assert(Old != New && "node replaced with self");
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
TableId NewId = getTableId(SDValue(New, i));
TableId OldId = getTableId(SDValue(Old, i));
- if (OldId != NewId)
+ if (OldId != NewId) {
ReplacedValues[OldId] = NewId;
- // Delete Node from tables.
+ // Delete Node from tables. We cannot do this when OldId == NewId,
+ // because NewId can still have table references to it in
+ // ReplacedValues.
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+
ValueToIdMap.erase(SDValue(Old, i));
- IdToValueMap.erase(OldId);
- PromotedIntegers.erase(OldId);
- ExpandedIntegers.erase(OldId);
- SoftenedFloats.erase(OldId);
- PromotedFloats.erase(OldId);
- ExpandedFloats.erase(OldId);
- ScalarizedVectors.erase(OldId);
- SplitVectors.erase(OldId);
- WidenedVectors.erase(OldId);
}
}
--
2.17.1

View File

@ -1,105 +0,0 @@
From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 11:56:01 +0800
Subject: [PATCH 2/2] This patch is a fix for #1788.
It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
.../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
2 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 6621fc9f819c..fb7ad93519f6 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
struct apint_match {
const APInt *&Res;
+ bool AllowUndef;
- apint_match(const APInt *&R) : Res(R) {}
+ apint_match(const APInt *&Res, bool AllowUndef)
+ : Res(Res), AllowUndef(AllowUndef) {}
template <typename ITy> bool match(ITy *V) {
if (auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -162,7 +164,8 @@ struct apint_match {
}
if (V->getType()->isVectorTy())
if (const auto *C = dyn_cast<Constant>(V))
- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
+ C->getSplatValue(AllowUndef))) {
Res = &CI->getValue();
return true;
}
@@ -192,7 +195,20 @@ struct apfloat_match {
/// Match a ConstantInt or splatted ConstantVector, binding the
/// specified pointer to the contained APInt.
-inline apint_match m_APInt(const APInt *&Res) { return Res; }
+inline apint_match m_APInt(const APInt *&Res) {
+ // Forbid undefs by default to maintain previous behavior.
+ return apint_match(Res, /* AllowUndef */ false);
+}
+
+/// Match APInt while allowing undefs in splat vector constants.
+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
+ return apint_match(Res, /* AllowUndef */ true);
+}
+
+/// Match APInt while forbidding undefs in splat vector constants.
+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
+ return apint_match(Res, /* AllowUndef */ false);
+}
/// Match a ConstantFP or splatted ConstantVector, binding the
/// specified pointer to the contained APFloat.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index bf32996d96e2..40a246b9d7a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
if (CI->hasOneUse()) {
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+
+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
+ // We have to ensure that vector constants that only differ with
+ // undef elements are treated as equivalent.
+ auto areLooselyEqual = [](Value *A, Value *B) {
+ if (A == B)
+ return true;
+
+ // Test for vector constants.
+ Constant *ConstA, *ConstB;
+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
+ return false;
+
+ // TODO: Deal with FP constants?
+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
+ return false;
+
+ // Compare for equality including undefs as equal.
+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
+ const APInt *C;
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ };
+
+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
return nullptr;
}
}
--
2.17.1

View File

@ -1,43 +0,0 @@
From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:00:23 +0800
Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
It's backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2476fd26f250..2743acc89bca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
--
2.17.1

View File

@ -1,34 +0,0 @@
From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:02:37 +0800
Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
avx512skx-i16x32.
It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
https://reviews.llvm.org/D76312
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2743acc89bca..439a8367dabe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+ // Avoid creating illegal types if running after type legalizer.
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
--
2.17.1

View File

@ -1,40 +0,0 @@
From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:07:25 +0800
Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
createVariablePermute call fails
Account for the case where a recursive createVariablePermute call with a wider vector type fails.
Original test case from @craig.topper (Craig Topper)
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c8720d9ae3a6..63eb050e9b3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
Subtarget, DAG, SDLoc(IndicesVec));
- return extractSubVector(
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
- DAG, DL, SizeInBits);
+ SDValue NewSrcVec =
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
+ if (NewSrcVec)
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
+ return SDValue();
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
// Widen smaller SrcVec to match VT.
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
--
2.17.1

View File

@ -1,61 +0,0 @@
From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:09:42 +0800
Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
avx512skx-i16x32 targets.
This is combination of two commits:
- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 439a8367dabe..b1639c7f275d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
+
+ // We can only create byte sized loads.
+ if (!VT.isByteSized())
+ return SDValue();
+
+ unsigned Index = ExtIdx->getZExtValue();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If the index is a multiple of the extract element count, we can offset the
+ // address by the store size multiplied by the subvector index. Otherwise if
+ // the scalar type is byte sized, we can just use the index multiplied by
+ // the element size in bytes as the offset.
+ unsigned Offset;
+ if (Index % NumElts == 0)
+ Offset = (Index / NumElts) * VT.getStoreSize();
+ else if (VT.getScalarType().isByteSized())
+ Offset = Index * VT.getScalarType().getStoreSize();
+ else
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getOperand(1);
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+ SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
--
2.17.1

View File

@ -1,97 +0,0 @@
From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:13:00 +0800
Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
store zeros in the rest of the byte
We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
Should fix PR48147.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D9129
Upstream-Status: Backport
Signed-off-by:Craig Topper <craig.topper@sifive.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63eb050e9b3a..96b5e2cfbd82 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
if (StoredVal.getValueType().isVector() &&
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
+ assert(NumElts <= 8 && "Unexpected VT");
assert(!St->isTruncatingStore() && "Expected non-truncating store");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
+ // We must pad with zeros to ensure we store zeroes to any unused bits.
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), StoredVal,
DAG.getIntPtrConstant(0, dl));
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+ // Make sure we store zeros in the extra bits.
+ if (NumElts < 8)
+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
+ MVT::getIntegerVT(NumElts));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
StoredVal = DAG.getBitcast(NewVT, StoredVal);
-
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ SDValue Val = StoredVal.getOperand(0);
+ // We must store zeros to the unused bits.
+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
// Widen v2i1/v4i1 stores to v8i1.
- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
Subtarget.hasAVX512()) {
unsigned NumConcats = 8 / VT.getVectorNumElements();
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+ // We must store zeros to the unused bits.
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
Ops[0] = StoredVal;
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 32f012033fb0..d3b92183f87b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
// Load/store kreg
let Predicates = [HasDQI] in {
- def : Pat<(store VK1:$src, addr:$dst),
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
--
2.17.1

View File

@ -1,173 +0,0 @@
From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:15:09 +0800
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
file
Previously we tried to promote these to xmm/ymm/zmm by promoting
in the X86CallingConv.td file. But this breaks when we run out
of xmm/ymm/zmm registers and need to fall back to memory. We end
up trying to create a non-sensical scalar to vector. This lead
to an assertion. The new tests in avx512-calling-conv.ll all
trigger this assertion.
Since we really want to treat these types like we do on avx2,
it seems better to promote them before the calling convention
code gets involved. Except when the calling convention is one
that passes the vXi1 type in a k register.
The changes in avx512-regcall-Mask.ll are because we indicated
that xmm/ymm/zmm types should be passed indirectly for the
Win64 ABI before we go to the common lines that promoted the
vXi1 types. This caused the promoted types to be picked up by
the default calling convention code. Now we promote them earlier
so they get passed indirectly as though they were xmm/ymm/zmm.
Differential Revision: https://reviews.llvm.org/D75154
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
Signed-off-by:Craig Topper <craig.topper@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
1 file changed, 61 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96b5e2cfbd82..d5de94aeb8a2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+ const X86Subtarget &Subtarget) {
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+ // convention is one that uses k registers.
+ if (NumElts == 2)
+ return {MVT::v2i64, 1};
+ if (NumElts == 4)
+ return {MVT::v4i32, 1};
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v8i16, 1};
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v16i8, 1};
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
+ // regcall.
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+ return {MVT::v32i8, 1};
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+ if (Subtarget.useAVX512Regs())
+ return {MVT::v64i8, 1};
+ return {MVT::v32i8, 2};
+ }
+
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+ NumElts > 64)
+ return {MVT::i8, NumElts};
+
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return MVT::v32i8;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return MVT::i8;
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return MVT::v32i1;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return RegisterVT;
+ }
+
// FIXME: Should we just make these types legal and custom split operations?
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return MVT::v16i32;
+
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return 1;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return VT.getVectorNumElements();
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return 2;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return NumRegisters;
+ }
+
// FIXME: Should we just make these types legal and custom split operations?
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return 1;
+
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
Subtarget.hasAVX512() &&
(!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+ VT.getVectorNumElements() > 64)) {
RegisterVT = MVT::i8;
IntermediateVT = MVT::i1;
NumIntermediates = VT.getVectorNumElements();
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
// Split v64i1 vectors if we don't have v64i8 available.
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
CC != CallingConv::X86_RegCall) {
- RegisterVT = MVT::v32i1;
+ RegisterVT = MVT::v32i8;
IntermediateVT = MVT::v32i1;
NumIntermediates = 2;
return 2;
--
2.17.1

View File

@ -1,550 +0,0 @@
From 447cb2e1b2f0d8bdcfd8a0b39f47d28de50b5d82 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic@syrmia.com>
Date: Mon, 9 Mar 2020 11:02:35 +0100
Subject: [PATCH] Enable the call site info only for -g + optimizations
Emit call site info only in the case of '-g' + 'O>0' level.
Differential Revision: https://reviews.llvm.org/D75175
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/c15c68abdc6f1afece637bdedba808676191a8e6]
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
---
clang/include/clang/Basic/CodeGenOptions.def | 2 ++
clang/lib/CodeGen/BackendUtil.cpp | 1 +
clang/lib/Frontend/CompilerInvocation.cpp | 4 +++-
llvm/include/llvm/CodeGen/CommandFlags.inc | 7 +++++++
llvm/include/llvm/Target/TargetOptions.h | 7 ++++++-
llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 ++--
llvm/lib/CodeGen/MachineFunction.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | 2 +-
llvm/test/CodeGen/X86/call-site-info-output.ll | 4 ++--
llvm/test/DebugInfo/AArch64/call-site-info-output.ll | 2 +-
llvm/test/DebugInfo/ARM/call-site-info-output.ll | 2 +-
.../MIR/AArch64/dbgcall-site-interpret-movzxi.mir | 2 +-
.../DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | 2 +-
llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | 2 +-
.../test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | 2 +-
.../test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | 2 +-
.../DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | 2 +-
llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | 2 +-
.../MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | 2 +-
.../MIR/Hexagon/live-debug-values-bundled-entry-values.mir | 2 +-
llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | 2 +-
.../test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | 2 +-
.../test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | 2 +-
.../DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | 2 +-
.../DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | 4 ++--
.../DebugInfo/MIR/X86/entry-value-of-modified-param.mir | 2 +-
llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | 2 +-
.../DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | 2 +-
.../test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | 2 +-
llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 2 +-
llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | 2 +-
.../tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | 2 +-
41 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 1ecae98b13b1..6a6a9465273f 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -64,6 +64,8 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
///< pass manager.
CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.
CODEGENOPT(EnableDebugEntryValues, 1, 0) ///< Emit call site parameter dbg info
+CODEGENOPT(EmitCallSiteInfo, 1, 0) ///< Emit call site info only in the case of
+ ///< '-g' + 'O>0' level.
CODEGENOPT(IndirectTlsSegRefs, 1, 0) ///< Set when -mno-tls-direct-seg-refs
///< is specified.
CODEGENOPT(DisableTailCalls , 1, 0) ///< Do not emit tail calls.
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index db8fd4166d7a..db09f9b641fe 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -482,6 +482,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.EmitAddrsig = CodeGenOpts.Addrsig;
Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues;
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
+ Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 18fa06bf3c6d..2e73dcbdebe4 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -789,8 +789,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
llvm::Triple T(TargetOpts.Triple);
if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
- llvm::is_contained(DebugEntryValueArchs, T.getArch()))
+ llvm::is_contained(DebugEntryValueArchs, T.getArch())) {
Opts.EnableDebugEntryValues = Args.hasArg(OPT_femit_debug_entry_values);
+ Opts.EmitCallSiteInfo = true;
+ }
Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone);
Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
index 6475a5b19edb..36073fe9cc98 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -286,6 +286,12 @@ static cl::opt<bool>
EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
cl::init(false));
+static cl::opt<bool> EmitCallSiteInfo(
+ "emit-call-site-info",
+ cl::desc(
+ "Emit call site debug information, if debug information is enabled."),
+ cl::init(false));
+
static cl::opt<bool>
EnableDebugEntryValues("debug-entry-values",
cl::desc("Emit debug info about parameter's entry values"),
@@ -349,6 +355,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.ExceptionModel = ExceptionModel;
Options.EmitStackSizeSection = EnableStackSizeSection;
Options.EmitAddrsig = EnableAddrsig;
+ Options.EmitCallSiteInfo = EmitCallSiteInfo;
Options.EnableDebugEntryValues = EnableDebugEntryValues;
Options.ForceDwarfFrameSection = ForceDwarfFrameSection;
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index d27c7b0178f0..9378e290bed1 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -134,7 +134,8 @@ namespace llvm {
EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
SupportsDefaultOutlining(false), EmitAddrsig(false),
- EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
+ EmitCallSiteInfo(false), EnableDebugEntryValues(false),
+ ForceDwarfFrameSection(false) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// option is specified on the command line, and should enable debugging
@@ -281,6 +282,10 @@ namespace llvm {
/// to selectively generate basic block sections.
std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
+ /// The flag enables call site info production. It is used only for debug
+ /// info, and it is restricted only to optimized code. This can be used for
+ /// something else, so that should be controlled in the frontend.
+ unsigned EmitCallSiteInfo : 1;
/// Emit debug info about parameter's entry values.
unsigned EnableDebugEntryValues : 1;
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 10157c746b46..f955bdc6186a 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -381,11 +381,11 @@ bool MIRParserImpl::initializeCallSiteInfo(
CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
}
- if (TM.Options.EnableDebugEntryValues)
+ if (TM.Options.EmitCallSiteInfo)
MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
}
- if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
return error(Twine("Call site info provided but not used"));
return false;
}
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 4612690644fe..c3795b7ed314 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -855,7 +855,7 @@ MachineFunction::CallSiteInfoMap::iterator
MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
assert(MI->isCall() && "Call site info refers only to call instructions!");
- if (!Target.Options.EnableDebugEntryValues)
+ if (!Target.Options.EmitCallSiteInfo)
return CallSitesInfo.end();
return CallSitesInfo.find(MI);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0e4d783e3505..52099f24aca5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -863,7 +863,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MI = &*std::next(Before);
}
- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ if (MI->isCall() && DAG->getTarget().Options.EmitCallSiteInfo)
MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
return MI;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 23f05eaad944..63ff3031a5e8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4132,7 +4132,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9f504b1eaa42..5589ba34a2ac 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2222,7 +2222,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isThisReturn = true;
}
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d5de94aeb8a2..4808bdf6ddc2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4030,7 +4030,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), I);
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
index 5ffa0293a2e1..fec542223fc9 100644
--- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
+++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
# Verify that it is possible to read and write MIR where a callSites entry
# points to a call residing in a bundle. The offset should point to the call
diff --git a/llvm/test/CodeGen/X86/call-site-info-output.ll b/llvm/test/CodeGen/X86/call-site-info-output.ll
index 4b1e236aadfe..a0438f0c2b98 100644
--- a/llvm/test/CodeGen/X86/call-site-info-output.ll
+++ b/llvm/test/CodeGen/X86/call-site-info-output.ll
@@ -1,6 +1,6 @@
; Test call site info MIR printer and parser.Parser assertions and machine
; verifier will check the rest;
-; RUN: llc -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
+; RUN: llc -emit-call-site-info -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
; RUN: cat %t.mir | FileCheck %s
; CHECK: name: fn2
; CHECK: callSites:
@@ -10,7 +10,7 @@
; CHECK-NEXT: arg: 0, reg: '$edi'
; CHECK-NEXT: arg: 1, reg: '$esi'
; CHECK-NEXT: arg: 2, reg: '$edx'
-; RUN: llc -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
+; RUN: llc -emit-call-site-info -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
; Verify that we are able to parse output mir and that we are getting the same result.
; PARSER: name: fn2
; PARSER: callSites:
diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
index d52d6962f3c4..17d9f7f18762 100644
--- a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
+++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
+; RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
; Produced from:
; extern int fn1(int,int,int);
diff --git a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
index 9255a7d57dde..ed726dfe753f 100644
--- a/llvm/test/DebugInfo/ARM/call-site-info-output.ll
+++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
+; RUN: llc -emit-call-site-info -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
; Produced from:
; extern int fn1(int,int,int);
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
index dc7561ca6400..057779a90721 100644
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
#
# Based on the following C reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
index 0371ccef603e..d925bc395878 100644
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
# Following code is used for producing this test case. Note that
# some of argument loading instruction are modified in order to
# cover certain cases.
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
index 916a14022ba5..4a87dad3b9b5 100644
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
# Based on the following C reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
index fbf9b3454689..d85f2d25391d 100644
--- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
+++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
@@ -1,4 +1,4 @@
-# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \
+# RUN: llc -emit-call-site-info -start-after=livedebugvalues -filetype=obj -o - %s \
# RUN: | llvm-dwarfdump -v - | FileCheck %s
# This tests for a crash in DwarfDebug's singular DBG_VALUE range promotion when
diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
index ce8dc97f0e72..0ae4e6ec485c 100644
--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
# Following code is used for producing this test case. Note that
# some of argument loading instruction are modified in order to
# cover certain cases.
diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
index 9001c8ba8eea..5b84d9e9627f 100644
--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
# Based on the following C reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
index aa7b54c1e5bb..11e9c4c90836 100644
--- a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
+++ b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
# Vefify that the call site info will be updated after the optimization.
# This test case would previously trigger an assertion when
diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
index 8ae628af2c09..3ae23d4189bf 100644
--- a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
+++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
+# RUN: llc -mtriple hexagon -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
# Based on the following C reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
index ff0a539dd15d..8bb0b3202acd 100644
--- a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
+++ b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
# Verify that the entry values for the input parameters are inserted after the
# bundles which contains the registers' clobbering instructions (the calls to
diff --git a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
index 8a4e8b5632c2..3cf41467f7f9 100644
--- a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
+++ b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
# This test would previously trigger an assertion when trying to describe the
# call site value for callee()'s float parameter.
diff --git a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
index e6fe5d2de878..4e5a07321d42 100644
--- a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
+++ b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
# RUN: llvm-dwarfdump %t | FileCheck %s
#
# int global;
diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
index c32a1155d038..edeef2c7aed4 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
@@ -1,6 +1,6 @@
# Check that llvm can describe a call site parameter which resides in a spill slot.
#
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
#
# Command:
# $ ~/src/builds/llvm-project-master-RA/bin/clang -g -Xclang -femit-debug-entry-values -O2 -c -o spill.o spill.cc -mllvm -stop-before=machineverifier -o spill.mir
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
index a2d51a203512..01a2b887a60b 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
# Based on the following reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
index f9e9459f1abd..104bc0146798 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
#
# CHECK: DW_TAG_GNU_call_site
# CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo"
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
index 1bb70f6d4530..4d88fa9aab74 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
# CHECK: DW_TAG_GNU_call_site
# CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo")
# CHECK-NEXT: DW_AT_low_pc {{.*}}
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
index 235787573f51..81af598ba194 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
# Based on the following C++ code:
# struct A { A(A &) {} };
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
index db0934c595c3..46adedd1be44 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
@@ -1,4 +1,4 @@
-# RUN: llc -O1 -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
+# RUN: llc -O1 -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
# Based on the following C reproducer:
#
diff --git a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
index 5d203029936e..1d7b64f169d1 100644
--- a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
+++ b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
#
#extern void fn2(int);
#
diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
index e79be66cd4e3..c39bc4db50be 100644
--- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
+++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
@@ -2,8 +2,8 @@
# When the debugger tuning is set to gdb, use GNU opcodes.
# For lldb, use the standard DWARF5 opcodes.
-# RUN: llc -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
-# RUN: llc -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
#
# extern void foo(int *a, int b, int c, int d, int e, int f);
# extern int getVal();
diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
index 8d121c3a30b9..c7f15aaaa562 100644
--- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
+++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
#
#extern void fn1 (int, int, int);
#
diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
index 2396daada876..aa8fdd7afd47 100644
--- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
+++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
#
# The test case was artificially adjusted, in order to make proper diamond basic
# block structure relevant to the debug entry values propagation.
diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
index 86b1cddaa462..c5af863954bf 100644
--- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
+++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
@@ -1,4 +1,4 @@
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
#
#extern void fn1 (int, int, int);
#__attribute__((noinline))
diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
index d282d796f6d7..ea9c12b5a192 100644
--- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
+++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=x86_64-pc-linux -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-pc-linux -emit-call-site-info -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
# Verify that the call site information for the call residing in the eliminated
# block is removed. This test case would previously trigger an assertion when
diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
index b698f1cdbfe8..b8cd9574cc63 100644
--- a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
+++ b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+; RUN: llc -O1 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
; Verify that the 64-bit call site immediates are not truncated.
;
diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
index 9fe67f82a2b4..5d37774f55d6 100644
--- a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
+++ b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+; RUN: llc -O3 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
index c304e9d768a5..d126757398ff 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
+++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
@@ -1,4 +1,4 @@
-; RUN: llc -debug-entry-values %s -o - -filetype=obj \
+; RUN: llc -emit-call-site-info -debug-entry-values %s -o - -filetype=obj \
; RUN: | llvm-dwarfdump -statistics - | FileCheck %s
;
; The LLVM IR file was generated on this source code by using
--
2.33.1

View File

@ -1,237 +0,0 @@
From d51fdb9f2986747a56c593fa057d531720b39deb Mon Sep 17 00:00:00 2001
From: Sriraman Tallam <tmsriram@google.com>
Date: Fri, 13 Mar 2020 15:58:57 -0700
Subject: [PATCH] Basic Block Sections Support.
This is the first in a series of patches to enable Basic Block Sections
in LLVM.
We introduce a new compiler option, -fbasicblock-sections=, which places every
basic block in a unique ELF text section in the object file along with a
symbol labeling the basic block. The linker can then order the basic block
sections in any arbitrary sequence which when done correctly can encapsulate
block layout, function layout and function splitting optimizations. However,
there are a couple of challenges to be addressed for this to be feasible:
1) The compiler must not allow any implicit fall-through between any two
adjacent basic blocks as they could be reordered at link time to be
non-adjacent. In other words, the compiler must make a fall-through
between adjacent basic blocks explicit by retaining the direct jump
instruction that jumps to the next basic block. These branches can only
be removed later by the linker after the blocks have been reordered.
2) All inter-basic block branch targets would now need to be resolved by
the linker as they cannot be calculated during compile time. This is
done using static relocations which bloats the size of the object files.
Further, the compiler tries to use short branch instructions on some ISAs
for branch offsets that can be accommodated in one byte. This is not
possible with basic block sections as the offset is not determined at
compile time, and long branch instructions have to be used everywhere.
3) Each additional section bloats object file sizes by tens of bytes. The
number of basic blocks can be potentially very large compared to the
size of functions and can bloat object sizes significantly. Option
fbasicblock-sections= also takes a file path which can be used to
specify a subset of basic blocks that needs unique sections to keep
the bloats small.
4) Debug Info and CFI need special handling and will be presented as
separate patches.
Basic Block Labels
With -fbasicblock-sections=labels, or when a basic block is placed in a
unique section, it is labelled with a symbol. This allows easy mapping of
virtual addresses from PMU profiles back to the corresponding basic blocks.
Since the number of basic blocks is large, the labeling bloats the symbol
table sizes and the string table sizes significantly. While the binary size
does increase, it does not affect performance as the symbol table is not
loaded in memory during run-time. The string table size bloat is kept very
minimal using a unary naming scheme that uses string suffix compression.
The basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ...
This turns out to be very good for string table sizes and the bloat in the
string table size for a very large binary is ~8 %. The naming also allows
using the --symbol-ordering-file option in LLD to arbitrarily reorder the
sections.
Differential Revision: https://reviews.llvm.org/D68063
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4dfe92e46542be46d634a7ec24da2f2f889623d0]
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
---
llvm/include/llvm/CodeGen/CommandFlags.inc | 34 ++++++++++++++++++++++
llvm/include/llvm/Target/TargetMachine.h | 14 +++++++++
llvm/include/llvm/Target/TargetOptions.h | 31 ++++++++++++++++++--
3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
index 8739b644873d..6475a5b19edb 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -238,6 +238,12 @@ static cl::opt<bool>
cl::desc("Emit functions into separate sections"),
cl::init(false));
+static cl::opt<std::string>
+ BBSections("basicblock-sections",
+ cl::desc("Emit basic blocks into separate sections"),
+ cl::value_desc("all | <function list (file)> | labels | none"),
+ cl::init("none"));
+
static cl::opt<unsigned> TLSSize("tls-size",
cl::desc("Bit size of immediate TLS offsets"),
cl::init(0));
@@ -251,6 +257,11 @@ static cl::opt<bool>
cl::desc("Give unique names to every section"),
cl::init(true));
+static cl::opt<bool> UniqueBBSectionNames(
+ "unique-bb-section-names",
+ cl::desc("Give unique names to every basic block section"),
+ cl::init(false));
+
static cl::opt<llvm::EABI>
EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"),
cl::init(EABI::Default),
@@ -285,6 +296,27 @@ static cl::opt<bool>
cl::desc("Always emit a debug frame section."),
cl::init(false));
+static llvm::BasicBlockSection
+getBBSectionsMode(llvm::TargetOptions &Options) {
+ if (BBSections == "all")
+ return BasicBlockSection::All;
+ else if (BBSections == "labels")
+ return BasicBlockSection::Labels;
+ else if (BBSections == "none")
+ return BasicBlockSection::None;
+ else {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(BBSections);
+ if (!MBOrErr) {
+ errs() << "Error loading basic block sections function list file: "
+ << MBOrErr.getError().message() << "\n";
+ } else {
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+ return BasicBlockSection::List;
+ }
+}
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
static TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -308,7 +340,9 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.RelaxELFRelocations = RelaxELFRelocations;
Options.DataSections = DataSections;
Options.FunctionSections = FunctionSections;
+ Options.BBSections = getBBSectionsMode(Options);
Options.UniqueSectionNames = UniqueSectionNames;
+ Options.UniqueBBSectionNames = UniqueBBSectionNames;
Options.TLSSize = TLSSize;
Options.EmulatedTLS = EmulatedTLS;
Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 176ae39b17a7..4a1f3377f31d 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -242,6 +242,9 @@ public:
bool getUniqueSectionNames() const { return Options.UniqueSectionNames; }
+ /// Return true if unique basic block section names must be generated.
+ bool getUniqueBBSectionNames() const { return Options.UniqueBBSectionNames; }
+
/// Return true if data objects should be emitted into their own section,
/// corresponds to -fdata-sections.
bool getDataSections() const {
@@ -254,6 +257,17 @@ public:
return Options.FunctionSections;
}
+ /// If basic blocks should be emitted into their own section,
+ /// corresponding to -fbasicblock-sections.
+ llvm::BasicBlockSection getBBSectionsType() const {
+ return Options.BBSections;
+ }
+
+ /// Get the list of functions and basic block ids that need unique sections.
+ const MemoryBuffer *getBBSectionsFuncListBuf() const {
+ return Options.BBSectionsFuncListBuf.get();
+ }
+
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 84c6ee2a6387..d27c7b0178f0 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -16,8 +16,11 @@
#include "llvm/MC/MCTargetOptions.h"
+#include <memory>
+
namespace llvm {
class MachineFunction;
+ class MemoryBuffer;
class Module;
namespace FloatABI {
@@ -63,6 +66,18 @@ namespace llvm {
};
}
+ enum class BasicBlockSection {
+ All, // Use Basic Block Sections for all basic blocks. A section
+ // for every basic block can significantly bloat object file sizes.
+ List, // Get list of functions & BBs from a file. Selectively enables
+ // basic block sections for a subset of basic blocks which can be
+ // used to control object size bloats from creating sections.
+ Labels, // Do not use Basic Block Sections but label basic blocks. This
+ // is useful when associating profile counts from virtual addresses
+ // to basic blocks.
+ None // Do not use Basic Block Sections.
+ };
+
enum class EABI {
Unknown,
Default, // Default means not specified
@@ -114,9 +129,9 @@ namespace llvm {
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
DisableIntegratedAS(false), RelaxELFRelocations(false),
FunctionSections(false), DataSections(false),
- UniqueSectionNames(true), TrapUnreachable(false),
- NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
- ExplicitEmulatedTLS(false), EnableIPRA(false),
+ UniqueSectionNames(true), UniqueBBSectionNames(false),
+ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
+ EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
SupportsDefaultOutlining(false), EmitAddrsig(false),
EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
@@ -224,6 +239,9 @@ namespace llvm {
unsigned UniqueSectionNames : 1;
+ /// Use unique names for basic block sections.
+ unsigned UniqueBBSectionNames : 1;
+
/// Emit target-specific trap instruction for 'unreachable' IR instructions.
unsigned TrapUnreachable : 1;
@@ -256,6 +274,13 @@ namespace llvm {
/// Emit address-significance table.
unsigned EmitAddrsig : 1;
+ /// Emit basic blocks into separate sections.
+ BasicBlockSection BBSections = BasicBlockSection::None;
+
+ /// Memory Buffer that contains information on sampled basic blocks and used
+ /// to selectively generate basic block sections.
+ std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
+
/// Emit debug info about parameter's entry values.
unsigned EnableDebugEntryValues : 1;
--
2.33.1

View File

@ -1,33 +1,5 @@
FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:"
SPIRV_SRCREV = "fe4d6b767363a1995ccbfca27f79efb10dcfe110"
SRC_URI_LLVM10_PATCHES = " \
file://llvm10-0001-llvm-spirv-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
file://llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch;patchdir=llvm/projects/llvm-spirv \
file://llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch;patchdir=llvm/projects/llvm-spirv \
file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
file://llvm10-0001-OpenCL-3.0-support.patch \
file://llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch \
file://llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch \
file://llvm10-0004-Remove-repo-name-in-LLVM-IR.patch \
file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
file://llvm10-0007-support-cl_ext_float_atomics.patch \
file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
file://llvm10-basic-block-sections-support.patch \
file://llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch \
file://llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch \
"
SRC_URI_LLVM12_PATCHES = " \
file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
@ -36,10 +8,4 @@ SRC_URI_LLVM12_PATCHES = " \
file://llvm12-0005-ispc-12_0_fix_for_2111.patch \
"
SPIRV_LLVM10_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_100;destsuffix=git/llvm/projects/llvm-spirv;name=spirv"
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '10.0.1', ' ${SPIRV_LLVM10_SRC_URI} ${SRC_URI_LLVM10_PATCHES} ', '', d)}"
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}"
SRCREV_spirv = "${@bb.utils.contains_any('LLVMVERSION', [ '13.0.0', '12.0.0' ], '', '${SPIRV_SRCREV}', d)}"

View File

@ -1,35 +0,0 @@
From 7fc05c52dd91902fa324a7aac9b90715cfca4717 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 15 Apr 2020 17:55:32 +0800
Subject: [PATCH] Building in-tree with LLVM 10.0 with the LLVM_LINK_LLVM_DYLIB
Failed to link with the LLVMSPIRVLib library.
Add an explicit dependency to force the correct build order and linking.
Reference:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/a6d4ccf082858e63e139ca06c02a071c343d2657
Upstream-Status: Submitted [https://github.com/intel/opencl-clang/pull/118]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51c140d..b8b514e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -208,7 +208,7 @@ link_directories(
set(OPENCL_CLANG_LINK_LIBS ${CMAKE_DL_LIBS})
-if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS)
+if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS OR (USE_PREBUILT_LLVM AND LLVM_LINK_LLVM_DYLIB))
# SPIRV-LLVM-Translator is not included into LLVM as a component.
# So, we need to list it here explicitly as an external library
list(APPEND OPENCL_CLANG_LINK_LIBS LLVMSPIRVLib)
--
2.17.1

View File

@ -1,42 +0,0 @@
From b29e00e6fe428a031cf577dfb703cf13eff837f6 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 15 Apr 2020 18:05:14 +0800
Subject: [PATCH 2/2] make sure only static libraries linked for native build
LINK_COMPONENTS=all isn't working for static libs for out of tree builds. Use
LLVM_AVAILABLE_LIBS instead. Reported:
https://github.com/intel/opencl-clang/issues/114
Upstream-Status: Pending
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
CMakeLists.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8707487..ad2dbda 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -218,7 +218,7 @@ add_subdirectory(cl_headers)
set(LLVM_REQUIRES_EH ON)
-if(USE_PREBUILT_LLVM OR CLANG_LINK_CLANG_DYLIB)
+if(false)
list(APPEND OPENCL_CLANG_LINK_LIBS clang-cpp)
else()
list(APPEND OPENCL_CLANG_LINK_LIBS
@@ -266,6 +266,7 @@ add_llvm_library(${TARGET_NAME} SHARED
all
LINK_LIBS
${OPENCL_CLANG_LINK_LIBS}
+ ${LLVM_AVAILABLE_LIBS}
)
# Configure resource file on Windows
--
2.17.1

View File

@ -1,15 +0,0 @@
require opencl-clang.inc
SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \
file://0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch \
"
SRC_URI:append:class-native = " file://0002-make-sure-only-static-libraries-linked-for-native-bu.patch"
BRANCH = "ocl-open-100"
SRCREV = "c8cd72e32b6abc18ce6da71c357ea45ba78b52f0"
EXTRA_OECMAKE += "\
-DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
-DCMAKE_SKIP_RPATH=TRUE \
"