mirror of
git://git.yoctoproject.org/meta-intel.git
synced 2025-07-19 12:59:03 +02:00
Remove support for building with LLVM 10
We no longer support building with older branches of OE-Core/meta-clang so remove LLVM 10 specific configurations and patches. Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
parent
7bb49b1ae1
commit
c993e8e815
|
@ -21,10 +21,8 @@ PREFERRED_PROVIDER_libva-utils = "libva-intel-utils"
|
|||
PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native"
|
||||
PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils"
|
||||
|
||||
PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \
|
||||
bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}"
|
||||
PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \
|
||||
bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}"
|
||||
PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
|
||||
PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
|
||||
|
||||
XSERVER_X86_ASPEED_AST = "xf86-video-ast \
|
||||
"
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001
|
||||
From: Aleksander Us <aleksander.us@intel.com>
|
||||
Date: Mon, 26 Aug 2019 15:47:41 +0300
|
||||
Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in
|
||||
SplitBlockPredecessors.
|
||||
|
||||
In case when BB is header of some loop and predecessor is latch of
|
||||
this loop, metadata was not attached to newly created basic block.
|
||||
This led to loss of loop metadata for other passes.
|
||||
|
||||
Upstream-Status: Submitted [https://reviews.llvm.org/D66892]
|
||||
|
||||
https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++----
|
||||
test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++
|
||||
2 files changed, 52 insertions(+), 7 deletions(-)
|
||||
create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll
|
||||
|
||||
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||
index 5fa371377c8..3a90ae061fb 100644
|
||||
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||
@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
|
||||
|
||||
// The new block unconditionally branches to the old block.
|
||||
BranchInst *BI = BranchInst::Create(BB, NewBB);
|
||||
+ bool IsBBHeader = LI && LI->isLoopHeader(BB);
|
||||
+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr;
|
||||
// Splitting the predecessors of a loop header creates a preheader block.
|
||||
- if (LI && LI->isLoopHeader(BB))
|
||||
+ if (IsBBHeader)
|
||||
// Using the loop start line number prevents debuggers stepping into the
|
||||
// loop body for this instruction.
|
||||
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
|
||||
+ BI->setDebugLoc(BBLoop->getStartLoc());
|
||||
else
|
||||
BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
|
||||
|
||||
// Move the edges from Preds to point to NewBB instead of BB.
|
||||
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
|
||||
+ for (BasicBlock *Pred : Preds) {
|
||||
+ Instruction *PI = Pred->getTerminator();
|
||||
// This is slightly more strict than necessary; the minimum requirement
|
||||
// is that there be no more than one indirectbr branching to BB. And
|
||||
// all BlockAddress uses would need to be updated.
|
||||
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
|
||||
+ assert(!isa<IndirectBrInst>(PI) &&
|
||||
"Cannot split an edge from an IndirectBrInst");
|
||||
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
|
||||
- "Cannot split an edge from a CallBrInst");
|
||||
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
|
||||
+ assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst");
|
||||
+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) {
|
||||
+ // Update loop metadata if it exists.
|
||||
+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) {
|
||||
+ BI->setMetadata(LLVMContext::MD_loop, LoopMD);
|
||||
+ PI->setMetadata(LLVMContext::MD_loop, nullptr);
|
||||
+ }
|
||||
+ }
|
||||
+ PI->replaceUsesOfWith(BB, NewBB);
|
||||
}
|
||||
|
||||
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
|
||||
diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll
|
||||
new file mode 100644
|
||||
index 00000000000..c15c92fe3ae
|
||||
--- /dev/null
|
||||
+++ b/test/Transforms/LoopSimplify/loop_metadata.ll
|
||||
@@ -0,0 +1,36 @@
|
||||
+; RUN: opt -S -loop-simplify < %s | FileCheck %s
|
||||
+
|
||||
+; CHECK: for.cond.loopexit:
|
||||
+; CHECK: br label %for.cond, !llvm.loop !0
|
||||
+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit
|
||||
+
|
||||
+define void @foo() {
|
||||
+entry:
|
||||
+ br label %for.cond
|
||||
+
|
||||
+for.cond: ; preds = %for.cond1, %entry
|
||||
+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ]
|
||||
+ %cmp = icmp ult i32 %j, 8
|
||||
+ br i1 %cmp, label %for.body, label %for.end
|
||||
+
|
||||
+for.body: ; preds = %for.cond
|
||||
+ %dummy1 = add i32 1, 1
|
||||
+ %add = add nuw nsw i32 %j, 1
|
||||
+ br label %for.cond1
|
||||
+
|
||||
+for.cond1: ; preds = %for.body1, %for.body
|
||||
+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ]
|
||||
+ %cmp1 = icmp ult i32 %i.0, 8
|
||||
+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0
|
||||
+
|
||||
+for.body1: ; preds = %for.cond1
|
||||
+ %dummy2 = add i32 1, 1
|
||||
+ %inc = add nuw nsw i32 %i.0, 1
|
||||
+ br label %for.cond1
|
||||
+
|
||||
+for.end: ; preds = %for.cond
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+!0 = distinct !{!0, !1}
|
||||
+!1 = !{!"llvm.loop.unroll.full"}
|
||||
--
|
||||
2.18.0
|
||||
|
|
@ -1,146 +0,0 @@
|
|||
From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
|
||||
From: Aleksander Us <aleksander.us@intel.com>
|
||||
Date: Mon, 26 Aug 2019 15:45:47 +0300
|
||||
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
|
||||
LFTR when possible.
|
||||
|
||||
SCEV analysis cannot properly cache instruction with poison flags
|
||||
(for example, add nsw outside of loop will not be reused by expander).
|
||||
This can lead to generating of additional instructions by SCEV expander.
|
||||
|
||||
Example IR:
|
||||
|
||||
...
|
||||
%maxval = add nuw nsw i32 %a1, %a2
|
||||
...
|
||||
for.body:
|
||||
...
|
||||
%cmp22 = icmp ult i32 %ivadd, %maxval
|
||||
br i1 %cmp22, label %for.body, label %for.end
|
||||
...
|
||||
|
||||
SCEV expander will generate copy of %maxval in preheader but without
|
||||
nuw/nsw flags. This can be avoided by explicit check that iv count
|
||||
value gives the same SCEV expressions as calculated by LFTR.
|
||||
|
||||
Upstream-Status: Submitted [https://reviews.llvm.org/D66890]
|
||||
|
||||
https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
|
||||
test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
|
||||
test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
|
||||
test/Transforms/IndVarSimplify/udiv.ll | 1 +
|
||||
4 files changed, 38 insertions(+), 7 deletions(-)
|
||||
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
|
||||
|
||||
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||
index f9fc698a4a9..5e04dac8aa6 100644
|
||||
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||
@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
|
||||
if (UsePostInc)
|
||||
IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
|
||||
|
||||
+ // If computed limit is equal to old limit then do not use SCEV expander
|
||||
+ // because it can lost NUW/NSW flags and create extra instructions.
|
||||
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
||||
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
|
||||
+ Value *Limit = Cmp->getOperand(0);
|
||||
+ if (!L->isLoopInvariant(Limit))
|
||||
+ Limit = Cmp->getOperand(1);
|
||||
+ if (SE->getSCEV(Limit) == IVLimit)
|
||||
+ return Limit;
|
||||
+ }
|
||||
+
|
||||
// Expand the code for the iteration count.
|
||||
assert(SE->isLoopInvariant(IVLimit, L) &&
|
||||
"Computed iteration count is not loop invariant!");
|
||||
@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
|
||||
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
|
||||
Type *LimitTy = ExitCount->getType()->isPointerTy() ?
|
||||
IndVar->getType() : ExitCount->getType();
|
||||
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
||||
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
|
||||
}
|
||||
}
|
||||
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
|
||||
new file mode 100644
|
||||
index 00000000000..abd1cbb6c51
|
||||
--- /dev/null
|
||||
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
|
||||
@@ -0,0 +1,23 @@
|
||||
+; RUN: opt -indvars -S %s | FileCheck %s
|
||||
+
|
||||
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
|
||||
+
|
||||
+; CHECK: for.body.preheader:
|
||||
+; CHECK-NOT: add
|
||||
+; CHECK: for.body:
|
||||
+
|
||||
+define void @foo(i32 %a1, i32 %a2) {
|
||||
+entry:
|
||||
+ %maxval = add nuw nsw i32 %a1, %a2
|
||||
+ %cmp = icmp slt i32 %maxval, 1
|
||||
+ br i1 %cmp, label %for.end, label %for.body
|
||||
+
|
||||
+for.body: ; preds = %entry, %for.body
|
||||
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
|
||||
+ %add31 = add nuw nsw i32 %j.02, 1
|
||||
+ %cmp22 = icmp slt i32 %add31, %maxval
|
||||
+ br i1 %cmp22, label %for.body, label %for.end
|
||||
+
|
||||
+for.end: ; preds = %for.body
|
||||
+ ret void
|
||||
+}
|
||||
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||
index 14ae9738696..509d662b767 100644
|
||||
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||
@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
|
||||
; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: outer.preheader:
|
||||
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
|
||||
; CHECK-NEXT: br label [[OUTER:%.*]]
|
||||
; CHECK: outer:
|
||||
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
|
||||
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
|
||||
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
|
||||
; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
|
||||
; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
|
||||
@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
|
||||
; CHECK: inner:
|
||||
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
|
||||
; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
|
||||
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
|
||||
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
|
||||
; CHECK: outer.inc.loopexit:
|
||||
; CHECK-NEXT: br label [[OUTER_INC]]
|
||||
; CHECK: outer.inc:
|
||||
; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
|
||||
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
|
||||
-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
|
||||
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
|
||||
; CHECK: exit.loopexit:
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
|
||||
index b3f2c2a6a66..3530343ef4a 100644
|
||||
--- a/test/Transforms/IndVarSimplify/udiv.ll
|
||||
+++ b/test/Transforms/IndVarSimplify/udiv.ll
|
||||
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK: for.body.preheader:
|
||||
; CHECK-NOT: udiv
|
||||
+; CHECK: for.body:
|
||||
|
||||
define void @foo(double* %p, i64 %n) nounwind {
|
||||
entry:
|
||||
--
|
||||
2.18.0
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,51 +0,0 @@
|
|||
From 661021749a168c423d69d0ba7cdfa16fed860836 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Wed, 21 Aug 2019 14:35:31 +0800
|
||||
Subject: [PATCH 1/3] llvm-spirv: skip building tests
|
||||
|
||||
Some of these need clang to be built and since we're building this in-tree,
|
||||
that leads to problems when compiling libcxx, compiler-rt which aren't built
|
||||
in-tree.
|
||||
|
||||
Instead of using SPIRV_SKIP_CLANG_BUILD to skip clang build and adding this to
|
||||
all components, disable the building of tests altogether.
|
||||
|
||||
Upstream-Status: Inappropriate
|
||||
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
CMakeLists.txt | 10 ----------
|
||||
1 file changed, 10 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 92c50370..80999c98 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -25,13 +25,6 @@ if(LLVM_SPIRV_BUILD_EXTERNAL)
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
- if(LLVM_SPIRV_INCLUDE_TESTS)
|
||||
- set(LLVM_TEST_COMPONENTS
|
||||
- llvm-as
|
||||
- llvm-dis
|
||||
- )
|
||||
- endif(LLVM_SPIRV_INCLUDE_TESTS)
|
||||
-
|
||||
find_package(LLVM 10.0.0 REQUIRED
|
||||
COMPONENTS
|
||||
Analysis
|
||||
@@ -63,9 +56,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
|
||||
add_subdirectory(lib/SPIRV)
|
||||
add_subdirectory(tools/llvm-spirv)
|
||||
-if(LLVM_SPIRV_INCLUDE_TESTS)
|
||||
- add_subdirectory(test)
|
||||
-endif(LLVM_SPIRV_INCLUDE_TESTS)
|
||||
|
||||
install(
|
||||
FILES
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,812 +0,0 @@
|
|||
From 3f544cfe44ee5f113a3fb554aca2cf5d64996062 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Wed, 7 Apr 2021 16:38:38 +0800
|
||||
Subject: [PATCH 2/7] Add cl_khr_extended_subgroup extensions.
|
||||
|
||||
Added extensions and their function declarations into
|
||||
the standard header.
|
||||
|
||||
Patch by Piotr Fusik!
|
||||
|
||||
Tags: #clang
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a]
|
||||
Signed-off-by: Anastasia Stulova <anastasia.stulova@arm.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
.../include/clang/Basic/OpenCLExtensions.def | 7 +
|
||||
clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++
|
||||
clang/test/SemaOpenCL/extension-version.cl | 83 +++
|
||||
3 files changed, 758 insertions(+)
|
||||
|
||||
diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
|
||||
index 608f78a13eef..d1574164f9b2 100644
|
||||
--- a/clang/include/clang/Basic/OpenCLExtensions.def
|
||||
+++ b/clang/include/clang/Basic/OpenCLExtensions.def
|
||||
@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U)
|
||||
OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U)
|
||||
OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U)
|
||||
OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U)
|
||||
+OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U)
|
||||
|
||||
// Clang Extensions.
|
||||
OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U)
|
||||
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
|
||||
index 93a946cec5b1..67d900eb1c3d 100644
|
||||
--- a/clang/lib/Headers/opencl-c.h
|
||||
+++ b/clang/lib/Headers/opencl-c.h
|
||||
@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
|
||||
|
||||
#endif //cl_khr_subgroups cl_intel_subgroups
|
||||
|
||||
+#if defined(cl_khr_subgroup_extended_types)
|
||||
+char __ovld __conv sub_group_broadcast( char value, uint index );
|
||||
+char2 __ovld __conv sub_group_broadcast( char2 value, uint index );
|
||||
+char3 __ovld __conv sub_group_broadcast( char3 value, uint index );
|
||||
+char4 __ovld __conv sub_group_broadcast( char4 value, uint index );
|
||||
+char8 __ovld __conv sub_group_broadcast( char8 value, uint index );
|
||||
+char16 __ovld __conv sub_group_broadcast( char16 value, uint index );
|
||||
+
|
||||
+uchar __ovld __conv sub_group_broadcast( uchar value, uint index );
|
||||
+uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index );
|
||||
+uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index );
|
||||
+uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index );
|
||||
+uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index );
|
||||
+uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index );
|
||||
+
|
||||
+short __ovld __conv sub_group_broadcast( short value, uint index );
|
||||
+short2 __ovld __conv sub_group_broadcast( short2 value, uint index );
|
||||
+short3 __ovld __conv sub_group_broadcast( short3 value, uint index );
|
||||
+short4 __ovld __conv sub_group_broadcast( short4 value, uint index );
|
||||
+short8 __ovld __conv sub_group_broadcast( short8 value, uint index );
|
||||
+short16 __ovld __conv sub_group_broadcast( short16 value, uint index );
|
||||
+
|
||||
+ushort __ovld __conv sub_group_broadcast( ushort value, uint index );
|
||||
+ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index );
|
||||
+ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index );
|
||||
+ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index );
|
||||
+ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index );
|
||||
+ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index );
|
||||
+
|
||||
+// scalar int broadcast is part of cl_khr_subgroups
|
||||
+int2 __ovld __conv sub_group_broadcast( int2 value, uint index );
|
||||
+int3 __ovld __conv sub_group_broadcast( int3 value, uint index );
|
||||
+int4 __ovld __conv sub_group_broadcast( int4 value, uint index );
|
||||
+int8 __ovld __conv sub_group_broadcast( int8 value, uint index );
|
||||
+int16 __ovld __conv sub_group_broadcast( int16 value, uint index );
|
||||
+
|
||||
+// scalar uint broadcast is part of cl_khr_subgroups
|
||||
+uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index );
|
||||
+uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index );
|
||||
+uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index );
|
||||
+uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index );
|
||||
+uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index );
|
||||
+
|
||||
+// scalar long broadcast is part of cl_khr_subgroups
|
||||
+long2 __ovld __conv sub_group_broadcast( long2 value, uint index );
|
||||
+long3 __ovld __conv sub_group_broadcast( long3 value, uint index );
|
||||
+long4 __ovld __conv sub_group_broadcast( long4 value, uint index );
|
||||
+long8 __ovld __conv sub_group_broadcast( long8 value, uint index );
|
||||
+long16 __ovld __conv sub_group_broadcast( long16 value, uint index );
|
||||
+
|
||||
+// scalar ulong broadcast is part of cl_khr_subgroups
|
||||
+ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index );
|
||||
+ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index );
|
||||
+ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index );
|
||||
+ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index );
|
||||
+ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index );
|
||||
+
|
||||
+// scalar float broadcast is part of cl_khr_subgroups
|
||||
+float2 __ovld __conv sub_group_broadcast( float2 value, uint index );
|
||||
+float3 __ovld __conv sub_group_broadcast( float3 value, uint index );
|
||||
+float4 __ovld __conv sub_group_broadcast( float4 value, uint index );
|
||||
+float8 __ovld __conv sub_group_broadcast( float8 value, uint index );
|
||||
+float16 __ovld __conv sub_group_broadcast( float16 value, uint index );
|
||||
+
|
||||
+char __ovld __conv sub_group_reduce_add( char value );
|
||||
+uchar __ovld __conv sub_group_reduce_add( uchar value );
|
||||
+short __ovld __conv sub_group_reduce_add( short value );
|
||||
+ushort __ovld __conv sub_group_reduce_add( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_reduce_min( char value );
|
||||
+uchar __ovld __conv sub_group_reduce_min( uchar value );
|
||||
+short __ovld __conv sub_group_reduce_min( short value );
|
||||
+ushort __ovld __conv sub_group_reduce_min( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_reduce_max( char value );
|
||||
+uchar __ovld __conv sub_group_reduce_max( uchar value );
|
||||
+short __ovld __conv sub_group_reduce_max( short value );
|
||||
+ushort __ovld __conv sub_group_reduce_max( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_inclusive_add( char value );
|
||||
+uchar __ovld __conv sub_group_scan_inclusive_add( uchar value );
|
||||
+short __ovld __conv sub_group_scan_inclusive_add( short value );
|
||||
+ushort __ovld __conv sub_group_scan_inclusive_add( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_inclusive_min( char value );
|
||||
+uchar __ovld __conv sub_group_scan_inclusive_min( uchar value );
|
||||
+short __ovld __conv sub_group_scan_inclusive_min( short value );
|
||||
+ushort __ovld __conv sub_group_scan_inclusive_min( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_inclusive_max( char value );
|
||||
+uchar __ovld __conv sub_group_scan_inclusive_max( uchar value );
|
||||
+short __ovld __conv sub_group_scan_inclusive_max( short value );
|
||||
+ushort __ovld __conv sub_group_scan_inclusive_max( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_exclusive_add( char value );
|
||||
+uchar __ovld __conv sub_group_scan_exclusive_add( uchar value );
|
||||
+short __ovld __conv sub_group_scan_exclusive_add( short value );
|
||||
+ushort __ovld __conv sub_group_scan_exclusive_add( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_exclusive_min( char value );
|
||||
+uchar __ovld __conv sub_group_scan_exclusive_min( uchar value );
|
||||
+short __ovld __conv sub_group_scan_exclusive_min( short value );
|
||||
+ushort __ovld __conv sub_group_scan_exclusive_min( ushort value );
|
||||
+
|
||||
+char __ovld __conv sub_group_scan_exclusive_max( char value );
|
||||
+uchar __ovld __conv sub_group_scan_exclusive_max( uchar value );
|
||||
+short __ovld __conv sub_group_scan_exclusive_max( short value );
|
||||
+ushort __ovld __conv sub_group_scan_exclusive_max( ushort value );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+// scalar half broadcast is part of cl_khr_subgroups
|
||||
+half2 __ovld __conv sub_group_broadcast( half2 value, uint index );
|
||||
+half3 __ovld __conv sub_group_broadcast( half3 value, uint index );
|
||||
+half4 __ovld __conv sub_group_broadcast( half4 value, uint index );
|
||||
+half8 __ovld __conv sub_group_broadcast( half8 value, uint index );
|
||||
+half16 __ovld __conv sub_group_broadcast( half16 value, uint index );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+// scalar double broadcast is part of cl_khr_subgroups
|
||||
+double2 __ovld __conv sub_group_broadcast( double2 value, uint index );
|
||||
+double3 __ovld __conv sub_group_broadcast( double3 value, uint index );
|
||||
+double4 __ovld __conv sub_group_broadcast( double4 value, uint index );
|
||||
+double8 __ovld __conv sub_group_broadcast( double8 value, uint index );
|
||||
+double16 __ovld __conv sub_group_broadcast( double16 value, uint index );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_extended_types
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_non_uniform_vote)
|
||||
+int __ovld sub_group_elect(void);
|
||||
+int __ovld sub_group_non_uniform_all( int predicate );
|
||||
+int __ovld sub_group_non_uniform_any( int predicate );
|
||||
+
|
||||
+int __ovld sub_group_non_uniform_all_equal( char value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( uchar value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( short value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( ushort value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( int value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( uint value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( long value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( ulong value );
|
||||
+int __ovld sub_group_non_uniform_all_equal( float value );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+int __ovld sub_group_non_uniform_all_equal( half value );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+int __ovld sub_group_non_uniform_all_equal( double value );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_non_uniform_vote
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_ballot)
|
||||
+char __ovld sub_group_non_uniform_broadcast( char value, uint index );
|
||||
+char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index );
|
||||
+char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index );
|
||||
+char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index );
|
||||
+char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index );
|
||||
+char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index );
|
||||
+
|
||||
+uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index );
|
||||
+uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index );
|
||||
+uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index );
|
||||
+uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index );
|
||||
+uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index );
|
||||
+uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index );
|
||||
+
|
||||
+short __ovld sub_group_non_uniform_broadcast( short value, uint index );
|
||||
+short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index );
|
||||
+short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index );
|
||||
+short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index );
|
||||
+short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index );
|
||||
+short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index );
|
||||
+
|
||||
+ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index );
|
||||
+ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index );
|
||||
+ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index );
|
||||
+ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index );
|
||||
+ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index );
|
||||
+ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index );
|
||||
+
|
||||
+int __ovld sub_group_non_uniform_broadcast( int value, uint index );
|
||||
+int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index );
|
||||
+int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index );
|
||||
+int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index );
|
||||
+int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index );
|
||||
+int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index );
|
||||
+
|
||||
+uint __ovld sub_group_non_uniform_broadcast( uint value, uint index );
|
||||
+uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index );
|
||||
+uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index );
|
||||
+uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index );
|
||||
+uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index );
|
||||
+uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index );
|
||||
+
|
||||
+long __ovld sub_group_non_uniform_broadcast( long value, uint index );
|
||||
+long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index );
|
||||
+long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index );
|
||||
+long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index );
|
||||
+long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index );
|
||||
+long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index );
|
||||
+
|
||||
+ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index );
|
||||
+ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index );
|
||||
+ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index );
|
||||
+ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index );
|
||||
+ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index );
|
||||
+ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index );
|
||||
+
|
||||
+float __ovld sub_group_non_uniform_broadcast( float value, uint index );
|
||||
+float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index );
|
||||
+float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index );
|
||||
+float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index );
|
||||
+float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index );
|
||||
+float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index );
|
||||
+
|
||||
+char __ovld sub_group_broadcast_first( char value );
|
||||
+uchar __ovld sub_group_broadcast_first( uchar value );
|
||||
+short __ovld sub_group_broadcast_first( short value );
|
||||
+ushort __ovld sub_group_broadcast_first( ushort value );
|
||||
+int __ovld sub_group_broadcast_first( int value );
|
||||
+uint __ovld sub_group_broadcast_first( uint value );
|
||||
+long __ovld sub_group_broadcast_first( long value );
|
||||
+ulong __ovld sub_group_broadcast_first( ulong value );
|
||||
+float __ovld sub_group_broadcast_first( float value );
|
||||
+
|
||||
+uint4 __ovld sub_group_ballot( int predicate );
|
||||
+int __ovld __cnfn sub_group_inverse_ballot( uint4 value );
|
||||
+int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index );
|
||||
+uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value );
|
||||
+
|
||||
+uint __ovld sub_group_ballot_inclusive_scan( uint4 value );
|
||||
+uint __ovld sub_group_ballot_exclusive_scan( uint4 value );
|
||||
+uint __ovld sub_group_ballot_find_lsb( uint4 value );
|
||||
+uint __ovld sub_group_ballot_find_msb( uint4 value );
|
||||
+
|
||||
+uint4 __ovld __cnfn get_sub_group_eq_mask(void);
|
||||
+uint4 __ovld __cnfn get_sub_group_ge_mask(void);
|
||||
+uint4 __ovld __cnfn get_sub_group_gt_mask(void);
|
||||
+uint4 __ovld __cnfn get_sub_group_le_mask(void);
|
||||
+uint4 __ovld __cnfn get_sub_group_lt_mask(void);
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+half __ovld sub_group_non_uniform_broadcast( half value, uint index );
|
||||
+half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index );
|
||||
+half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index );
|
||||
+half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index );
|
||||
+half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index );
|
||||
+half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index );
|
||||
+
|
||||
+half __ovld sub_group_broadcast_first( half value );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+double __ovld sub_group_non_uniform_broadcast( double value, uint index );
|
||||
+double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index );
|
||||
+double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index );
|
||||
+double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index );
|
||||
+double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index );
|
||||
+double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index );
|
||||
+
|
||||
+double __ovld sub_group_broadcast_first( double value );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_ballot
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_non_uniform_arithmetic)
|
||||
+char __ovld sub_group_non_uniform_reduce_add( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_add( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_add( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_add( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_add( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_add( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_add( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_add( ulong value );
|
||||
+float __ovld sub_group_non_uniform_reduce_add( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_mul( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_mul( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_mul( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_mul( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_mul( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_mul( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_mul( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_mul( ulong value );
|
||||
+float __ovld sub_group_non_uniform_reduce_mul( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_min( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_min( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_min( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_min( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_min( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_min( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_min( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_min( ulong value );
|
||||
+float __ovld sub_group_non_uniform_reduce_min( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_max( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_max( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_max( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_max( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_max( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_max( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_max( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_max( ulong value );
|
||||
+float __ovld sub_group_non_uniform_reduce_max( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_add( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_add( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_add( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_add( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_inclusive_add( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_mul( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_mul( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_mul( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_mul( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_inclusive_mul( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_min( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_min( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_min( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_min( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_inclusive_min( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_max( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_max( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_max( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_max( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_inclusive_max( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_add( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_add( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_add( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_add( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_exclusive_add( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_mul( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_mul( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_mul( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_mul( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_exclusive_mul( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_min( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_min( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_min( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_min( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_exclusive_min( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_max( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_max( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_max( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_max( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value );
|
||||
+float __ovld sub_group_non_uniform_scan_exclusive_max( float value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_and( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_and( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_and( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_and( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_and( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_and( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_and( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_and( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_or( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_or( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_or( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_or( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_or( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_or( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_or( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_or( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_reduce_xor( char value );
|
||||
+uchar __ovld sub_group_non_uniform_reduce_xor( uchar value );
|
||||
+short __ovld sub_group_non_uniform_reduce_xor( short value );
|
||||
+ushort __ovld sub_group_non_uniform_reduce_xor( ushort value );
|
||||
+int __ovld sub_group_non_uniform_reduce_xor( int value );
|
||||
+uint __ovld sub_group_non_uniform_reduce_xor( uint value );
|
||||
+long __ovld sub_group_non_uniform_reduce_xor( long value );
|
||||
+ulong __ovld sub_group_non_uniform_reduce_xor( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_and( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_and( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_and( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_and( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_or( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_or( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_or( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_or( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_inclusive_xor( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_inclusive_xor( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_xor( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_inclusive_xor( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_and( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_and( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_and( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_and( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_or( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_or( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_or( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_or( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value );
|
||||
+
|
||||
+char __ovld sub_group_non_uniform_scan_exclusive_xor( char value );
|
||||
+uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value );
|
||||
+short __ovld sub_group_non_uniform_scan_exclusive_xor( short value );
|
||||
+ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_xor( int value );
|
||||
+uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value );
|
||||
+long __ovld sub_group_non_uniform_scan_exclusive_xor( long value );
|
||||
+ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value );
|
||||
+
|
||||
+int __ovld sub_group_non_uniform_reduce_logical_and( int predicate );
|
||||
+int __ovld sub_group_non_uniform_reduce_logical_or( int predicate );
|
||||
+int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate );
|
||||
+
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate );
|
||||
+int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate );
|
||||
+
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate );
|
||||
+int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+half __ovld sub_group_non_uniform_reduce_add( half value );
|
||||
+half __ovld sub_group_non_uniform_reduce_mul( half value );
|
||||
+half __ovld sub_group_non_uniform_reduce_min( half value );
|
||||
+half __ovld sub_group_non_uniform_reduce_max( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_inclusive_add( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_inclusive_mul( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_inclusive_min( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_inclusive_max( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_exclusive_add( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_exclusive_mul( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_exclusive_min( half value );
|
||||
+half __ovld sub_group_non_uniform_scan_exclusive_max( half value );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+double __ovld sub_group_non_uniform_reduce_add( double value );
|
||||
+double __ovld sub_group_non_uniform_reduce_mul( double value );
|
||||
+double __ovld sub_group_non_uniform_reduce_min( double value );
|
||||
+double __ovld sub_group_non_uniform_reduce_max( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_inclusive_add( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_inclusive_mul( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_inclusive_min( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_inclusive_max( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_exclusive_add( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_exclusive_mul( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_exclusive_min( double value );
|
||||
+double __ovld sub_group_non_uniform_scan_exclusive_max( double value );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_non_uniform_arithmetic
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_shuffle)
|
||||
+char __ovld sub_group_shuffle( char value, uint index );
|
||||
+uchar __ovld sub_group_shuffle( uchar value, uint index );
|
||||
+short __ovld sub_group_shuffle( short value, uint index );
|
||||
+ushort __ovld sub_group_shuffle( ushort value, uint index );
|
||||
+int __ovld sub_group_shuffle( int value, uint index );
|
||||
+uint __ovld sub_group_shuffle( uint value, uint index );
|
||||
+long __ovld sub_group_shuffle( long value, uint index );
|
||||
+ulong __ovld sub_group_shuffle( ulong value, uint index );
|
||||
+float __ovld sub_group_shuffle( float value, uint index );
|
||||
+
|
||||
+char __ovld sub_group_shuffle_xor( char value, uint mask );
|
||||
+uchar __ovld sub_group_shuffle_xor( uchar value, uint mask );
|
||||
+short __ovld sub_group_shuffle_xor( short value, uint mask );
|
||||
+ushort __ovld sub_group_shuffle_xor( ushort value, uint mask );
|
||||
+int __ovld sub_group_shuffle_xor( int value, uint mask );
|
||||
+uint __ovld sub_group_shuffle_xor( uint value, uint mask );
|
||||
+long __ovld sub_group_shuffle_xor( long value, uint mask );
|
||||
+ulong __ovld sub_group_shuffle_xor( ulong value, uint mask );
|
||||
+float __ovld sub_group_shuffle_xor( float value, uint mask );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+half __ovld sub_group_shuffle( half value, uint index );
|
||||
+half __ovld sub_group_shuffle_xor( half value, uint mask );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+double __ovld sub_group_shuffle( double value, uint index );
|
||||
+double __ovld sub_group_shuffle_xor( double value, uint mask );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_shuffle
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_shuffle_relative)
|
||||
+char __ovld sub_group_shuffle_up( char value, uint delta );
|
||||
+uchar __ovld sub_group_shuffle_up( uchar value, uint delta );
|
||||
+short __ovld sub_group_shuffle_up( short value, uint delta );
|
||||
+ushort __ovld sub_group_shuffle_up( ushort value, uint delta );
|
||||
+int __ovld sub_group_shuffle_up( int value, uint delta );
|
||||
+uint __ovld sub_group_shuffle_up( uint value, uint delta );
|
||||
+long __ovld sub_group_shuffle_up( long value, uint delta );
|
||||
+ulong __ovld sub_group_shuffle_up( ulong value, uint delta );
|
||||
+float __ovld sub_group_shuffle_up( float value, uint delta );
|
||||
+
|
||||
+char __ovld sub_group_shuffle_down( char value, uint delta );
|
||||
+uchar __ovld sub_group_shuffle_down( uchar value, uint delta );
|
||||
+short __ovld sub_group_shuffle_down( short value, uint delta );
|
||||
+ushort __ovld sub_group_shuffle_down( ushort value, uint delta );
|
||||
+int __ovld sub_group_shuffle_down( int value, uint delta );
|
||||
+uint __ovld sub_group_shuffle_down( uint value, uint delta );
|
||||
+long __ovld sub_group_shuffle_down( long value, uint delta );
|
||||
+ulong __ovld sub_group_shuffle_down( ulong value, uint delta );
|
||||
+float __ovld sub_group_shuffle_down( float value, uint delta );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+half __ovld sub_group_shuffle_up( half value, uint delta );
|
||||
+half __ovld sub_group_shuffle_down( half value, uint delta );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+double __ovld sub_group_shuffle_up( double value, uint delta );
|
||||
+double __ovld sub_group_shuffle_down( double value, uint delta );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_shuffle_relative
|
||||
+
|
||||
+#if defined(cl_khr_subgroup_clustered_reduce)
|
||||
+char __ovld sub_group_clustered_reduce_add( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_add( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_add( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_add( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize );
|
||||
+float __ovld sub_group_clustered_reduce_add( float value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize );
|
||||
+float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_min( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_min( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_min( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_min( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize );
|
||||
+float __ovld sub_group_clustered_reduce_min( float value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_max( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_max( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_max( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_max( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize );
|
||||
+float __ovld sub_group_clustered_reduce_max( float value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_and( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_and( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_and( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_and( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_or( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_or( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_or( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_or( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize );
|
||||
+
|
||||
+char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize );
|
||||
+uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize );
|
||||
+short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize );
|
||||
+ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize );
|
||||
+uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize );
|
||||
+long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize );
|
||||
+ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize );
|
||||
+
|
||||
+int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize );
|
||||
+int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize );
|
||||
+
|
||||
+#if defined(cl_khr_fp16)
|
||||
+half __ovld sub_group_clustered_reduce_add( half value, uint clustersize );
|
||||
+half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize );
|
||||
+half __ovld sub_group_clustered_reduce_min( half value, uint clustersize );
|
||||
+half __ovld sub_group_clustered_reduce_max( half value, uint clustersize );
|
||||
+#endif // cl_khr_fp16
|
||||
+
|
||||
+#if defined(cl_khr_fp64)
|
||||
+double __ovld sub_group_clustered_reduce_add( double value, uint clustersize );
|
||||
+double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize );
|
||||
+double __ovld sub_group_clustered_reduce_min( double value, uint clustersize );
|
||||
+double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
|
||||
+#endif // cl_khr_fp64
|
||||
+
|
||||
+#endif // cl_khr_subgroup_clustered_reduce
|
||||
+
|
||||
#if defined(cl_intel_subgroups)
|
||||
// Intel-Specific Sub Group Functions
|
||||
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
|
||||
diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl
|
||||
index 0e6bbb7d3bcd..86c78143a0eb 100644
|
||||
--- a/clang/test/SemaOpenCL/extension-version.cl
|
||||
+++ b/clang/test/SemaOpenCL/extension-version.cl
|
||||
@@ -333,3 +333,86 @@
|
||||
#endif
|
||||
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable
|
||||
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_extended_types
|
||||
+#error "Missing cl_khr_subgroup_extended_types"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_extended_types
|
||||
+#error "Incorrect cl_khr_subgroup_extended_types define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_non_uniform_vote
|
||||
+#error "Missing cl_khr_subgroup_non_uniform_vote"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_non_uniform_vote
|
||||
+#error "Incorrect cl_khr_subgroup_non_uniform_vote define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_ballot
|
||||
+#error "Missing cl_khr_subgroup_ballot"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_ballot
|
||||
+#error "Incorrect cl_khr_subgroup_ballot define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_non_uniform_arithmetic
|
||||
+#error "Missing cl_khr_subgroup_non_uniform_arithmetic"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_non_uniform_arithmetic
|
||||
+#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_shuffle
|
||||
+#error "Missing cl_khr_subgroup_shuffle"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_shuffle
|
||||
+#error "Incorrect cl_khr_subgroup_shuffle define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_shuffle_relative
|
||||
+#error "Missing cl_khr_subgroup_shuffle_relative"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_shuffle_relative
|
||||
+#error "Incorrect cl_khr_subgroup_shuffle_relative define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+#ifndef cl_khr_subgroup_clustered_reduce
|
||||
+#error "Missing cl_khr_subgroup_clustered_reduce"
|
||||
+#endif
|
||||
+#else
|
||||
+#ifdef cl_khr_subgroup_clustered_reduce
|
||||
+#error "Incorrect cl_khr_subgroup_clustered_reduce define"
|
||||
+#endif
|
||||
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}}
|
||||
+#endif
|
||||
+#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
From 331e323ae2633a8999a660314022491d670c442c Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Bocci <andrea.bocci@cern.ch>
|
||||
Date: Sun, 15 Mar 2020 17:35:44 +0100
|
||||
Subject: [PATCH 2/3] Fix building in-tree with cmake -DLLVM_LINK_LLVM_DYLIB=ON
|
||||
|
||||
Building in-tree with LLVM 11.0 master with the LLVM_LINK_LLVM_DYLIB
|
||||
cmake flag fails to link with the LLVMSPIRVLib library.
|
||||
|
||||
Add an explicit dependency to force the correct build order and linking.
|
||||
|
||||
Signed-off-by: Andrea Bocci <andrea.bocci@cern.ch>
|
||||
Upstream-Status: Backport
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
---
|
||||
tools/llvm-spirv/CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tools/llvm-spirv/CMakeLists.txt b/tools/llvm-spirv/CMakeLists.txt
|
||||
index 9aa96d9c..501c0daf 100644
|
||||
--- a/tools/llvm-spirv/CMakeLists.txt
|
||||
+++ b/tools/llvm-spirv/CMakeLists.txt
|
||||
@@ -14,7 +14,7 @@ add_llvm_tool(llvm-spirv
|
||||
NO_INSTALL_RPATH
|
||||
)
|
||||
|
||||
-if (LLVM_SPIRV_BUILD_EXTERNAL)
|
||||
+if (LLVM_SPIRV_BUILD_EXTERNAL OR LLVM_LINK_LLVM_DYLIB)
|
||||
target_link_libraries(llvm-spirv PRIVATE LLVMSPIRVLib)
|
||||
endif()
|
||||
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,982 +0,0 @@
|
|||
From fbc9996d6490a5d4720b85b47f38335e7fdc99d9 Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Mon, 19 Jul 2021 10:14:20 +0800
|
||||
Subject: [PATCH 3/3] Add support for cl_ext_float_atomics in SPIRVWriter
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch]
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++--
|
||||
lib/SPIRV/SPIRVToOCL.h | 3 +
|
||||
lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
|
||||
lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
|
||||
lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 -
|
||||
lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +-
|
||||
test/AtomicFAddEXTForOCL.ll | 64 +++++++++++++++
|
||||
test/AtomicFAddExt.ll | 111 ++++++++-----------------
|
||||
test/AtomicFMaxEXT.ll | 113 +++++++-------------------
|
||||
test/AtomicFMaxEXTForOCL.ll | 64 +++++++++++++++
|
||||
test/AtomicFMinEXT.ll | 113 +++++++-------------------
|
||||
test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++
|
||||
test/InvalidAtomicBuiltins.cl | 8 --
|
||||
13 files changed, 417 insertions(+), 260 deletions(-)
|
||||
create mode 100644 test/AtomicFAddEXTForOCL.ll
|
||||
create mode 100644 test/AtomicFMaxEXTForOCL.ll
|
||||
create mode 100644 test/AtomicFMinEXTForOCL.ll
|
||||
|
||||
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
|
||||
index e30aa5be..b676a009 100644
|
||||
--- a/lib/SPIRV/OCL20ToSPIRV.cpp
|
||||
+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
|
||||
@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
|
||||
if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 ||
|
||||
DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) {
|
||||
|
||||
- // Compute atomic builtins do not support floating types.
|
||||
- if (CI.getType()->isFloatingPointTy() &&
|
||||
- isComputeAtomicOCLBuiltin(DemangledName))
|
||||
- return;
|
||||
+ // Compute "atom" prefixed builtins do not support floating types.
|
||||
+ if (CI.getType()->isFloatingPointTy()) {
|
||||
+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0)
|
||||
+ return;
|
||||
+ // handle functions which are "atomic_" prefixed.
|
||||
+ StringRef Stem = DemangledName;
|
||||
+ Stem = Stem.drop_front(strlen("atomic_"));
|
||||
+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor,
|
||||
+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit,
|
||||
+ // fetch_and_explicit} should be identified as function call
|
||||
+ bool IsFunctionCall = llvm::StringSwitch<bool>(Stem)
|
||||
+ .Case("add", true)
|
||||
+ .Case("sub", true)
|
||||
+ .Case("inc", true)
|
||||
+ .Case("dec", true)
|
||||
+ .Case("cmpxchg", true)
|
||||
+ .Case("min", true)
|
||||
+ .Case("max", true)
|
||||
+ .Case("or", true)
|
||||
+ .Case("xor", true)
|
||||
+ .Case("and", true)
|
||||
+ .Case("fetch_or", true)
|
||||
+ .Case("fetch_and", true)
|
||||
+ .Case("fetch_xor", true)
|
||||
+ .Case("fetch_or_explicit", true)
|
||||
+ .Case("fetch_xor_explicit", true)
|
||||
+ .Case("fetch_and_explicit", true)
|
||||
+ .Default(false);
|
||||
+ if (IsFunctionCall)
|
||||
+ return;
|
||||
+ if (F->arg_size() != 2) {
|
||||
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
|
||||
+ .Case("exchange", true)
|
||||
+ .Case("fetch_add", true)
|
||||
+ .Case("fetch_sub", true)
|
||||
+ .Case("fetch_min", true)
|
||||
+ .Case("fetch_max", true)
|
||||
+ .Case("load", true)
|
||||
+ .Case("store", true)
|
||||
+ .Default(false);
|
||||
+ if (IsFunctionCall)
|
||||
+ return;
|
||||
+ }
|
||||
+ if (F->arg_size() != 3 && F->arg_size() != 4) {
|
||||
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
|
||||
+ .Case("exchange_explicit", true)
|
||||
+ .Case("fetch_add_explicit", true)
|
||||
+ .Case("fetch_sub_explicit", true)
|
||||
+ .Case("fetch_min_explicit", true)
|
||||
+ .Case("fetch_max_explicit", true)
|
||||
+ .Case("load_explicit", true)
|
||||
+ .Case("store_explicit", true)
|
||||
+ .Default(false);
|
||||
+ if (IsFunctionCall)
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
auto PCI = &CI;
|
||||
if (DemangledName == kOCLBuiltinName::AtomicInit) {
|
||||
@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
|
||||
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
|
||||
mutateCallInstSPIRV(
|
||||
M, CI,
|
||||
- [=](CallInst *CI, std::vector<Value *> &Args) {
|
||||
+ [=](CallInst *CI, std::vector<Value *> &Args) -> std::string {
|
||||
Info.PostProc(Args);
|
||||
// Order of args in OCL20:
|
||||
// object, 0-2 other args, 1-2 order, scope
|
||||
@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
|
||||
std::rotate(Args.begin() + 2, Args.begin() + OrderIdx,
|
||||
Args.end() - Offset);
|
||||
}
|
||||
- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
|
||||
+ llvm::Type* AtomicBuiltinsReturnType =
|
||||
+ CI->getCalledFunction()->getReturnType();
|
||||
+ auto IsFPType = [](llvm::Type *ReturnType) {
|
||||
+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() ||
|
||||
+ ReturnType->isDoubleTy();
|
||||
+ };
|
||||
+ auto SPIRVFunctionName =
|
||||
+ getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
|
||||
+ if (!IsFPType(AtomicBuiltinsReturnType))
|
||||
+ return SPIRVFunctionName;
|
||||
+ // Translate FP-typed atomic builtins.
|
||||
+ return llvm::StringSwitch<std::string>(SPIRVFunctionName)
|
||||
+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
|
||||
+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
|
||||
+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT");
|
||||
},
|
||||
&Attrs);
|
||||
}
|
||||
diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h
|
||||
index ddeec0b6..006fb0b1 100644
|
||||
--- a/lib/SPIRV/SPIRVToOCL.h
|
||||
+++ b/lib/SPIRV/SPIRVToOCL.h
|
||||
@@ -178,6 +178,9 @@ public:
|
||||
/// using separate maps for OpenCL 1.2 and OpenCL 2.0
|
||||
virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0;
|
||||
|
||||
+ // Transform FP atomic opcode to corresponding OpenCL function name
|
||||
+ virtual std::string mapFPAtomicName(Op OC) = 0;
|
||||
+
|
||||
private:
|
||||
/// Transform uniform group opcode to corresponding OpenCL function name,
|
||||
/// example: GroupIAdd(Reduce) => group_iadd => work_group_reduce_add |
|
||||
diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp
|
||||
index afddd596..d7f00de3 100644
|
||||
--- a/lib/SPIRV/SPIRVToOCL12.cpp
|
||||
+++ b/lib/SPIRV/SPIRVToOCL12.cpp
|
||||
@@ -104,6 +104,9 @@ public:
|
||||
/// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
|
||||
std::string mapAtomicName(Op OC, Type *Ty);
|
||||
|
||||
+ // Transform FP atomic opcode to corresponding OpenCL function name
|
||||
+ std::string mapFPAtomicName(Op OC) override;
|
||||
+
|
||||
static char ID;
|
||||
};
|
||||
|
||||
@@ -338,6 +341,21 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) {
|
||||
return NewCI;
|
||||
}
|
||||
|
||||
+std::string SPIRVToOCL12::mapFPAtomicName(Op OC) {
|
||||
+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
|
||||
+ "AtomicF{Add/Min/Max}EXT!");
|
||||
+ switch (OC) {
|
||||
+ case OpAtomicFAddEXT:
|
||||
+ return "atomic_add";
|
||||
+ case OpAtomicFMinEXT:
|
||||
+ return "atomic_min";
|
||||
+ case OpAtomicFMaxEXT:
|
||||
+ return "atomic_max";
|
||||
+ default:
|
||||
+ llvm_unreachable("Unsupported opcode!");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
|
||||
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
|
||||
return mutateCallInstOCL(
|
||||
@@ -351,6 +369,9 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
|
||||
std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) {
|
||||
std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix
|
||||
: kOCLBuiltinName::AtomicPrefix;
|
||||
+ // Map fp atomic instructions to regular OpenCL built-ins.
|
||||
+ if (isFPAtomicOpCode(OC))
|
||||
+ return mapFPAtomicName(OC);
|
||||
return Prefix += OCL12SPIRVBuiltinMap::rmap(OC);
|
||||
}
|
||||
|
||||
diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp
|
||||
index d829ff42..01d088e9 100644
|
||||
--- a/lib/SPIRV/SPIRVToOCL20.cpp
|
||||
+++ b/lib/SPIRV/SPIRVToOCL20.cpp
|
||||
@@ -82,6 +82,9 @@ public:
|
||||
/// compare_exchange_strong/weak_explicit
|
||||
Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override;
|
||||
|
||||
+ // Transform FP atomic opcode to corresponding OpenCL function name
|
||||
+ std::string mapFPAtomicName(Op OC) override;
|
||||
+
|
||||
static char ID;
|
||||
};
|
||||
|
||||
@@ -144,11 +147,29 @@ void SPIRVToOCL20::visitCallSPIRVControlBarrier(CallInst *CI) {
|
||||
&Attrs);
|
||||
}
|
||||
|
||||
+std::string SPIRVToOCL20::mapFPAtomicName(Op OC) {
|
||||
+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
|
||||
+ "AtomicF{Add/Min/Max}EXT!");
|
||||
+ switch (OC) {
|
||||
+ case OpAtomicFAddEXT:
|
||||
+ return "atomic_fetch_add_explicit";
|
||||
+ case OpAtomicFMinEXT:
|
||||
+ return "atomic_fetch_min_explicit";
|
||||
+ case OpAtomicFMaxEXT:
|
||||
+ return "atomic_fetch_max_explicit";
|
||||
+ default:
|
||||
+ llvm_unreachable("Unsupported opcode!");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
Instruction *SPIRVToOCL20::mutateAtomicName(CallInst *CI, Op OC) {
|
||||
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
|
||||
return mutateCallInstOCL(
|
||||
M, CI,
|
||||
[=](CallInst *, std::vector<Value *> &Args) {
|
||||
+ // Map fp atomic instructions to regular OpenCL built-ins.
|
||||
+ if (isFPAtomicOpCode(OC))
|
||||
+ return mapFPAtomicName(OC);
|
||||
return OCLSPIRVBuiltinMap::rmap(OC);
|
||||
},
|
||||
&Attrs);
|
||||
@@ -215,7 +236,12 @@ CallInst *SPIRVToOCL20::mutateCommonAtomicArguments(CallInst *CI, Op OC) {
|
||||
}
|
||||
}
|
||||
auto Ptr = findFirstPtr(Args);
|
||||
- auto Name = OCLSPIRVBuiltinMap::rmap(OC);
|
||||
+ std::string Name;
|
||||
+ // Map fp atomic instructions to regular OpenCL built-ins.
|
||||
+ if (isFPAtomicOpCode(OC))
|
||||
+ Name = mapFPAtomicName(OC);
|
||||
+ else
|
||||
+ Name = OCLSPIRVBuiltinMap::rmap(OC);
|
||||
auto NumOrder = getSPIRVAtomicBuiltinNumMemoryOrderArgs(OC);
|
||||
auto ScopeIdx = Ptr + 1;
|
||||
auto OrderIdx = Ptr + 2;
|
||||
diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
|
||||
index 13f93fbe..7b707993 100644
|
||||
--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
|
||||
+++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
|
||||
@@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
|
||||
add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT");
|
||||
add(CapabilityAtomicFloat32MinMaxEXT, "AtomicFloat32MinMaxEXT");
|
||||
add(CapabilityAtomicFloat64MinMaxEXT, "AtomicFloat64MinMaxEXT");
|
||||
- add(CapabilityAtomicFloat16MinMaxEXT, "AtomicFloat16MinMaxEXT");
|
||||
add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL");
|
||||
add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL");
|
||||
add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL");
|
||||
diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
|
||||
index feec70f6..8e595e83 100644
|
||||
--- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h
|
||||
+++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
|
||||
@@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() {
|
||||
}
|
||||
SPIRV_DEF_NAMEMAP(Op, OpCodeNameMap)
|
||||
|
||||
+inline bool isFPAtomicOpCode(Op OpCode) {
|
||||
+ return OpCode == OpAtomicFAddEXT || OpCode == OpAtomicFMinEXT ||
|
||||
+ OpCode == OpAtomicFMaxEXT;
|
||||
+}
|
||||
+
|
||||
inline bool isAtomicOpCode(Op OpCode) {
|
||||
static_assert(OpAtomicLoad < OpAtomicXor, "");
|
||||
return ((unsigned)OpCode >= OpAtomicLoad &&
|
||||
(unsigned)OpCode <= OpAtomicXor) ||
|
||||
- OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear;
|
||||
+ OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear ||
|
||||
+ isFPAtomicOpCode(OpCode);
|
||||
}
|
||||
inline bool isBinaryOpCode(Op OpCode) {
|
||||
return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) ||
|
||||
diff --git a/test/AtomicFAddEXTForOCL.ll b/test/AtomicFAddEXTForOCL.ll
|
||||
new file mode 100644
|
||||
index 00000000..fb146fb9
|
||||
--- /dev/null
|
||||
+++ b/test/AtomicFAddEXTForOCL.ll
|
||||
@@ -0,0 +1,64 @@
|
||||
+; RUN: llvm-as %s -o %t.bc
|
||||
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv
|
||||
+; RUN: spirv-val %t.spv
|
||||
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
|
||||
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
+
|
||||
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
||||
+target triple = "spir-unknown-unknown"
|
||||
+
|
||||
+; CHECK-SPIRV: Capability AtomicFloat32AddEXT
|
||||
+; CHECK-SPIRV: Capability AtomicFloat64AddEXT
|
||||
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add"
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
+
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call = tail call spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call = tail call spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #2 = { convergent nounwind }
|
||||
+
|
||||
+!llvm.module.flags = !{!0}
|
||||
+!opencl.ocl.version = !{!1}
|
||||
+!opencl.spir.version = !{!1}
|
||||
+!llvm.ident = !{!2}
|
||||
+
|
||||
+!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
+!1 = !{i32 2, i32 0}
|
||||
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
|
||||
diff --git a/test/AtomicFAddExt.ll b/test/AtomicFAddExt.ll
|
||||
index 011dd8a7..42bdfeea 100644
|
||||
--- a/test/AtomicFAddExt.ll
|
||||
+++ b/test/AtomicFAddExt.ll
|
||||
@@ -4,20 +4,16 @@
|
||||
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
|
||||
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
|
||||
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
|
||||
|
||||
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
-target triple = "spir64-unknown-unknown-sycldevice"
|
||||
-
|
||||
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
|
||||
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-
|
||||
-$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
|
||||
-$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
|
||||
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
+target triple = "spir64-unknown-unknown-sycldevice"
|
||||
|
||||
; CHECK-SPIRV: Capability AtomicFloat32AddEXT
|
||||
; CHECK-SPIRV: Capability AtomicFloat64AddEXT
|
||||
@@ -25,62 +21,43 @@ $_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_3
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
|
||||
-; Function Attrs: convergent norecurse mustprogress
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func float @_Z14AtomicFloatIncRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
|
||||
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2
|
||||
- %add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv5.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i
|
||||
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
|
||||
- store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_add[[:alnum:]]+ff]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call3.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
|
||||
+ ret float %call3.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
|
||||
declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
|
||||
-; Function Attrs: convergent norecurse mustprogress
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func double @_Z15AtomicDoubleIncRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
|
||||
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2
|
||||
- %add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv5.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i
|
||||
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
|
||||
- store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_add[[:alnum:]]+dd]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call3.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
|
||||
+ ret double %call3.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
|
||||
declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
|
||||
-attributes #0 = { convergent norecurse }
|
||||
-attributes #1 = { convergent }
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
+attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
@@ -91,29 +68,5 @@ attributes #2 = { convergent nounwind }
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, i32 2}
|
||||
!2 = !{i32 4, i32 100000}
|
||||
-!3 = !{!"clang version 12.0.0"}
|
||||
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
|
||||
-!5 = !{!6, !8, !10, !12}
|
||||
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!14 = !{!15, !15, i64 0}
|
||||
-!15 = !{!"float", !16, i64 0}
|
||||
-!16 = !{!"omnipotent char", !17, i64 0}
|
||||
-!17 = !{!"Simple C++ TBAA"}
|
||||
-!18 = !{!19, !21, !23, !25}
|
||||
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!27 = !{!28, !28, i64 0}
|
||||
-!28 = !{!"double", !16, i64 0}
|
||||
+!3 = !{!"clang version 13.0.0"}
|
||||
+
|
||||
diff --git a/test/AtomicFMaxEXT.ll b/test/AtomicFMaxEXT.ll
|
||||
index 1b81e53b..1c2eec93 100644
|
||||
--- a/test/AtomicFMaxEXT.ll
|
||||
+++ b/test/AtomicFMaxEXT.ll
|
||||
@@ -4,20 +4,16 @@
|
||||
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
|
||||
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
|
||||
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
|
||||
|
||||
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
-target triple = "spir64-unknown-unknown-sycldevice"
|
||||
-
|
||||
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
|
||||
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-
|
||||
-$_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
|
||||
-$_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
|
||||
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
+target triple = "spir64-unknown-unknown-sycldevice"
|
||||
|
||||
; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
|
||||
; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
|
||||
@@ -25,65 +21,42 @@ $_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
|
||||
-; Function Attrs: convergent norecurse
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func float @_Z14AtomicFloatMaxRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
- %conv.i = trunc i64 %5 to i32
|
||||
- %conv3.i = sitofp i32 %conv.i to float
|
||||
- %add.i = fadd float %conv3.i, 1.000000e+00
|
||||
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
|
||||
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv6.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
|
||||
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
|
||||
- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_max[[:alnum:]]+ff]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
|
||||
+ ret float %call.i.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
|
||||
declare dso_local spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
|
||||
-; Function Attrs: convergent norecurse
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func double @_Z15AtomicDoubleMaxRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
- %conv.i = trunc i64 %5 to i32
|
||||
- %conv3.i = sitofp i32 %conv.i to double
|
||||
- %add.i = fadd double %conv3.i, 1.000000e+00
|
||||
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
|
||||
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv6.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
|
||||
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
|
||||
- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_max[[:alnum:]]+dd]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
|
||||
+ ret double %call.i.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
|
||||
declare dso_local spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
|
||||
-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, i32 2}
|
||||
!2 = !{i32 4, i32 100000}
|
||||
-!3 = !{!"clang version 12.0.0"}
|
||||
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
|
||||
-!5 = !{!6, !8, !10, !12}
|
||||
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!14 = !{!15, !15, i64 0}
|
||||
-!15 = !{!"float", !16, i64 0}
|
||||
-!16 = !{!"omnipotent char", !17, i64 0}
|
||||
-!17 = !{!"Simple C++ TBAA"}
|
||||
-!18 = !{!19, !21, !23, !25}
|
||||
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!27 = !{!28, !28, i64 0}
|
||||
-!28 = !{!"double", !16, i64 0}
|
||||
+!3 = !{!"clang version 13.0.0"}
|
||||
+
|
||||
diff --git a/test/AtomicFMaxEXTForOCL.ll b/test/AtomicFMaxEXTForOCL.ll
|
||||
new file mode 100644
|
||||
index 00000000..1f2530d9
|
||||
--- /dev/null
|
||||
+++ b/test/AtomicFMaxEXTForOCL.ll
|
||||
@@ -0,0 +1,64 @@
|
||||
+; RUN: llvm-as %s -o %t.bc
|
||||
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
|
||||
+; RUN: spirv-val %t.spv
|
||||
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
|
||||
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
+
|
||||
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
||||
+target triple = "spir-unknown-unknown"
|
||||
+
|
||||
+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
|
||||
+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
|
||||
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call = tail call spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call = tail call spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #2 = { convergent nounwind }
|
||||
+
|
||||
+!llvm.module.flags = !{!0}
|
||||
+!opencl.ocl.version = !{!1}
|
||||
+!opencl.spir.version = !{!1}
|
||||
+!llvm.ident = !{!2}
|
||||
+
|
||||
+!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
+!1 = !{i32 2, i32 0}
|
||||
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
|
||||
diff --git a/test/AtomicFMinEXT.ll b/test/AtomicFMinEXT.ll
|
||||
index 98c98b8e..9e40a669 100644
|
||||
--- a/test/AtomicFMinEXT.ll
|
||||
+++ b/test/AtomicFMinEXT.ll
|
||||
@@ -4,20 +4,16 @@
|
||||
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
|
||||
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
|
||||
-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
|
||||
|
||||
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
-target triple = "spir64-unknown-unknown-sycldevice"
|
||||
-
|
||||
-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
|
||||
-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
|
||||
-
|
||||
-$_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
|
||||
-$_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
|
||||
-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
||||
+target triple = "spir64-unknown-unknown-sycldevice"
|
||||
|
||||
; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
|
||||
; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
|
||||
@@ -25,65 +21,42 @@ $_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
|
||||
-; Function Attrs: convergent norecurse
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func float @_Z14AtomicFloatMinRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
- %conv.i = trunc i64 %5 to i32
|
||||
- %conv3.i = sitofp i32 %conv.i to float
|
||||
- %add.i = fadd float %conv3.i, 1.000000e+00
|
||||
+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
|
||||
- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv6.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
|
||||
- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
|
||||
- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_min[[:alnum:]]+ff]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
|
||||
+ ret float %call.i.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
|
||||
declare dso_local spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
|
||||
-; Function Attrs: convergent norecurse
|
||||
-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func double @_Z15AtomicDoubleMinRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
|
||||
entry:
|
||||
- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
|
||||
- %1 = load i64, i64* %0, align 8
|
||||
- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
|
||||
- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
|
||||
- %3 = load i64, i64* %2, align 8
|
||||
- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
|
||||
- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
|
||||
- %5 = extractelement <3 x i64> %4, i64 0
|
||||
- %conv.i = trunc i64 %5 to i32
|
||||
- %conv3.i = sitofp i32 %conv.i to double
|
||||
- %add.i = fadd double %conv3.i, 1.000000e+00
|
||||
+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
|
||||
; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
|
||||
- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
|
||||
- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
|
||||
- %sext.i = shl i64 %5, 32
|
||||
- %conv6.i = ashr exact i64 %sext.i, 32
|
||||
- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
|
||||
- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
|
||||
- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
|
||||
- ret void
|
||||
+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_min[[:alnum:]]+dd]]({{.*}})
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
|
||||
+ ret double %call.i.i.i
|
||||
}
|
||||
|
||||
; Function Attrs: convergent
|
||||
-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
|
||||
declare dso_local spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
|
||||
-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { convergent nounwind }
|
||||
|
||||
@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, i32 2}
|
||||
!2 = !{i32 4, i32 100000}
|
||||
-!3 = !{!"clang version 12.0.0 (https://github.com/otcshare/llvm.git 67add71766d55d6a8d8d894822f583d6365a3b7d)"}
|
||||
-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
|
||||
-!5 = !{!6, !8, !10, !12}
|
||||
-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!14 = !{!15, !15, i64 0}
|
||||
-!15 = !{!"float", !16, i64 0}
|
||||
-!16 = !{!"omnipotent char", !17, i64 0}
|
||||
-!17 = !{!"Simple C++ TBAA"}
|
||||
-!18 = !{!19, !21, !23, !25}
|
||||
-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
|
||||
-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
|
||||
-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
|
||||
-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
|
||||
-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
|
||||
-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
|
||||
-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
|
||||
-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
|
||||
-!27 = !{!28, !28, i64 0}
|
||||
-!28 = !{!"double", !16, i64 0}
|
||||
+!3 = !{!"clang version 13.0.0"}
|
||||
+
|
||||
diff --git a/test/AtomicFMinEXTForOCL.ll b/test/AtomicFMinEXTForOCL.ll
|
||||
new file mode 100644
|
||||
index 00000000..6196b0f8
|
||||
--- /dev/null
|
||||
+++ b/test/AtomicFMinEXTForOCL.ll
|
||||
@@ -0,0 +1,64 @@
|
||||
+; RUN: llvm-as %s -o %t.bc
|
||||
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
|
||||
+; RUN: spirv-val %t.spv
|
||||
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
|
||||
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
|
||||
+
|
||||
+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
|
||||
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
|
||||
+
|
||||
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
||||
+target triple = "spir-unknown-unknown"
|
||||
+
|
||||
+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
|
||||
+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
|
||||
+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
|
||||
+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
|
||||
+ %call = tail call spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+
|
||||
+; Function Attrs: convergent norecurse nounwind
|
||||
+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
|
||||
+entry:
|
||||
+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
|
||||
+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
|
||||
+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
|
||||
+ %call = tail call spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: convergent
|
||||
+declare spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
|
||||
+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
|
||||
+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
|
||||
+
|
||||
+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
|
||||
+attributes #2 = { convergent nounwind }
|
||||
+
|
||||
+!llvm.module.flags = !{!0}
|
||||
+!opencl.ocl.version = !{!1}
|
||||
+!opencl.spir.version = !{!1}
|
||||
+!llvm.ident = !{!2}
|
||||
+
|
||||
+!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
+!1 = !{i32 2, i32 0}
|
||||
+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
|
||||
diff --git a/test/InvalidAtomicBuiltins.cl b/test/InvalidAtomicBuiltins.cl
|
||||
index b8ec5b89..2182f070 100644
|
||||
--- a/test/InvalidAtomicBuiltins.cl
|
||||
+++ b/test/InvalidAtomicBuiltins.cl
|
||||
@@ -41,13 +41,9 @@ float __attribute__((overloadable)) atomic_fetch_xor(volatile generic atomic_flo
|
||||
double __attribute__((overloadable)) atomic_fetch_and(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
double __attribute__((overloadable)) atomic_fetch_max(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
double __attribute__((overloadable)) atomic_fetch_min(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
-float __attribute__((overloadable)) atomic_fetch_add_explicit(volatile generic atomic_float *object, float operand, memory_order order);
|
||||
-float __attribute__((overloadable)) atomic_fetch_sub_explicit(volatile generic atomic_float *object, float operand, memory_order order);
|
||||
float __attribute__((overloadable)) atomic_fetch_or_explicit(volatile generic atomic_float *object, float operand, memory_order order);
|
||||
float __attribute__((overloadable)) atomic_fetch_xor_explicit(volatile generic atomic_float *object, float operand, memory_order order);
|
||||
double __attribute__((overloadable)) atomic_fetch_and_explicit(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
-double __attribute__((overloadable)) atomic_fetch_max_explicit(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
-double __attribute__((overloadable)) atomic_fetch_min_explicit(volatile generic atomic_double *object, double operand, memory_order order);
|
||||
|
||||
__kernel void test_atomic_fn(volatile __global float *p,
|
||||
volatile __global double *pp,
|
||||
@@ -86,11 +82,7 @@ __kernel void test_atomic_fn(volatile __global float *p,
|
||||
d = atomic_fetch_and(pp, val, order);
|
||||
d = atomic_fetch_min(pp, val, order);
|
||||
d = atomic_fetch_max(pp, val, order);
|
||||
- f = atomic_fetch_add_explicit(p, val, order);
|
||||
- f = atomic_fetch_sub_explicit(p, val, order);
|
||||
f = atomic_fetch_or_explicit(p, val, order);
|
||||
f = atomic_fetch_xor_explicit(p, val, order);
|
||||
d = atomic_fetch_and_explicit(pp, val, order);
|
||||
- d = atomic_fetch_min_explicit(pp, val, order);
|
||||
- d = atomic_fetch_max_explicit(pp, val, order);
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
From cfb18b75e8a353bc7486f337541476a36994b063 Mon Sep 17 00:00:00 2001
|
||||
From: juanrod2 <>
|
||||
Date: Tue, 22 Dec 2020 08:33:08 +0800
|
||||
Subject: [PATCH 3/7] Memory leak fix for Managed Static Mutex
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
|
||||
Cleaning a mutex inside ManagedStatic llvm class.
|
||||
---
|
||||
llvm/lib/Support/ManagedStatic.cpp | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp
|
||||
index 053493f72fb5..6571580ccecf 100644
|
||||
--- a/llvm/lib/Support/ManagedStatic.cpp
|
||||
+++ b/llvm/lib/Support/ManagedStatic.cpp
|
||||
@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const {
|
||||
|
||||
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
|
||||
void llvm::llvm_shutdown() {
|
||||
- std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
|
||||
+ getManagedStaticMutex()->lock();
|
||||
|
||||
while (StaticList)
|
||||
StaticList->destroy();
|
||||
+
|
||||
+ getManagedStaticMutex()->unlock();
|
||||
+ delete ManagedStaticMutex;
|
||||
+ ManagedStaticMutex = nullptr;
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
From b794037bf1f90a93efa4c542855ad569cb13b4c5 Mon Sep 17 00:00:00 2001
|
||||
From: Feng Zou <feng.zou@intel.com>
|
||||
Date: Mon, 19 Oct 2020 14:43:38 +0800
|
||||
Subject: [PATCH 4/7] Remove repo name in LLVM IR
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch]
|
||||
Signed-off-by: Feng Zou <feng.zou@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++-----------
|
||||
1 file changed, 12 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
|
||||
index 1b6519b4b7c4..8fd6b23bb345 100644
|
||||
--- a/llvm/cmake/modules/VersionFromVCS.cmake
|
||||
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
|
||||
@@ -33,17 +33,18 @@ function(get_source_info path revision repository)
|
||||
else()
|
||||
set(remote "origin")
|
||||
endif()
|
||||
- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
|
||||
- WORKING_DIRECTORY ${path}
|
||||
- RESULT_VARIABLE git_result
|
||||
- OUTPUT_VARIABLE git_output
|
||||
- ERROR_QUIET)
|
||||
- if(git_result EQUAL 0)
|
||||
- string(STRIP "${git_output}" git_output)
|
||||
- set(${repository} ${git_output} PARENT_SCOPE)
|
||||
- else()
|
||||
- set(${repository} ${path} PARENT_SCOPE)
|
||||
- endif()
|
||||
+ # Do not show repo name in IR
|
||||
+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
|
||||
+ # WORKING_DIRECTORY ${path}
|
||||
+ # RESULT_VARIABLE git_result
|
||||
+ # OUTPUT_VARIABLE git_output
|
||||
+ # ERROR_QUIET)
|
||||
+ # if(git_result EQUAL 0)
|
||||
+ # string(STRIP "${git_output}" git_output)
|
||||
+ # set(${repository} ${git_output} PARENT_SCOPE)
|
||||
+ # else()
|
||||
+ # set(${repository} ${path} PARENT_SCOPE)
|
||||
+ # endif()
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
From 3dd4766499d25e5978a5d90001f18e657e875da0 Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Thu, 12 Aug 2021 15:48:34 +0800
|
||||
Subject: [PATCH 5/7] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR
|
||||
doesn't require image support
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0003-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch]
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
clang/lib/Frontend/InitPreprocessor.cpp | 3 ---
|
||||
clang/test/Preprocessor/predefined-macros.c | 4 ----
|
||||
2 files changed, 7 deletions(-)
|
||||
|
||||
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
index aefd208e6cd3..b4a84636673a 100644
|
||||
--- a/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
@@ -1108,9 +1108,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
|
||||
if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \
|
||||
Builder.defineMacro(#Ext);
|
||||
#include "clang/Basic/OpenCLExtensions.def"
|
||||
-
|
||||
- if (TI.getTriple().isSPIR())
|
||||
- Builder.defineMacro("__IMAGE_SUPPORT__");
|
||||
}
|
||||
|
||||
if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) {
|
||||
diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c
|
||||
index b088a37ba665..39a222d02faf 100644
|
||||
--- a/clang/test/Preprocessor/predefined-macros.c
|
||||
+++ b/clang/test/Preprocessor/predefined-macros.c
|
||||
@@ -184,10 +184,6 @@
|
||||
// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
|
||||
// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
|
||||
|
||||
-// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \
|
||||
-// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR
|
||||
-// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1
|
||||
-
|
||||
// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
|
||||
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP
|
||||
// CHECK-HIP-NOT: #define __CUDA_ARCH__
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
From 2c53abd0008bbecfcfe871c6060f4bbf1c94c74a Mon Sep 17 00:00:00 2001
|
||||
From: Raphael Isemann <teemperor@gmail.com>
|
||||
Date: Thu, 1 Apr 2021 18:41:44 +0200
|
||||
Subject: [PATCH 6/7] Avoid calling ParseCommandLineOptions in BackendUtil if
|
||||
possible
|
||||
|
||||
Calling `ParseCommandLineOptions` should only be called from `main` as the
|
||||
CommandLine setup code isn't thread-safe. As BackendUtil is part of the
|
||||
generic Clang FrontendAction logic, a process which has several threads executing
|
||||
Clang FrontendActions will randomly crash in the unsafe setup code.
|
||||
|
||||
This patch avoids calling the function unless either the debug-pass option or
|
||||
limit-float-precision option is set. Without these two options set the
|
||||
`ParseCommandLineOptions` call doesn't do anything beside parsing
|
||||
the command line `clang` which doesn't set any options.
|
||||
|
||||
See also D99652 where LLDB received a workaround for this crash.
|
||||
|
||||
Reviewed By: JDevlieghere
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D99740
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0004-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch]
|
||||
|
||||
Signed-off-by: Raphael Isemann <teemperor@gmail.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
index 0bfcab88a3a9..db8fd4166d7a 100644
|
||||
--- a/clang/lib/CodeGen/BackendUtil.cpp
|
||||
+++ b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
@@ -743,7 +743,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
|
||||
BackendArgs.push_back("-limit-float-precision");
|
||||
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
|
||||
}
|
||||
+ // Check for the default "clang" invocation that won't set any cl::opt values.
|
||||
+ // Skip trying to parse the command line invocation to avoid the issues
|
||||
+ // described below.
|
||||
+ if (BackendArgs.size() == 1)
|
||||
+ return;
|
||||
BackendArgs.push_back(nullptr);
|
||||
+ // FIXME: The command line parser below is not thread-safe and shares a global
|
||||
+ // state, so this call might crash or overwrite the options of another Clang
|
||||
+ // instance in the same process.
|
||||
llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
|
||||
BackendArgs.data());
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,377 +0,0 @@
|
|||
From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Fri, 13 Aug 2021 10:00:02 +0800
|
||||
Subject: [PATCH 7/7] support cl_ext_float_atomics
|
||||
|
||||
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch]
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
clang/lib/Headers/opencl-c-base.h | 25 ++++
|
||||
clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++
|
||||
clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++
|
||||
3 files changed, 329 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
|
||||
index 2cc688ccc3da..86bbee12fdf8 100644
|
||||
--- a/clang/lib/Headers/opencl-c-base.h
|
||||
+++ b/clang/lib/Headers/opencl-c-base.h
|
||||
@@ -14,6 +14,31 @@
|
||||
#define CL_VERSION_3_0 300
|
||||
#endif
|
||||
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+// For SPIR all extensions are supported.
|
||||
+#if defined(__SPIR__)
|
||||
+#define cl_ext_float_atomics 1
|
||||
+#ifdef cl_khr_fp16
|
||||
+#define __opencl_c_ext_fp16_global_atomic_load_store 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_load_store 1
|
||||
+#define __opencl_c_ext_fp16_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
|
||||
+#endif
|
||||
+#ifdef __opencl_c_fp64
|
||||
+#define __opencl_c_ext_fp64_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp64_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp64_local_atomic_min_max 1
|
||||
+#endif
|
||||
+#define __opencl_c_ext_fp32_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp32_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
+#endif // defined(__SPIR__)
|
||||
+#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+
|
||||
// Define features for 2.0 for header backward compatibility
|
||||
#ifndef __opencl_c_int64
|
||||
#define __opencl_c_int64 1
|
||||
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
|
||||
index 67d900eb1c3d..b463e702d95e 100644
|
||||
--- a/clang/lib/Headers/opencl-c.h
|
||||
+++ b/clang/lib/Headers/opencl-c.h
|
||||
@@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit(
|
||||
// defined(cl_khr_int64_extended_atomics)
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
|
||||
|
||||
+#if defined(cl_ext_float_atomics)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_max(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_max(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_max(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_max(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+
|
||||
+#endif // cl_ext_float_atomics
|
||||
+
|
||||
// atomic_store()
|
||||
|
||||
#if defined(__opencl_c_atomic_scope_device) && \
|
||||
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
|
||||
index 2716076acdcf..7f720cf28142 100644
|
||||
--- a/clang/test/Headers/opencl-c-header.cl
|
||||
+++ b/clang/test/Headers/opencl-c-header.cl
|
||||
@@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
|
||||
#pragma OPENCL EXTENSION cl_intel_planar_yuv : enable
|
||||
|
||||
// CHECK-MOD: Reading modules
|
||||
+
|
||||
+// For SPIR all extensions are supported.
|
||||
+#if defined(__SPIR__)
|
||||
+
|
||||
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+
|
||||
+#if __opencl_c_ext_fp16_global_atomic_load_store != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_load_store != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
|
||||
+#endif
|
||||
+#else
|
||||
+
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_load_store
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_load_store
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
|
||||
+#endif
|
||||
+
|
||||
+#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
+
|
||||
+#endif // defined(__SPIR__)
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 11:53:27 +0800
|
||||
Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in
|
||||
#1767
|
||||
|
||||
It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994.
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
|
||||
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
|
||||
2 files changed, 21 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
index 63ddb59fce68..822da2183269 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
|
||||
}
|
||||
}
|
||||
}
|
||||
-
|
||||
+#ifndef NDEBUG
|
||||
// Checked that NewNodes are only used by other NewNodes.
|
||||
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
|
||||
SDNode *N = NewNodes[i];
|
||||
@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
|
||||
UI != UE; ++UI)
|
||||
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
/// This is the main entry point for the type legalizer. This does a top-down
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
index faae14444d51..b908c5c58e9f 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
@@ -155,7 +155,9 @@ private:
|
||||
const SDValue &getSDValue(TableId &Id) {
|
||||
RemapId(Id);
|
||||
assert(Id && "TableId should be non-zero");
|
||||
- return IdToValueMap[Id];
|
||||
+ auto I = IdToValueMap.find(Id);
|
||||
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
|
||||
+ return I->second;
|
||||
}
|
||||
|
||||
public:
|
||||
@@ -172,24 +174,29 @@ public:
|
||||
bool run();
|
||||
|
||||
void NoteDeletion(SDNode *Old, SDNode *New) {
|
||||
+ assert(Old != New && "node replaced with self");
|
||||
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
|
||||
TableId NewId = getTableId(SDValue(New, i));
|
||||
TableId OldId = getTableId(SDValue(Old, i));
|
||||
|
||||
- if (OldId != NewId)
|
||||
+ if (OldId != NewId) {
|
||||
ReplacedValues[OldId] = NewId;
|
||||
|
||||
- // Delete Node from tables.
|
||||
+ // Delete Node from tables. We cannot do this when OldId == NewId,
|
||||
+ // because NewId can still have table references to it in
|
||||
+ // ReplacedValues.
|
||||
+ IdToValueMap.erase(OldId);
|
||||
+ PromotedIntegers.erase(OldId);
|
||||
+ ExpandedIntegers.erase(OldId);
|
||||
+ SoftenedFloats.erase(OldId);
|
||||
+ PromotedFloats.erase(OldId);
|
||||
+ ExpandedFloats.erase(OldId);
|
||||
+ ScalarizedVectors.erase(OldId);
|
||||
+ SplitVectors.erase(OldId);
|
||||
+ WidenedVectors.erase(OldId);
|
||||
+ }
|
||||
+
|
||||
ValueToIdMap.erase(SDValue(Old, i));
|
||||
- IdToValueMap.erase(OldId);
|
||||
- PromotedIntegers.erase(OldId);
|
||||
- ExpandedIntegers.erase(OldId);
|
||||
- SoftenedFloats.erase(OldId);
|
||||
- PromotedFloats.erase(OldId);
|
||||
- ExpandedFloats.erase(OldId);
|
||||
- ScalarizedVectors.erase(OldId);
|
||||
- SplitVectors.erase(OldId);
|
||||
- WidenedVectors.erase(OldId);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,105 +0,0 @@
|
|||
From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 11:56:01 +0800
|
||||
Subject: [PATCH 2/2] This patch is a fix for #1788.
|
||||
|
||||
It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
|
||||
This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
|
||||
.../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
|
||||
2 files changed, 44 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
|
||||
index 6621fc9f819c..fb7ad93519f6 100644
|
||||
--- a/llvm/include/llvm/IR/PatternMatch.h
|
||||
+++ b/llvm/include/llvm/IR/PatternMatch.h
|
||||
@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
|
||||
|
||||
struct apint_match {
|
||||
const APInt *&Res;
|
||||
+ bool AllowUndef;
|
||||
|
||||
- apint_match(const APInt *&R) : Res(R) {}
|
||||
+ apint_match(const APInt *&Res, bool AllowUndef)
|
||||
+ : Res(Res), AllowUndef(AllowUndef) {}
|
||||
|
||||
template <typename ITy> bool match(ITy *V) {
|
||||
if (auto *CI = dyn_cast<ConstantInt>(V)) {
|
||||
@@ -162,7 +164,8 @@ struct apint_match {
|
||||
}
|
||||
if (V->getType()->isVectorTy())
|
||||
if (const auto *C = dyn_cast<Constant>(V))
|
||||
- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
|
||||
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
|
||||
+ C->getSplatValue(AllowUndef))) {
|
||||
Res = &CI->getValue();
|
||||
return true;
|
||||
}
|
||||
@@ -192,7 +195,20 @@ struct apfloat_match {
|
||||
|
||||
/// Match a ConstantInt or splatted ConstantVector, binding the
|
||||
/// specified pointer to the contained APInt.
|
||||
-inline apint_match m_APInt(const APInt *&Res) { return Res; }
|
||||
+inline apint_match m_APInt(const APInt *&Res) {
|
||||
+ // Forbid undefs by default to maintain previous behavior.
|
||||
+ return apint_match(Res, /* AllowUndef */ false);
|
||||
+}
|
||||
+
|
||||
+/// Match APInt while allowing undefs in splat vector constants.
|
||||
+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
|
||||
+ return apint_match(Res, /* AllowUndef */ true);
|
||||
+}
|
||||
+
|
||||
+/// Match APInt while forbidding undefs in splat vector constants.
|
||||
+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
|
||||
+ return apint_match(Res, /* AllowUndef */ false);
|
||||
+}
|
||||
|
||||
/// Match a ConstantFP or splatted ConstantVector, binding the
|
||||
/// specified pointer to the contained APFloat.
|
||||
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
index bf32996d96e2..40a246b9d7a7 100644
|
||||
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
|
||||
if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
|
||||
if (CI->hasOneUse()) {
|
||||
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
|
||||
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
|
||||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
|
||||
+
|
||||
+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
|
||||
+ // We have to ensure that vector constants that only differ with
|
||||
+ // undef elements are treated as equivalent.
|
||||
+ auto areLooselyEqual = [](Value *A, Value *B) {
|
||||
+ if (A == B)
|
||||
+ return true;
|
||||
+
|
||||
+ // Test for vector constants.
|
||||
+ Constant *ConstA, *ConstB;
|
||||
+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
|
||||
+ return false;
|
||||
+
|
||||
+ // TODO: Deal with FP constants?
|
||||
+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
|
||||
+ return false;
|
||||
+
|
||||
+ // Compare for equality including undefs as equal.
|
||||
+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
|
||||
+ const APInt *C;
|
||||
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
|
||||
+ };
|
||||
+
|
||||
+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
|
||||
+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:00:23 +0800
|
||||
Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
|
||||
|
||||
It's backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 2476fd26f250..2743acc89bca 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
|
||||
if (N0.isUndef())
|
||||
- return DAG.getUNDEF(VT);
|
||||
+ return DAG.getConstant(0, SDLoc(N), VT);
|
||||
|
||||
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
|
||||
return Res;
|
||||
@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
+ // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
|
||||
if (N0.isUndef())
|
||||
- return DAG.getUNDEF(VT);
|
||||
+ return DAG.getConstant(0, SDLoc(N), VT);
|
||||
|
||||
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
|
||||
return Res;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:02:37 +0800
|
||||
Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
|
||||
avx512skx-i16x32.
|
||||
|
||||
It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
|
||||
https://reviews.llvm.org/D76312
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 2743acc89bca..439a8367dabe 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
|
||||
// Attempt to pre-truncate BUILD_VECTOR sources.
|
||||
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
|
||||
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
|
||||
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
|
||||
+ // Avoid creating illegal types if running after type legalizer.
|
||||
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
|
||||
SDLoc DL(N);
|
||||
EVT SVT = VT.getScalarType();
|
||||
SmallVector<SDValue, 8> TruncOps;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:07:25 +0800
|
||||
Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
|
||||
createVariablePermute call fails
|
||||
|
||||
Account for the case where a recursive createVariablePermute call with a wider vector type fails.
|
||||
|
||||
Original test case from @craig.topper (Craig Topper)
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
|
||||
|
||||
Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index c8720d9ae3a6..63eb050e9b3a 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
||||
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
|
||||
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
|
||||
Subtarget, DAG, SDLoc(IndicesVec));
|
||||
- return extractSubVector(
|
||||
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
|
||||
- DAG, DL, SizeInBits);
|
||||
+ SDValue NewSrcVec =
|
||||
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
|
||||
+ if (NewSrcVec)
|
||||
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
|
||||
+ return SDValue();
|
||||
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
|
||||
// Widen smaller SrcVec to match VT.
|
||||
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:09:42 +0800
|
||||
Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
|
||||
avx512skx-i16x32 targets.
|
||||
|
||||
This is combination of two commits:
|
||||
- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
|
||||
- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
|
||||
1 file changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 439a8367dabe..b1639c7f275d 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
||||
|
||||
// Allow targets to opt-out.
|
||||
EVT VT = Extract->getValueType(0);
|
||||
+
|
||||
+ // We can only create byte sized loads.
|
||||
+ if (!VT.isByteSized())
|
||||
+ return SDValue();
|
||||
+
|
||||
+ unsigned Index = ExtIdx->getZExtValue();
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ // If the index is a multiple of the extract element count, we can offset the
|
||||
+ // address by the store size multiplied by the subvector index. Otherwise if
|
||||
+ // the scalar type is byte sized, we can just use the index multiplied by
|
||||
+ // the element size in bytes as the offset.
|
||||
+ unsigned Offset;
|
||||
+ if (Index % NumElts == 0)
|
||||
+ Offset = (Index / NumElts) * VT.getStoreSize();
|
||||
+ else if (VT.getScalarType().isByteSized())
|
||||
+ Offset = Index * VT.getScalarType().getStoreSize();
|
||||
+ else
|
||||
+ return SDValue();
|
||||
+
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
|
||||
return SDValue();
|
||||
@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
||||
// The narrow load will be offset from the base address of the old load if
|
||||
// we are extracting from something besides index 0 (little-endian).
|
||||
SDLoc DL(Extract);
|
||||
- SDValue BaseAddr = Ld->getOperand(1);
|
||||
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
|
||||
+ SDValue BaseAddr = Ld->getBasePtr();
|
||||
|
||||
// TODO: Use "BaseIndexOffset" to make this more effective.
|
||||
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:13:00 +0800
|
||||
Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
|
||||
store zeros in the rest of the byte
|
||||
|
||||
We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
|
||||
|
||||
I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
|
||||
|
||||
Should fix PR48147.
|
||||
|
||||
Reviewed By: RKSimon
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D9129
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by:Craig Topper <craig.topper@sifive.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
|
||||
llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
|
||||
2 files changed, 14 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index 63eb050e9b3a..96b5e2cfbd82 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
|
||||
if (StoredVal.getValueType().isVector() &&
|
||||
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
|
||||
- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
|
||||
- "Unexpected VT");
|
||||
+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
|
||||
+ assert(NumElts <= 8 && "Unexpected VT");
|
||||
assert(!St->isTruncatingStore() && "Expected non-truncating store");
|
||||
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
|
||||
"Expected AVX512F without AVX512DQI");
|
||||
|
||||
+ // We must pad with zeros to ensure we store zeroes to any unused bits.
|
||||
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
|
||||
DAG.getUNDEF(MVT::v16i1), StoredVal,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
|
||||
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
|
||||
+ // Make sure we store zeros in the extra bits.
|
||||
+ if (NumElts < 8)
|
||||
+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
|
||||
+ MVT::getIntegerVT(NumElts));
|
||||
|
||||
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
St->getPointerInfo(), St->getAlignment(),
|
||||
@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
|
||||
StoredVal = DAG.getBitcast(NewVT, StoredVal);
|
||||
-
|
||||
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
+ SDValue Val = StoredVal.getOperand(0);
|
||||
+ // We must store zeros to the unused bits.
|
||||
+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
|
||||
+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
|
||||
St->getPointerInfo(), St->getAlignment(),
|
||||
St->getMemOperand()->getFlags());
|
||||
}
|
||||
@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
// Widen v2i1/v4i1 stores to v8i1.
|
||||
- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
|
||||
+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
|
||||
Subtarget.hasAVX512()) {
|
||||
unsigned NumConcats = 8 / VT.getVectorNumElements();
|
||||
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
|
||||
+ // We must store zeros to the unused bits.
|
||||
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
|
||||
Ops[0] = StoredVal;
|
||||
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
|
||||
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
index 32f012033fb0..d3b92183f87b 100644
|
||||
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
|
||||
|
||||
// Load/store kreg
|
||||
let Predicates = [HasDQI] in {
|
||||
- def : Pat<(store VK1:$src, addr:$dst),
|
||||
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
|
||||
|
||||
def : Pat<(v1i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:15:09 +0800
|
||||
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
|
||||
getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
|
||||
file
|
||||
|
||||
Previously we tried to promote these to xmm/ymm/zmm by promoting
|
||||
in the X86CallingConv.td file. But this breaks when we run out
|
||||
of xmm/ymm/zmm registers and need to fall back to memory. We end
|
||||
up trying to create a non-sensical scalar to vector. This lead
|
||||
to an assertion. The new tests in avx512-calling-conv.ll all
|
||||
trigger this assertion.
|
||||
|
||||
Since we really want to treat these types like we do on avx2,
|
||||
it seems better to promote them before the calling convention
|
||||
code gets involved. Except when the calling convention is one
|
||||
that passes the vXi1 type in a k register.
|
||||
|
||||
The changes in avx512-regcall-Mask.ll are because we indicated
|
||||
that xmm/ymm/zmm types should be passed indirectly for the
|
||||
Win64 ABI before we go to the common lines that promoted the
|
||||
vXi1 types. This caused the promoted types to be picked up by
|
||||
the default calling convention code. Now we promote them earlier
|
||||
so they get passed indirectly as though they were xmm/ymm/zmm.
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D75154
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
|
||||
|
||||
Signed-off-by:Craig Topper <craig.topper@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
|
||||
1 file changed, 61 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index 96b5e2cfbd82..d5de94aeb8a2 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
|
||||
return TargetLoweringBase::getPreferredVectorAction(VT);
|
||||
}
|
||||
|
||||
+static std::pair<MVT, unsigned>
|
||||
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
|
||||
+ const X86Subtarget &Subtarget) {
|
||||
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
|
||||
+ // convention is one that uses k registers.
|
||||
+ if (NumElts == 2)
|
||||
+ return {MVT::v2i64, 1};
|
||||
+ if (NumElts == 4)
|
||||
+ return {MVT::v4i32, 1};
|
||||
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
|
||||
+ CC != CallingConv::Intel_OCL_BI)
|
||||
+ return {MVT::v8i16, 1};
|
||||
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
|
||||
+ CC != CallingConv::Intel_OCL_BI)
|
||||
+ return {MVT::v16i8, 1};
|
||||
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
|
||||
+ // regcall.
|
||||
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
|
||||
+ return {MVT::v32i8, 1};
|
||||
+ // Split v64i1 vectors if we don't have v64i8 available.
|
||||
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
|
||||
+ if (Subtarget.useAVX512Regs())
|
||||
+ return {MVT::v64i8, 1};
|
||||
+ return {MVT::v32i8, 2};
|
||||
+ }
|
||||
+
|
||||
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
|
||||
+ NumElts > 64)
|
||||
+ return {MVT::i8, NumElts};
|
||||
+
|
||||
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
|
||||
+}
|
||||
+
|
||||
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
|
||||
CallingConv::ID CC,
|
||||
EVT VT) const {
|
||||
- // v32i1 vectors should be promoted to v32i8 to match avx2.
|
||||
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
|
||||
- return MVT::v32i8;
|
||||
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
- Subtarget.hasAVX512() &&
|
||||
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
|
||||
- return MVT::i8;
|
||||
- // Split v64i1 vectors if we don't have v64i8 available.
|
||||
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
- CC != CallingConv::X86_RegCall)
|
||||
- return MVT::v32i1;
|
||||
+ Subtarget.hasAVX512()) {
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ MVT RegisterVT;
|
||||
+ unsigned NumRegisters;
|
||||
+ std::tie(RegisterVT, NumRegisters) =
|
||||
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
|
||||
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
|
||||
+ return RegisterVT;
|
||||
+ }
|
||||
+
|
||||
// FIXME: Should we just make these types legal and custom split operations?
|
||||
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
|
||||
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
|
||||
return MVT::v16i32;
|
||||
+
|
||||
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
|
||||
}
|
||||
|
||||
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
|
||||
CallingConv::ID CC,
|
||||
EVT VT) const {
|
||||
- // v32i1 vectors should be promoted to v32i8 to match avx2.
|
||||
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
|
||||
- return 1;
|
||||
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
- Subtarget.hasAVX512() &&
|
||||
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
|
||||
- return VT.getVectorNumElements();
|
||||
- // Split v64i1 vectors if we don't have v64i8 available.
|
||||
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
- CC != CallingConv::X86_RegCall)
|
||||
- return 2;
|
||||
+ Subtarget.hasAVX512()) {
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ MVT RegisterVT;
|
||||
+ unsigned NumRegisters;
|
||||
+ std::tie(RegisterVT, NumRegisters) =
|
||||
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
|
||||
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
|
||||
+ return NumRegisters;
|
||||
+ }
|
||||
+
|
||||
// FIXME: Should we just make these types legal and custom split operations?
|
||||
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
|
||||
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
|
||||
return 1;
|
||||
+
|
||||
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
|
||||
}
|
||||
|
||||
@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
Subtarget.hasAVX512() &&
|
||||
(!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
|
||||
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
|
||||
+ VT.getVectorNumElements() > 64)) {
|
||||
RegisterVT = MVT::i8;
|
||||
IntermediateVT = MVT::i1;
|
||||
NumIntermediates = VT.getVectorNumElements();
|
||||
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
// Split v64i1 vectors if we don't have v64i8 available.
|
||||
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
CC != CallingConv::X86_RegCall) {
|
||||
- RegisterVT = MVT::v32i1;
|
||||
+ RegisterVT = MVT::v32i8;
|
||||
IntermediateVT = MVT::v32i1;
|
||||
NumIntermediates = 2;
|
||||
return 2;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,550 +0,0 @@
|
|||
From 447cb2e1b2f0d8bdcfd8a0b39f47d28de50b5d82 Mon Sep 17 00:00:00 2001
|
||||
From: Djordje Todorovic <djordje.todorovic@syrmia.com>
|
||||
Date: Mon, 9 Mar 2020 11:02:35 +0100
|
||||
Subject: [PATCH] Enable the call site info only for -g + optimizations
|
||||
|
||||
Emit call site info only in the case of '-g' + 'O>0' level.
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D75175
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/c15c68abdc6f1afece637bdedba808676191a8e6]
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
---
|
||||
clang/include/clang/Basic/CodeGenOptions.def | 2 ++
|
||||
clang/lib/CodeGen/BackendUtil.cpp | 1 +
|
||||
clang/lib/Frontend/CompilerInvocation.cpp | 4 +++-
|
||||
llvm/include/llvm/CodeGen/CommandFlags.inc | 7 +++++++
|
||||
llvm/include/llvm/Target/TargetOptions.h | 7 ++++++-
|
||||
llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 ++--
|
||||
llvm/lib/CodeGen/MachineFunction.cpp | 2 +-
|
||||
llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +-
|
||||
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
|
||||
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
|
||||
llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | 2 +-
|
||||
llvm/test/CodeGen/X86/call-site-info-output.ll | 4 ++--
|
||||
llvm/test/DebugInfo/AArch64/call-site-info-output.ll | 2 +-
|
||||
llvm/test/DebugInfo/ARM/call-site-info-output.ll | 2 +-
|
||||
.../MIR/AArch64/dbgcall-site-interpret-movzxi.mir | 2 +-
|
||||
.../DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | 2 +-
|
||||
.../test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | 2 +-
|
||||
.../test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | 2 +-
|
||||
.../DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | 2 +-
|
||||
.../MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | 2 +-
|
||||
.../MIR/Hexagon/live-debug-values-bundled-entry-values.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | 2 +-
|
||||
.../test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | 2 +-
|
||||
.../test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | 2 +-
|
||||
.../DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | 2 +-
|
||||
.../DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | 4 ++--
|
||||
.../DebugInfo/MIR/X86/entry-value-of-modified-param.mir | 2 +-
|
||||
llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | 2 +-
|
||||
.../DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | 2 +-
|
||||
.../test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | 2 +-
|
||||
llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 2 +-
|
||||
llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | 2 +-
|
||||
.../tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | 2 +-
|
||||
41 files changed, 58 insertions(+), 41 deletions(-)
|
||||
|
||||
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
|
||||
index 1ecae98b13b1..6a6a9465273f 100644
|
||||
--- a/clang/include/clang/Basic/CodeGenOptions.def
|
||||
+++ b/clang/include/clang/Basic/CodeGenOptions.def
|
||||
@@ -64,6 +64,8 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
|
||||
///< pass manager.
|
||||
CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.
|
||||
CODEGENOPT(EnableDebugEntryValues, 1, 0) ///< Emit call site parameter dbg info
|
||||
+CODEGENOPT(EmitCallSiteInfo, 1, 0) ///< Emit call site info only in the case of
|
||||
+ ///< '-g' + 'O>0' level.
|
||||
CODEGENOPT(IndirectTlsSegRefs, 1, 0) ///< Set when -mno-tls-direct-seg-refs
|
||||
///< is specified.
|
||||
CODEGENOPT(DisableTailCalls , 1, 0) ///< Do not emit tail calls.
|
||||
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
index db8fd4166d7a..db09f9b641fe 100644
|
||||
--- a/clang/lib/CodeGen/BackendUtil.cpp
|
||||
+++ b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
@@ -482,6 +482,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
|
||||
Options.EmitAddrsig = CodeGenOpts.Addrsig;
|
||||
Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues;
|
||||
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
|
||||
+ Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
|
||||
|
||||
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
|
||||
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
|
||||
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
|
||||
index 18fa06bf3c6d..2e73dcbdebe4 100644
|
||||
--- a/clang/lib/Frontend/CompilerInvocation.cpp
|
||||
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
|
||||
@@ -789,8 +789,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
||||
|
||||
llvm::Triple T(TargetOpts.Triple);
|
||||
if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
|
||||
- llvm::is_contained(DebugEntryValueArchs, T.getArch()))
|
||||
+ llvm::is_contained(DebugEntryValueArchs, T.getArch())) {
|
||||
Opts.EnableDebugEntryValues = Args.hasArg(OPT_femit_debug_entry_values);
|
||||
+ Opts.EmitCallSiteInfo = true;
|
||||
+ }
|
||||
|
||||
Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone);
|
||||
Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
|
||||
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
index 6475a5b19edb..36073fe9cc98 100644
|
||||
--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
@@ -286,6 +286,12 @@ static cl::opt<bool>
|
||||
EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
|
||||
cl::init(false));
|
||||
|
||||
+static cl::opt<bool> EmitCallSiteInfo(
|
||||
+ "emit-call-site-info",
|
||||
+ cl::desc(
|
||||
+ "Emit call site debug information, if debug information is enabled."),
|
||||
+ cl::init(false));
|
||||
+
|
||||
static cl::opt<bool>
|
||||
EnableDebugEntryValues("debug-entry-values",
|
||||
cl::desc("Emit debug info about parameter's entry values"),
|
||||
@@ -349,6 +355,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
|
||||
Options.ExceptionModel = ExceptionModel;
|
||||
Options.EmitStackSizeSection = EnableStackSizeSection;
|
||||
Options.EmitAddrsig = EnableAddrsig;
|
||||
+ Options.EmitCallSiteInfo = EmitCallSiteInfo;
|
||||
Options.EnableDebugEntryValues = EnableDebugEntryValues;
|
||||
Options.ForceDwarfFrameSection = ForceDwarfFrameSection;
|
||||
|
||||
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
|
||||
index d27c7b0178f0..9378e290bed1 100644
|
||||
--- a/llvm/include/llvm/Target/TargetOptions.h
|
||||
+++ b/llvm/include/llvm/Target/TargetOptions.h
|
||||
@@ -134,7 +134,8 @@ namespace llvm {
|
||||
EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
|
||||
EmitStackSizeSection(false), EnableMachineOutliner(false),
|
||||
SupportsDefaultOutlining(false), EmitAddrsig(false),
|
||||
- EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
|
||||
+ EmitCallSiteInfo(false), EnableDebugEntryValues(false),
|
||||
+ ForceDwarfFrameSection(false) {}
|
||||
|
||||
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
|
||||
/// option is specified on the command line, and should enable debugging
|
||||
@@ -281,6 +282,10 @@ namespace llvm {
|
||||
/// to selectively generate basic block sections.
|
||||
std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
|
||||
|
||||
+ /// The flag enables call site info production. It is used only for debug
|
||||
+ /// info, and it is restricted only to optimized code. This can be used for
|
||||
+ /// something else, so that should be controlled in the frontend.
|
||||
+ unsigned EmitCallSiteInfo : 1;
|
||||
/// Emit debug info about parameter's entry values.
|
||||
unsigned EnableDebugEntryValues : 1;
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
|
||||
index 10157c746b46..f955bdc6186a 100644
|
||||
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
|
||||
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
|
||||
@@ -381,11 +381,11 @@ bool MIRParserImpl::initializeCallSiteInfo(
|
||||
CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
|
||||
}
|
||||
|
||||
- if (TM.Options.EnableDebugEntryValues)
|
||||
+ if (TM.Options.EmitCallSiteInfo)
|
||||
MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
|
||||
}
|
||||
|
||||
- if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
|
||||
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
|
||||
return error(Twine("Call site info provided but not used"));
|
||||
return false;
|
||||
}
|
||||
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
|
||||
index 4612690644fe..c3795b7ed314 100644
|
||||
--- a/llvm/lib/CodeGen/MachineFunction.cpp
|
||||
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
|
||||
@@ -855,7 +855,7 @@ MachineFunction::CallSiteInfoMap::iterator
|
||||
MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
|
||||
assert(MI->isCall() && "Call site info refers only to call instructions!");
|
||||
|
||||
- if (!Target.Options.EnableDebugEntryValues)
|
||||
+ if (!Target.Options.EmitCallSiteInfo)
|
||||
return CallSitesInfo.end();
|
||||
return CallSitesInfo.find(MI);
|
||||
}
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
|
||||
index 0e4d783e3505..52099f24aca5 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
|
||||
@@ -863,7 +863,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
|
||||
MI = &*std::next(Before);
|
||||
}
|
||||
|
||||
- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
|
||||
+ if (MI->isCall() && DAG->getTarget().Options.EmitCallSiteInfo)
|
||||
MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
|
||||
|
||||
return MI;
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
|
||||
index 23f05eaad944..63ff3031a5e8 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
|
||||
@@ -4132,7 +4132,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
RegsToPass.emplace_back(VA.getLocReg(), Arg);
|
||||
RegsUsed.insert(VA.getLocReg());
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
- if (Options.EnableDebugEntryValues)
|
||||
+ if (Options.EmitCallSiteInfo)
|
||||
CSInfo.emplace_back(VA.getLocReg(), i);
|
||||
}
|
||||
} else {
|
||||
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||||
index 9f504b1eaa42..5589ba34a2ac 100644
|
||||
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
|
||||
@@ -2222,7 +2222,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
isThisReturn = true;
|
||||
}
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
- if (Options.EnableDebugEntryValues)
|
||||
+ if (Options.EmitCallSiteInfo)
|
||||
CSInfo.emplace_back(VA.getLocReg(), i);
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
} else if (isByVal) {
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index d5de94aeb8a2..4808bdf6ddc2 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -4030,7 +4030,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
} else if (VA.isRegLoc()) {
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
- if (Options.EnableDebugEntryValues)
|
||||
+ if (Options.EmitCallSiteInfo)
|
||||
CSInfo.emplace_back(VA.getLocReg(), I);
|
||||
if (isVarArg && IsWin64) {
|
||||
// Win64 ABI requires argument XMM reg to be copied to the corresponding
|
||||
diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
|
||||
index 5ffa0293a2e1..fec542223fc9 100644
|
||||
--- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
|
||||
+++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# Verify that it is possible to read and write MIR where a callSites entry
|
||||
# points to a call residing in a bundle. The offset should point to the call
|
||||
diff --git a/llvm/test/CodeGen/X86/call-site-info-output.ll b/llvm/test/CodeGen/X86/call-site-info-output.ll
|
||||
index 4b1e236aadfe..a0438f0c2b98 100644
|
||||
--- a/llvm/test/CodeGen/X86/call-site-info-output.ll
|
||||
+++ b/llvm/test/CodeGen/X86/call-site-info-output.ll
|
||||
@@ -1,6 +1,6 @@
|
||||
; Test call site info MIR printer and parser.Parser assertions and machine
|
||||
; verifier will check the rest;
|
||||
-; RUN: llc -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
|
||||
+; RUN: llc -emit-call-site-info -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
|
||||
; RUN: cat %t.mir | FileCheck %s
|
||||
; CHECK: name: fn2
|
||||
; CHECK: callSites:
|
||||
@@ -10,7 +10,7 @@
|
||||
; CHECK-NEXT: arg: 0, reg: '$edi'
|
||||
; CHECK-NEXT: arg: 1, reg: '$esi'
|
||||
; CHECK-NEXT: arg: 2, reg: '$edx'
|
||||
-; RUN: llc -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
|
||||
+; RUN: llc -emit-call-site-info -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
|
||||
; Verify that we are able to parse output mir and that we are getting the same result.
|
||||
; PARSER: name: fn2
|
||||
; PARSER: callSites:
|
||||
diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
|
||||
index d52d6962f3c4..17d9f7f18762 100644
|
||||
--- a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
|
||||
+++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
|
||||
@@ -1,4 +1,4 @@
|
||||
-; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
|
||||
+; RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
|
||||
; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
|
||||
; Produced from:
|
||||
; extern int fn1(int,int,int);
|
||||
diff --git a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
|
||||
index 9255a7d57dde..ed726dfe753f 100644
|
||||
--- a/llvm/test/DebugInfo/ARM/call-site-info-output.ll
|
||||
+++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
|
||||
@@ -1,4 +1,4 @@
|
||||
-; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
|
||||
+; RUN: llc -emit-call-site-info -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
|
||||
; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
|
||||
; Produced from:
|
||||
; extern int fn1(int,int,int);
|
||||
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
|
||||
index dc7561ca6400..057779a90721 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
#
|
||||
# Based on the following C reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
|
||||
index 0371ccef603e..d925bc395878 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
# Following code is used for producing this test case. Note that
|
||||
# some of argument loading instruction are modified in order to
|
||||
# cover certain cases.
|
||||
diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
|
||||
index 916a14022ba5..4a87dad3b9b5 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
# Based on the following C reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
|
||||
index fbf9b3454689..d85f2d25391d 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \
|
||||
+# RUN: llc -emit-call-site-info -start-after=livedebugvalues -filetype=obj -o - %s \
|
||||
# RUN: | llvm-dwarfdump -v - | FileCheck %s
|
||||
|
||||
# This tests for a crash in DwarfDebug's singular DBG_VALUE range promotion when
|
||||
diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
|
||||
index ce8dc97f0e72..0ae4e6ec485c 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
# Following code is used for producing this test case. Note that
|
||||
# some of argument loading instruction are modified in order to
|
||||
# cover certain cases.
|
||||
diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
|
||||
index 9001c8ba8eea..5b84d9e9627f 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
|
||||
|
||||
# Based on the following C reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
|
||||
index aa7b54c1e5bb..11e9c4c90836 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
|
||||
|
||||
# Vefify that the call site info will be updated after the optimization.
|
||||
# This test case would previously trigger an assertion when
|
||||
diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
|
||||
index 8ae628af2c09..3ae23d4189bf 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
+# RUN: llc -mtriple hexagon -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
# Based on the following C reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
|
||||
index ff0a539dd15d..8bb0b3202acd 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
|
||||
|
||||
# Verify that the entry values for the input parameters are inserted after the
|
||||
# bundles which contains the registers' clobbering instructions (the calls to
|
||||
diff --git a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
|
||||
index 8a4e8b5632c2..3cf41467f7f9 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
|
||||
|
||||
# This test would previously trigger an assertion when trying to describe the
|
||||
# call site value for callee()'s float parameter.
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
|
||||
index e6fe5d2de878..4e5a07321d42 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
|
||||
# RUN: llvm-dwarfdump %t | FileCheck %s
|
||||
#
|
||||
# int global;
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
|
||||
index c32a1155d038..edeef2c7aed4 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
|
||||
@@ -1,6 +1,6 @@
|
||||
# Check that llvm can describe a call site parameter which resides in a spill slot.
|
||||
#
|
||||
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
#
|
||||
# Command:
|
||||
# $ ~/src/builds/llvm-project-master-RA/bin/clang -g -Xclang -femit-debug-entry-values -O2 -c -o spill.o spill.cc -mllvm -stop-before=machineverifier -o spill.mir
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
|
||||
index a2d51a203512..01a2b887a60b 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
|
||||
# Based on the following reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
|
||||
index f9e9459f1abd..104bc0146798 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
#
|
||||
# CHECK: DW_TAG_GNU_call_site
|
||||
# CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo"
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
|
||||
index 1bb70f6d4530..4d88fa9aab74 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
|
||||
# CHECK: DW_TAG_GNU_call_site
|
||||
# CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo")
|
||||
# CHECK-NEXT: DW_AT_low_pc {{.*}}
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
|
||||
index 235787573f51..81af598ba194 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
# Based on the following C++ code:
|
||||
# struct A { A(A &) {} };
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
|
||||
index db0934c595c3..46adedd1be44 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -O1 -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
+# RUN: llc -O1 -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
# Based on the following C reproducer:
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
|
||||
index 5d203029936e..1d7b64f169d1 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
#
|
||||
#extern void fn2(int);
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
|
||||
index e79be66cd4e3..c39bc4db50be 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
|
||||
@@ -2,8 +2,8 @@
|
||||
# When the debugger tuning is set to gdb, use GNU opcodes.
|
||||
# For lldb, use the standard DWARF5 opcodes.
|
||||
|
||||
-# RUN: llc -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
|
||||
-# RUN: llc -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
|
||||
#
|
||||
# extern void foo(int *a, int b, int c, int d, int e, int f);
|
||||
# extern int getVal();
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
|
||||
index 8d121c3a30b9..c7f15aaaa562 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
#
|
||||
#extern void fn1 (int, int, int);
|
||||
#
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
|
||||
index 2396daada876..aa8fdd7afd47 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
#
|
||||
# The test case was artificially adjusted, in order to make proper diamond basic
|
||||
# block structure relevant to the debug entry values propagation.
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
|
||||
index 86b1cddaa462..c5af863954bf 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
|
||||
#
|
||||
#extern void fn1 (int, int, int);
|
||||
#__attribute__((noinline))
|
||||
diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
|
||||
index d282d796f6d7..ea9c12b5a192 100644
|
||||
--- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
|
||||
+++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: llc -mtriple=x86_64-pc-linux -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
|
||||
+# RUN: llc -mtriple=x86_64-pc-linux -emit-call-site-info -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
|
||||
|
||||
# Verify that the call site information for the call residing in the eliminated
|
||||
# block is removed. This test case would previously trigger an assertion when
|
||||
diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
|
||||
index b698f1cdbfe8..b8cd9574cc63 100644
|
||||
--- a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
|
||||
+++ b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
|
||||
@@ -1,4 +1,4 @@
|
||||
-; RUN: llc -O1 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
+; RUN: llc -O1 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
; Verify that the 64-bit call site immediates are not truncated.
|
||||
;
|
||||
diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
|
||||
index 9fe67f82a2b4..5d37774f55d6 100644
|
||||
--- a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
|
||||
+++ b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
|
||||
@@ -1,4 +1,4 @@
|
||||
-; RUN: llc -O3 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
+; RUN: llc -O3 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
|
||||
index c304e9d768a5..d126757398ff 100644
|
||||
--- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
|
||||
+++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
|
||||
@@ -1,4 +1,4 @@
|
||||
-; RUN: llc -debug-entry-values %s -o - -filetype=obj \
|
||||
+; RUN: llc -emit-call-site-info -debug-entry-values %s -o - -filetype=obj \
|
||||
; RUN: | llvm-dwarfdump -statistics - | FileCheck %s
|
||||
;
|
||||
; The LLVM IR file was generated on this source code by using
|
||||
--
|
||||
2.33.1
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,237 +0,0 @@
|
|||
From d51fdb9f2986747a56c593fa057d531720b39deb Mon Sep 17 00:00:00 2001
|
||||
From: Sriraman Tallam <tmsriram@google.com>
|
||||
Date: Fri, 13 Mar 2020 15:58:57 -0700
|
||||
Subject: [PATCH] Basic Block Sections Support.
|
||||
|
||||
This is the first in a series of patches to enable Basic Block Sections
|
||||
in LLVM.
|
||||
|
||||
We introduce a new compiler option, -fbasicblock-sections=, which places every
|
||||
basic block in a unique ELF text section in the object file along with a
|
||||
symbol labeling the basic block. The linker can then order the basic block
|
||||
sections in any arbitrary sequence which when done correctly can encapsulate
|
||||
block layout, function layout and function splitting optimizations. However,
|
||||
there are a couple of challenges to be addressed for this to be feasible:
|
||||
|
||||
1) The compiler must not allow any implicit fall-through between any two
|
||||
adjacent basic blocks as they could be reordered at link time to be
|
||||
non-adjacent. In other words, the compiler must make a fall-through
|
||||
between adjacent basic blocks explicit by retaining the direct jump
|
||||
instruction that jumps to the next basic block. These branches can only
|
||||
be removed later by the linker after the blocks have been reordered.
|
||||
2) All inter-basic block branch targets would now need to be resolved by
|
||||
the linker as they cannot be calculated during compile time. This is
|
||||
done using static relocations which bloats the size of the object files.
|
||||
Further, the compiler tries to use short branch instructions on some ISAs
|
||||
for branch offsets that can be accommodated in one byte. This is not
|
||||
possible with basic block sections as the offset is not determined at
|
||||
compile time, and long branch instructions have to be used everywhere.
|
||||
3) Each additional section bloats object file sizes by tens of bytes. The
|
||||
number of basic blocks can be potentially very large compared to the
|
||||
size of functions and can bloat object sizes significantly. Option
|
||||
fbasicblock-sections= also takes a file path which can be used to
|
||||
specify a subset of basic blocks that needs unique sections to keep
|
||||
the bloats small.
|
||||
4) Debug Info and CFI need special handling and will be presented as
|
||||
separate patches.
|
||||
|
||||
Basic Block Labels
|
||||
|
||||
With -fbasicblock-sections=labels, or when a basic block is placed in a
|
||||
unique section, it is labelled with a symbol. This allows easy mapping of
|
||||
virtual addresses from PMU profiles back to the corresponding basic blocks.
|
||||
Since the number of basic blocks is large, the labeling bloats the symbol
|
||||
table sizes and the string table sizes significantly. While the binary size
|
||||
does increase, it does not affect performance as the symbol table is not
|
||||
loaded in memory during run-time. The string table size bloat is kept very
|
||||
minimal using a unary naming scheme that uses string suffix compression.
|
||||
The basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ...
|
||||
This turns out to be very good for string table sizes and the bloat in the
|
||||
string table size for a very large binary is ~8 %. The naming also allows
|
||||
using the --symbol-ordering-file option in LLD to arbitrarily reorder the
|
||||
sections.
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D68063
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4dfe92e46542be46d634a7ec24da2f2f889623d0]
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
---
|
||||
llvm/include/llvm/CodeGen/CommandFlags.inc | 34 ++++++++++++++++++++++
|
||||
llvm/include/llvm/Target/TargetMachine.h | 14 +++++++++
|
||||
llvm/include/llvm/Target/TargetOptions.h | 31 ++++++++++++++++++--
|
||||
3 files changed, 76 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
index 8739b644873d..6475a5b19edb 100644
|
||||
--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
|
||||
@@ -238,6 +238,12 @@ static cl::opt<bool>
|
||||
cl::desc("Emit functions into separate sections"),
|
||||
cl::init(false));
|
||||
|
||||
+static cl::opt<std::string>
|
||||
+ BBSections("basicblock-sections",
|
||||
+ cl::desc("Emit basic blocks into separate sections"),
|
||||
+ cl::value_desc("all | <function list (file)> | labels | none"),
|
||||
+ cl::init("none"));
|
||||
+
|
||||
static cl::opt<unsigned> TLSSize("tls-size",
|
||||
cl::desc("Bit size of immediate TLS offsets"),
|
||||
cl::init(0));
|
||||
@@ -251,6 +257,11 @@ static cl::opt<bool>
|
||||
cl::desc("Give unique names to every section"),
|
||||
cl::init(true));
|
||||
|
||||
+static cl::opt<bool> UniqueBBSectionNames(
|
||||
+ "unique-bb-section-names",
|
||||
+ cl::desc("Give unique names to every basic block section"),
|
||||
+ cl::init(false));
|
||||
+
|
||||
static cl::opt<llvm::EABI>
|
||||
EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"),
|
||||
cl::init(EABI::Default),
|
||||
@@ -285,6 +296,27 @@ static cl::opt<bool>
|
||||
cl::desc("Always emit a debug frame section."),
|
||||
cl::init(false));
|
||||
|
||||
+static llvm::BasicBlockSection
|
||||
+getBBSectionsMode(llvm::TargetOptions &Options) {
|
||||
+ if (BBSections == "all")
|
||||
+ return BasicBlockSection::All;
|
||||
+ else if (BBSections == "labels")
|
||||
+ return BasicBlockSection::Labels;
|
||||
+ else if (BBSections == "none")
|
||||
+ return BasicBlockSection::None;
|
||||
+ else {
|
||||
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
|
||||
+ MemoryBuffer::getFile(BBSections);
|
||||
+ if (!MBOrErr) {
|
||||
+ errs() << "Error loading basic block sections function list file: "
|
||||
+ << MBOrErr.getError().message() << "\n";
|
||||
+ } else {
|
||||
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
|
||||
+ }
|
||||
+ return BasicBlockSection::List;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
// Common utility function tightly tied to the options listed here. Initializes
|
||||
// a TargetOptions object with CodeGen flags and returns it.
|
||||
static TargetOptions InitTargetOptionsFromCodeGenFlags() {
|
||||
@@ -308,7 +340,9 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
|
||||
Options.RelaxELFRelocations = RelaxELFRelocations;
|
||||
Options.DataSections = DataSections;
|
||||
Options.FunctionSections = FunctionSections;
|
||||
+ Options.BBSections = getBBSectionsMode(Options);
|
||||
Options.UniqueSectionNames = UniqueSectionNames;
|
||||
+ Options.UniqueBBSectionNames = UniqueBBSectionNames;
|
||||
Options.TLSSize = TLSSize;
|
||||
Options.EmulatedTLS = EmulatedTLS;
|
||||
Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
|
||||
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
|
||||
index 176ae39b17a7..4a1f3377f31d 100644
|
||||
--- a/llvm/include/llvm/Target/TargetMachine.h
|
||||
+++ b/llvm/include/llvm/Target/TargetMachine.h
|
||||
@@ -242,6 +242,9 @@ public:
|
||||
|
||||
bool getUniqueSectionNames() const { return Options.UniqueSectionNames; }
|
||||
|
||||
+ /// Return true if unique basic block section names must be generated.
|
||||
+ bool getUniqueBBSectionNames() const { return Options.UniqueBBSectionNames; }
|
||||
+
|
||||
/// Return true if data objects should be emitted into their own section,
|
||||
/// corresponds to -fdata-sections.
|
||||
bool getDataSections() const {
|
||||
@@ -254,6 +257,17 @@ public:
|
||||
return Options.FunctionSections;
|
||||
}
|
||||
|
||||
+ /// If basic blocks should be emitted into their own section,
|
||||
+ /// corresponding to -fbasicblock-sections.
|
||||
+ llvm::BasicBlockSection getBBSectionsType() const {
|
||||
+ return Options.BBSections;
|
||||
+ }
|
||||
+
|
||||
+ /// Get the list of functions and basic block ids that need unique sections.
|
||||
+ const MemoryBuffer *getBBSectionsFuncListBuf() const {
|
||||
+ return Options.BBSectionsFuncListBuf.get();
|
||||
+ }
|
||||
+
|
||||
/// Get a \c TargetIRAnalysis appropriate for the target.
|
||||
///
|
||||
/// This is used to construct the new pass manager's target IR analysis pass,
|
||||
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
|
||||
index 84c6ee2a6387..d27c7b0178f0 100644
|
||||
--- a/llvm/include/llvm/Target/TargetOptions.h
|
||||
+++ b/llvm/include/llvm/Target/TargetOptions.h
|
||||
@@ -16,8 +16,11 @@
|
||||
|
||||
#include "llvm/MC/MCTargetOptions.h"
|
||||
|
||||
+#include <memory>
|
||||
+
|
||||
namespace llvm {
|
||||
class MachineFunction;
|
||||
+ class MemoryBuffer;
|
||||
class Module;
|
||||
|
||||
namespace FloatABI {
|
||||
@@ -63,6 +66,18 @@ namespace llvm {
|
||||
};
|
||||
}
|
||||
|
||||
+ enum class BasicBlockSection {
|
||||
+ All, // Use Basic Block Sections for all basic blocks. A section
|
||||
+ // for every basic block can significantly bloat object file sizes.
|
||||
+ List, // Get list of functions & BBs from a file. Selectively enables
|
||||
+ // basic block sections for a subset of basic blocks which can be
|
||||
+ // used to control object size bloats from creating sections.
|
||||
+ Labels, // Do not use Basic Block Sections but label basic blocks. This
|
||||
+ // is useful when associating profile counts from virtual addresses
|
||||
+ // to basic blocks.
|
||||
+ None // Do not use Basic Block Sections.
|
||||
+ };
|
||||
+
|
||||
enum class EABI {
|
||||
Unknown,
|
||||
Default, // Default means not specified
|
||||
@@ -114,9 +129,9 @@ namespace llvm {
|
||||
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
|
||||
DisableIntegratedAS(false), RelaxELFRelocations(false),
|
||||
FunctionSections(false), DataSections(false),
|
||||
- UniqueSectionNames(true), TrapUnreachable(false),
|
||||
- NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
|
||||
- ExplicitEmulatedTLS(false), EnableIPRA(false),
|
||||
+ UniqueSectionNames(true), UniqueBBSectionNames(false),
|
||||
+ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
|
||||
+ EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
|
||||
EmitStackSizeSection(false), EnableMachineOutliner(false),
|
||||
SupportsDefaultOutlining(false), EmitAddrsig(false),
|
||||
EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
|
||||
@@ -224,6 +239,9 @@ namespace llvm {
|
||||
|
||||
unsigned UniqueSectionNames : 1;
|
||||
|
||||
+ /// Use unique names for basic block sections.
|
||||
+ unsigned UniqueBBSectionNames : 1;
|
||||
+
|
||||
/// Emit target-specific trap instruction for 'unreachable' IR instructions.
|
||||
unsigned TrapUnreachable : 1;
|
||||
|
||||
@@ -256,6 +274,13 @@ namespace llvm {
|
||||
/// Emit address-significance table.
|
||||
unsigned EmitAddrsig : 1;
|
||||
|
||||
+ /// Emit basic blocks into separate sections.
|
||||
+ BasicBlockSection BBSections = BasicBlockSection::None;
|
||||
+
|
||||
+ /// Memory Buffer that contains information on sampled basic blocks and used
|
||||
+ /// to selectively generate basic block sections.
|
||||
+ std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
|
||||
+
|
||||
/// Emit debug info about parameter's entry values.
|
||||
unsigned EnableDebugEntryValues : 1;
|
||||
|
||||
--
|
||||
2.33.1
|
||||
|
|
@ -1,33 +1,5 @@
|
|||
FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:"
|
||||
|
||||
SPIRV_SRCREV = "fe4d6b767363a1995ccbfca27f79efb10dcfe110"
|
||||
|
||||
SRC_URI_LLVM10_PATCHES = " \
|
||||
file://llvm10-0001-llvm-spirv-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
|
||||
file://llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch;patchdir=llvm/projects/llvm-spirv \
|
||||
file://llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch;patchdir=llvm/projects/llvm-spirv \
|
||||
file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
|
||||
file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
|
||||
file://llvm10-0001-OpenCL-3.0-support.patch \
|
||||
file://llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch \
|
||||
file://llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch \
|
||||
file://llvm10-0004-Remove-repo-name-in-LLVM-IR.patch \
|
||||
file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
|
||||
file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
|
||||
file://llvm10-0007-support-cl_ext_float_atomics.patch \
|
||||
file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
|
||||
file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
|
||||
file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
|
||||
file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
|
||||
file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
|
||||
file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
|
||||
file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
|
||||
file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
|
||||
file://llvm10-basic-block-sections-support.patch \
|
||||
file://llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch \
|
||||
file://llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch \
|
||||
"
|
||||
|
||||
SRC_URI_LLVM12_PATCHES = " \
|
||||
file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
|
||||
file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
|
||||
|
@ -36,10 +8,4 @@ SRC_URI_LLVM12_PATCHES = " \
|
|||
file://llvm12-0005-ispc-12_0_fix_for_2111.patch \
|
||||
"
|
||||
|
||||
|
||||
SPIRV_LLVM10_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_100;destsuffix=git/llvm/projects/llvm-spirv;name=spirv"
|
||||
|
||||
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '10.0.1', ' ${SPIRV_LLVM10_SRC_URI} ${SRC_URI_LLVM10_PATCHES} ', '', d)}"
|
||||
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}"
|
||||
|
||||
SRCREV_spirv = "${@bb.utils.contains_any('LLVMVERSION', [ '13.0.0', '12.0.0' ], '', '${SPIRV_SRCREV}', d)}"
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
From 7fc05c52dd91902fa324a7aac9b90715cfca4717 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Wed, 15 Apr 2020 17:55:32 +0800
|
||||
Subject: [PATCH] Building in-tree with LLVM 10.0 with the LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
Failed to link with the LLVMSPIRVLib library.
|
||||
|
||||
Add an explicit dependency to force the correct build order and linking.
|
||||
|
||||
Reference:
|
||||
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/a6d4ccf082858e63e139ca06c02a071c343d2657
|
||||
|
||||
Upstream-Status: Submitted [https://github.com/intel/opencl-clang/pull/118]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 51c140d..b8b514e 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -208,7 +208,7 @@ link_directories(
|
||||
|
||||
set(OPENCL_CLANG_LINK_LIBS ${CMAKE_DL_LIBS})
|
||||
|
||||
-if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS)
|
||||
+if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS OR (USE_PREBUILT_LLVM AND LLVM_LINK_LLVM_DYLIB))
|
||||
# SPIRV-LLVM-Translator is not included into LLVM as a component.
|
||||
# So, we need to list it here explicitly as an external library
|
||||
list(APPEND OPENCL_CLANG_LINK_LIBS LLVMSPIRVLib)
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
From b29e00e6fe428a031cf577dfb703cf13eff837f6 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Wed, 15 Apr 2020 18:05:14 +0800
|
||||
Subject: [PATCH 2/2] make sure only static libraries linked for native build
|
||||
|
||||
LINK_COMPONENTS=all isn't working for static libs for out of tree builds. Use
|
||||
LLVM_AVAILABLE_LIBS instead. Reported:
|
||||
|
||||
https://github.com/intel/opencl-clang/issues/114
|
||||
|
||||
Upstream-Status: Pending
|
||||
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
CMakeLists.txt | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 8707487..ad2dbda 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -218,7 +218,7 @@ add_subdirectory(cl_headers)
|
||||
|
||||
set(LLVM_REQUIRES_EH ON)
|
||||
|
||||
-if(USE_PREBUILT_LLVM OR CLANG_LINK_CLANG_DYLIB)
|
||||
+if(false)
|
||||
list(APPEND OPENCL_CLANG_LINK_LIBS clang-cpp)
|
||||
else()
|
||||
list(APPEND OPENCL_CLANG_LINK_LIBS
|
||||
@@ -266,6 +266,7 @@ add_llvm_library(${TARGET_NAME} SHARED
|
||||
all
|
||||
LINK_LIBS
|
||||
${OPENCL_CLANG_LINK_LIBS}
|
||||
+ ${LLVM_AVAILABLE_LIBS}
|
||||
)
|
||||
|
||||
# Configure resource file on Windows
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
require opencl-clang.inc
|
||||
|
||||
SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \
|
||||
file://0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch \
|
||||
"
|
||||
SRC_URI:append:class-native = " file://0002-make-sure-only-static-libraries-linked-for-native-bu.patch"
|
||||
|
||||
BRANCH = "ocl-open-100"
|
||||
|
||||
SRCREV = "c8cd72e32b6abc18ce6da71c357ea45ba78b52f0"
|
||||
|
||||
EXTRA_OECMAKE += "\
|
||||
-DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
|
||||
-DCMAKE_SKIP_RPATH=TRUE \
|
||||
"
|
Loading…
Reference in New Issue
Block a user