mirror of
git://git.yoctoproject.org/meta-intel.git
synced 2025-07-19 21:09:03 +02:00
llvm-project-source: update SPIRV-LLVM-Translator 8.0.0 -> 9.0.0
Remove all the backported patches that are already included in the 9.0.0 release.
A few patches were recommended from the llvm-patches repo:
https://github.com/intel/intel-graphics-compiler/blob/master/documentation/build_ubuntu.md
3906cc086f
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
parent
420247ef2d
commit
1b076fd8db
|
@ -1,156 +0,0 @@
|
||||||
From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Alexey Bader <alexey.bader@intel.com>
|
|
||||||
Date: Tue, 19 Feb 2019 15:19:06 +0000
|
|
||||||
Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
|
|
||||||
For some reason OpenCL blocks in LLVM IR are represented as function pointers.
|
|
||||||
These pointers do not point to any real function and never get called. Actually
|
|
||||||
they point to some structure, which in turn contains pointer to the real block
|
|
||||||
invoke function.
|
|
||||||
This patch changes the representation of OpenCL blocks in LLVM IR from function
|
|
||||||
pointers to pointers to `%struct.__block_literal_generic`.
|
|
||||||
Such a representation avoids unnecessary bitcasts and simplifies
|
|
||||||
further processing (e.g. translation to SPIR-V ) of the module for targets
|
|
||||||
which do not support function pointers.
|
|
||||||
|
|
||||||
Patch by: Alexey Sotkin.
|
|
||||||
|
|
||||||
Reviewers: Anastasia, yaxunl, svenvh
|
|
||||||
|
|
||||||
Reviewed By: Anastasia
|
|
||||||
|
|
||||||
Subscribers: alexbatashev, cfe-commits
|
|
||||||
|
|
||||||
Tags: #clang
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D58277
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
|
|
||||||
Upstream-Status: Backport
|
|
||||||
[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94]
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
|
||||||
---
|
|
||||||
lib/CodeGen/CodeGenTypes.cpp | 4 +++-
|
|
||||||
test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
|
|
||||||
test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
|
|
||||||
3 files changed, 20 insertions(+), 20 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
|
|
||||||
index 2acf1ac..93b3ebf 100644
|
|
||||||
--- a/lib/CodeGen/CodeGenTypes.cpp
|
|
||||||
+++ b/lib/CodeGen/CodeGenTypes.cpp
|
|
||||||
@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
|
|
||||||
|
|
||||||
case Type::BlockPointer: {
|
|
||||||
const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
|
|
||||||
- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
|
|
||||||
+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
|
|
||||||
+ ? CGM.getGenericBlockLiteralType()
|
|
||||||
+ : ConvertTypeForMem(FTy);
|
|
||||||
unsigned AS = Context.getTargetAddressSpace(FTy);
|
|
||||||
ResultType = llvm::PointerType::get(PointeeType, AS);
|
|
||||||
break;
|
|
||||||
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
index 675240c..19aacc3 100644
|
|
||||||
--- a/test/CodeGenOpenCL/blocks.cl
|
|
||||||
+++ b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
@@ -35,11 +35,10 @@ void foo(){
|
|
||||||
// SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
|
|
||||||
// SPIR: %[[i_value:.*]] = load i32, i32* %i
|
|
||||||
// SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
|
|
||||||
- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
|
|
||||||
- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
|
|
||||||
- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
|
|
||||||
- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
|
|
||||||
- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
|
|
||||||
+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
|
|
||||||
+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
|
|
||||||
+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
|
|
||||||
// SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
|
|
||||||
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
|
|
||||||
// SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
|
|
||||||
@@ -50,11 +49,10 @@ void foo(){
|
|
||||||
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
|
|
||||||
// AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
|
|
||||||
// AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
|
|
||||||
- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
|
|
||||||
- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
|
|
||||||
- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
|
|
||||||
- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
|
|
||||||
- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
|
|
||||||
+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
|
|
||||||
+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
|
|
||||||
// AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
|
|
||||||
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
|
|
||||||
// AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
|
|
||||||
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
index 4732194..8445016 100644
|
|
||||||
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
|
|
||||||
|
|
||||||
// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
|
|
||||||
// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
|
|
||||||
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
|
|
||||||
+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
|
|
||||||
|
|
||||||
// For anonymous blocks without captures, emit block literals as global variable.
|
|
||||||
// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
|
|
||||||
@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
|
|
||||||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
|
||||||
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
|
|
||||||
- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
|
|
||||||
- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
|
|
||||||
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
|
|
||||||
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
|
|
||||||
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
|
|
||||||
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
|
|
||||||
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
|
|
||||||
@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
|
||||||
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
|
|
||||||
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
|
|
||||||
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
|
|
||||||
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
|
|
||||||
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
|
|
||||||
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
|
|
||||||
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
|
|
||||||
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
|
|
||||||
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
|
|
||||||
@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
// Emits global block literal [[BLG8]] and invoke function [[INVG8]].
|
|
||||||
// The full type of these expressions are long (and repeated elsewhere), so we
|
|
||||||
// capture it as part of the regex for convenience and clarity.
|
|
||||||
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
|
|
||||||
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
|
|
||||||
void (^const block_A)(void) = ^{
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Emits global block literal [[BLG9]] and invoke function [[INVG9]].
|
|
||||||
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
|
|
||||||
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
|
|
||||||
void (^const block_B)(local void *) = ^(local void *a) {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
|
|
||||||
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
|
|
||||||
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
|
|
||||||
- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
|
|
||||||
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
|
|
||||||
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
|
|
||||||
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
|
|
||||||
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
|
@ -1,986 +0,0 @@
|
||||||
From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Alexey Sotkin <alexey.sotkin@intel.com>
|
|
||||||
Date: Thu, 21 Feb 2019 17:14:36 +0300
|
|
||||||
Subject: [PATCH] Update LowerOpenCL pass to handle new blocks representation in
|
|
||||||
LLVM IR
|
|
||||||
|
|
||||||
Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
|
||||||
|
|
||||||
---
|
|
||||||
lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 249 ++++--------------------------
|
|
||||||
test/global_block.ll | 71 ++++-----
|
|
||||||
test/literal-struct.ll | 31 ++--
|
|
||||||
test/transcoding/block_w_struct_return.ll | 47 +++---
|
|
||||||
test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------
|
|
||||||
5 files changed, 235 insertions(+), 400 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
|
|
||||||
index c80bf04..b42a4ec 100644
|
|
||||||
--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
|
|
||||||
+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
|
|
||||||
@@ -40,207 +40,34 @@
|
|
||||||
// In both cases values with function type used as intermediate representation
|
|
||||||
// for block literal structure.
|
|
||||||
//
|
|
||||||
-// This pass is designed to find such cases and simplify them to avoid any
|
|
||||||
-// function pointer types occurrences in LLVM IR in 4 steps.
|
|
||||||
-//
|
|
||||||
-// 1. Find all function pointer allocas, like
|
|
||||||
-// %block = alloca void () *
|
|
||||||
-//
|
|
||||||
-// Then find a single store to that alloca:
|
|
||||||
-// %blockLit = alloca <{ i32, i32, ...}>, align 4
|
|
||||||
-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
|
|
||||||
-// > store void ()* %0, void ()** %block, align 4
|
|
||||||
-//
|
|
||||||
-// And replace the alloca users by new instructions which used stored value
|
|
||||||
-// %blockLit itself instead of function pointer alloca %block.
|
|
||||||
-//
|
|
||||||
-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
|
|
||||||
-// used function pointers as an intermediate type:
|
|
||||||
-// %0 = bitcast <{ i32, i32 }> %block to void() *
|
|
||||||
-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
|
|
||||||
-// And simplify them:
|
|
||||||
-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
|
|
||||||
-//
|
|
||||||
-// 3. Find all unused instructions with function pointer type occured after
|
|
||||||
-// pp.1-2 and remove them.
|
|
||||||
-//
|
|
||||||
-// 4. Find unused globals with function pointer type, like
|
|
||||||
-// @block = constant void ()*
|
|
||||||
-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
|
|
||||||
-//
|
|
||||||
-// And remove them.
|
|
||||||
+// In LLVM IR produced by clang, blocks are represented with the following
|
|
||||||
+// structure:
|
|
||||||
+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
+// Pointers to block invoke functions are stored in the third field. Clang
|
|
||||||
+// replaces indirect function calls in all cases except if block is passed as a
|
|
||||||
+// function argument. Note that it is somewhat unclear if the OpenCL C spec
|
|
||||||
+// should allow passing blocks as function arguments. This pass is not supposed
|
|
||||||
+// to work correctly with such functions.
|
|
||||||
+// Clang though has to store function pointers to this structure. Purpose of
|
|
||||||
+// this pass is to replace store of function pointers(not allowed in SPIR-V)
|
|
||||||
+// with null pointers.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#define DEBUG_TYPE "spv-lower-ocl-blocks"
|
|
||||||
|
|
||||||
-#include "OCLUtil.h"
|
|
||||||
#include "SPIRVInternal.h"
|
|
||||||
|
|
||||||
-#include "llvm/ADT/SetVector.h"
|
|
||||||
-#include "llvm/Analysis/ValueTracking.h"
|
|
||||||
-#include "llvm/IR/GlobalVariable.h"
|
|
||||||
-#include "llvm/IR/InstIterator.h"
|
|
||||||
#include "llvm/IR/Module.h"
|
|
||||||
#include "llvm/Pass.h"
|
|
||||||
-#include "llvm/PassSupport.h"
|
|
||||||
-#include "llvm/Support/Casting.h"
|
|
||||||
+#include "llvm/Support/Regex.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
-static void
|
|
||||||
-removeUnusedFunctionPtrInst(Instruction *I,
|
|
||||||
- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
|
|
||||||
- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
|
|
||||||
- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
|
|
||||||
- I->setOperand(OpIdx, nullptr);
|
|
||||||
- if (OpI && OpI != I && OpI->user_empty())
|
|
||||||
- FuncPtrInsts.insert(OpI);
|
|
||||||
- }
|
|
||||||
- I->eraseFromParent();
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static bool isFuncPtrAlloca(const AllocaInst *AI) {
|
|
||||||
- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
|
|
||||||
- return ET && ET->getElementType()->isFunctionTy();
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static bool hasFuncPtrType(const Value *V) {
|
|
||||||
- auto *PT = dyn_cast<PointerType>(V->getType());
|
|
||||||
- return PT && PT->getElementType()->isFunctionTy();
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static bool isFuncPtrInst(const Instruction *I) {
|
|
||||||
- if (auto *AI = dyn_cast<AllocaInst>(I))
|
|
||||||
- return isFuncPtrAlloca(AI);
|
|
||||||
-
|
|
||||||
- for (auto &Op : I->operands()) {
|
|
||||||
- if (auto *AI = dyn_cast<AllocaInst>(Op))
|
|
||||||
- return isFuncPtrAlloca(AI);
|
|
||||||
-
|
|
||||||
- auto *OpI = dyn_cast<Instruction>(&Op);
|
|
||||||
- if (OpI && OpI != I && hasFuncPtrType(OpI))
|
|
||||||
- return true;
|
|
||||||
- }
|
|
||||||
- return false;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static StoreInst *findSingleStore(AllocaInst *AI) {
|
|
||||||
- StoreInst *Store = nullptr;
|
|
||||||
- for (auto *U : AI->users()) {
|
|
||||||
- if (!isa<StoreInst>(U))
|
|
||||||
- continue; // not a store
|
|
||||||
- if (Store)
|
|
||||||
- return nullptr; // there are more than one stores
|
|
||||||
- Store = dyn_cast<StoreInst>(U);
|
|
||||||
- }
|
|
||||||
- return Store;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
|
|
||||||
- // Find and remove a single store to alloca
|
|
||||||
- auto *SingleStore = findSingleStore(AI);
|
|
||||||
- assert(SingleStore && "More than one store to the function pointer alloca");
|
|
||||||
- auto *StoredVal = SingleStore->getValueOperand();
|
|
||||||
- SingleStore->eraseFromParent();
|
|
||||||
-
|
|
||||||
- // Find loads from the alloca and replace thier users
|
|
||||||
- for (auto *U : AI->users()) {
|
|
||||||
- auto *LI = dyn_cast<LoadInst>(U);
|
|
||||||
- if (!LI)
|
|
||||||
- continue;
|
|
||||||
-
|
|
||||||
- for (auto *U : LI->users()) {
|
|
||||||
- auto *UInst = cast<Instruction>(U);
|
|
||||||
- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
|
|
||||||
- StoredVal, UInst->getType(), "", UInst);
|
|
||||||
- UInst->replaceAllUsesWith(Cast);
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static int getBlockLiteralIdx(const Function &F) {
|
|
||||||
- StringRef FName = F.getName();
|
|
||||||
- if (isEnqueueKernelBI(FName))
|
|
||||||
- return FName.contains("events") ? 7 : 4;
|
|
||||||
- if (isKernelQueryBI(FName))
|
|
||||||
- return FName.contains("for_ndrange") ? 2 : 1;
|
|
||||||
- if (FName.startswith("__") && FName.contains("_block_invoke"))
|
|
||||||
- return F.hasStructRetAttr() ? 1 : 0;
|
|
||||||
-
|
|
||||||
- return -1; // No block literal argument
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static bool hasBlockLiteralArg(const Function &F) {
|
|
||||||
- return getBlockLiteralIdx(F) != -1;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static bool simplifyFunctionPtrCasts(Function &F) {
|
|
||||||
- bool Changed = false;
|
|
||||||
- int BlockLiteralIdx = getBlockLiteralIdx(F);
|
|
||||||
- for (auto *U : F.users()) {
|
|
||||||
- auto *Call = dyn_cast<CallInst>(U);
|
|
||||||
- if (!Call)
|
|
||||||
- continue;
|
|
||||||
- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
|
|
||||||
- continue; // Skip block invoke function calls inside block invoke kernels
|
|
||||||
-
|
|
||||||
- const DataLayout &DL = F.getParent()->getDataLayout();
|
|
||||||
- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
|
|
||||||
- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
|
|
||||||
- if (isa<GlobalVariable>(BlockLiteralVal))
|
|
||||||
- continue; // nothing to do with globals
|
|
||||||
-
|
|
||||||
- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
|
|
||||||
- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
|
|
||||||
- "Function type shouldn't be there");
|
|
||||||
-
|
|
||||||
- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
|
|
||||||
- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
|
|
||||||
- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
|
|
||||||
- Changed |= true;
|
|
||||||
- }
|
|
||||||
- return Changed;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static void
|
|
||||||
-findFunctionPtrAllocas(Module &M,
|
|
||||||
- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
|
|
||||||
- for (auto &F : M) {
|
|
||||||
- if (F.isDeclaration())
|
|
||||||
- continue;
|
|
||||||
- for (auto &I : instructions(F)) {
|
|
||||||
- auto *AI = dyn_cast<AllocaInst>(&I);
|
|
||||||
- if (!AI || !isFuncPtrAlloca(AI))
|
|
||||||
- continue;
|
|
||||||
- FuncPtrAllocas.push_back(AI);
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static void
|
|
||||||
-findUnusedFunctionPtrInsts(Module &M,
|
|
||||||
- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
|
|
||||||
- for (auto &F : M) {
|
|
||||||
- if (F.isDeclaration())
|
|
||||||
- continue;
|
|
||||||
- for (auto &I : instructions(F))
|
|
||||||
- if (I.user_empty() && isFuncPtrInst(&I))
|
|
||||||
- FuncPtrInsts.insert(&I);
|
|
||||||
- }
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static void
|
|
||||||
-findUnusedFunctionPtrGlbs(Module &M,
|
|
||||||
- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
|
|
||||||
- for (auto &GV : M.globals()) {
|
|
||||||
- if (!GV.user_empty())
|
|
||||||
- continue;
|
|
||||||
- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
|
|
||||||
- if (GVType && GVType->getElementType()->isFunctionTy())
|
|
||||||
- FuncPtrGlbs.push_back(&GV);
|
|
||||||
- }
|
|
||||||
+static bool isBlockInvoke(Function &F) {
|
|
||||||
+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
|
|
||||||
+ return BlockInvokeRegex.match(F.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
class SPIRVLowerOCLBlocks : public ModulePass {
|
|
||||||
@@ -250,44 +77,24 @@ public:
|
|
||||||
|
|
||||||
bool runOnModule(Module &M) {
|
|
||||||
bool Changed = false;
|
|
||||||
-
|
|
||||||
- // 1. Find function pointer allocas and fix their users
|
|
||||||
- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
|
|
||||||
- findFunctionPtrAllocas(M, FuncPtrAllocas);
|
|
||||||
-
|
|
||||||
- Changed |= !FuncPtrAllocas.empty();
|
|
||||||
- for (auto *AI : FuncPtrAllocas)
|
|
||||||
- fixFunctionPtrAllocaUsers(AI);
|
|
||||||
-
|
|
||||||
- // 2. Simplify consecutive casts which use function pointer types
|
|
||||||
- for (auto &F : M)
|
|
||||||
- if (hasBlockLiteralArg(F))
|
|
||||||
- Changed |= simplifyFunctionPtrCasts(F);
|
|
||||||
-
|
|
||||||
- // 3. Cleanup unused instructions with function pointer type
|
|
||||||
- // which are occured after pp. 1-2
|
|
||||||
- SmallSetVector<Instruction *, 16> FuncPtrInsts;
|
|
||||||
- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
|
|
||||||
-
|
|
||||||
- Changed |= !FuncPtrInsts.empty();
|
|
||||||
- while (!FuncPtrInsts.empty()) {
|
|
||||||
- Instruction *I = FuncPtrInsts.pop_back_val();
|
|
||||||
- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
|
|
||||||
+ for (Function &F : M) {
|
|
||||||
+ if (!isBlockInvoke(F))
|
|
||||||
+ continue;
|
|
||||||
+ for (User *U : F.users()) {
|
|
||||||
+ if (!isa<Constant>(U))
|
|
||||||
+ continue;
|
|
||||||
+ Constant *Null = Constant::getNullValue(U->getType());
|
|
||||||
+ if (U != Null) {
|
|
||||||
+ U->replaceAllUsesWith(Null);
|
|
||||||
+ Changed = true;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
-
|
|
||||||
- // 4. Find and remove unused global variables with function pointer type
|
|
||||||
- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
|
|
||||||
- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
|
|
||||||
-
|
|
||||||
- Changed |= !FuncPtrGlbs.empty();
|
|
||||||
- for (auto *GV : FuncPtrGlbs)
|
|
||||||
- GV->eraseFromParent();
|
|
||||||
-
|
|
||||||
return Changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
static char ID;
|
|
||||||
-}; // class SPIRVLowerOCLBlocks
|
|
||||||
+};
|
|
||||||
|
|
||||||
char SPIRVLowerOCLBlocks::ID = 0;
|
|
||||||
|
|
||||||
diff --git a/test/global_block.ll b/test/global_block.ll
|
|
||||||
index 4fc453b..b558213 100644
|
|
||||||
--- a/test/global_block.ll
|
|
||||||
+++ b/test/global_block.ll
|
|
||||||
@@ -17,7 +17,7 @@
|
|
||||||
; RUN: spirv-val %t.spv
|
|
||||||
; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
|
|
||||||
|
|
||||||
-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
|
|
||||||
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
|
||||||
target triple = "spir-unknown-unknown"
|
|
||||||
|
|
||||||
; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
|
|
||||||
@@ -27,71 +27,56 @@ target triple = "spir-unknown-unknown"
|
|
||||||
; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
|
|
||||||
; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
|
|
||||||
|
|
||||||
-;; This variable is not needed in SPIRV
|
|
||||||
-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
|
|
||||||
-; CHECK-LLVM-NOT: @block_kernel.b1
|
|
||||||
-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
|
|
||||||
+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
|
|
||||||
-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
|
|
||||||
+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
|
|
||||||
+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
|
|
||||||
|
|
||||||
-; Function Attrs: convergent nounwind
|
|
||||||
-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
|
|
||||||
+; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
|
|
||||||
entry:
|
|
||||||
- %res.addr = alloca i32 addrspace(1)*, align 8
|
|
||||||
- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
|
|
||||||
-
|
|
||||||
+ %res.addr = alloca i32 addrspace(1)*, align 4
|
|
||||||
+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
|
|
||||||
; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
|
|
||||||
; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
|
|
||||||
- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
|
|
||||||
-
|
|
||||||
- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
|
|
||||||
- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
|
|
||||||
+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
|
|
||||||
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
|
|
||||||
+ store i32 %call, i32 addrspace(1)* %0, align 4
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
|
|
||||||
+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
|
|
||||||
; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
|
|
||||||
; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
|
|
||||||
; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
|
|
||||||
-; Function Attrs: convergent nounwind
|
|
||||||
+; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
|
|
||||||
entry:
|
|
||||||
- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
|
|
||||||
+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
|
|
||||||
%i.addr = alloca i32, align 4
|
|
||||||
- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
|
|
||||||
- store i32 %i, i32* %i.addr, align 4, !tbaa !14
|
|
||||||
- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
|
|
||||||
+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
|
|
||||||
+ store i32 %i, i32* %i.addr, align 4
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ %0 = load i32, i32* %i.addr, align 4
|
|
||||||
%add = add nsw i32 %0, 1
|
|
||||||
ret i32 %add
|
|
||||||
}
|
|
||||||
|
|
||||||
-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #2 = { convergent }
|
|
||||||
|
|
||||||
!llvm.module.flags = !{!0}
|
|
||||||
-!opencl.enable.FP_CONTRACT = !{}
|
|
||||||
!opencl.ocl.version = !{!1}
|
|
||||||
!opencl.spir.version = !{!1}
|
|
||||||
-!opencl.used.extensions = !{!2}
|
|
||||||
-!opencl.used.optional.core.features = !{!2}
|
|
||||||
-!opencl.compiler.options = !{!2}
|
|
||||||
-!llvm.ident = !{!3}
|
|
||||||
+!llvm.ident = !{!2}
|
|
||||||
|
|
||||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
||||||
!1 = !{i32 2, i32 0}
|
|
||||||
-!2 = !{}
|
|
||||||
-!3 = !{!"clang version 7.0.0"}
|
|
||||||
-!4 = !{i32 1}
|
|
||||||
-!5 = !{!"none"}
|
|
||||||
-!6 = !{!"int*"}
|
|
||||||
-!7 = !{!""}
|
|
||||||
-!8 = !{i1 false}
|
|
||||||
-!9 = !{i32 0}
|
|
||||||
-!10 = !{!11, !11, i64 0}
|
|
||||||
-!11 = !{!"any pointer", !12, i64 0}
|
|
||||||
-!12 = !{!"omnipotent char", !13, i64 0}
|
|
||||||
-!13 = !{!"Simple C/C++ TBAA"}
|
|
||||||
-!14 = !{!15, !15, i64 0}
|
|
||||||
-!15 = !{!"int", !12, i64 0}
|
|
||||||
+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
|
|
||||||
+!3 = !{i32 1}
|
|
||||||
+!4 = !{!"none"}
|
|
||||||
+!5 = !{!"int*"}
|
|
||||||
+!6 = !{!""}
|
|
||||||
diff --git a/test/literal-struct.ll b/test/literal-struct.ll
|
|
||||||
index b88187f..dec957a 100644
|
|
||||||
--- a/test/literal-struct.ll
|
|
||||||
+++ b/test/literal-struct.ll
|
|
||||||
@@ -2,7 +2,7 @@
|
|
||||||
; structs, i.e. structs whose type has no name. Typicaly clang generate such
|
|
||||||
; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
|
|
||||||
; the following command:
|
|
||||||
-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
|
|
||||||
+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
|
|
||||||
|
|
||||||
; literal-struct.cl:
|
|
||||||
; void foo()
|
|
||||||
@@ -17,25 +17,28 @@
|
|
||||||
; RUN: llvm-spirv %t.bc -o %t.spv
|
|
||||||
; RUN: spirv-val %t.spv
|
|
||||||
|
|
||||||
-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
|
|
||||||
-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
|
|
||||||
+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
|
|
||||||
+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
|
|
||||||
+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
|
|
||||||
+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
|
|
||||||
|
|
||||||
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
|
||||||
target triple = "spir"
|
|
||||||
|
|
||||||
-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
|
|
||||||
+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
+
|
|
||||||
+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
|
|
||||||
; CHECK: ConstantComposite [[StructType]]
|
|
||||||
|
|
||||||
-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
|
|
||||||
-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
|
|
||||||
+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
|
|
||||||
; CHECK: ConstantNull [[StructType]]
|
|
||||||
|
|
||||||
; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
define spir_func void @foo() #0 {
|
|
||||||
entry:
|
|
||||||
- %myBlock = alloca void () addrspace(4)*, align 4
|
|
||||||
- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
|
|
||||||
- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
|
|
||||||
+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
|
|
||||||
+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
|
|
||||||
+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -43,14 +46,14 @@ entry:
|
|
||||||
define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
|
|
||||||
entry:
|
|
||||||
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
|
|
||||||
- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
|
|
||||||
- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #1 = { convergent }
|
|
||||||
|
|
||||||
!llvm.module.flags = !{!0}
|
|
||||||
@@ -60,4 +63,4 @@ attributes #1 = { convergent }
|
|
||||||
|
|
||||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
||||||
!1 = !{i32 2, i32 0}
|
|
||||||
-!2 = !{!"clang version 8.0.0 "}
|
|
||||||
+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
|
|
||||||
diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
|
|
||||||
index a68820f..ebd2c5f 100644
|
|
||||||
--- a/test/transcoding/block_w_struct_return.ll
|
|
||||||
+++ b/test/transcoding/block_w_struct_return.ll
|
|
||||||
@@ -16,6 +16,8 @@
|
|
||||||
; res[tid] = kernelBlock(aa).a - 6;
|
|
||||||
; }
|
|
||||||
|
|
||||||
+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
|
|
||||||
+
|
|
||||||
; RUN: llvm-as %s -o %t.bc
|
|
||||||
; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
|
|
||||||
; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
|
|
||||||
@@ -28,12 +30,14 @@
|
|
||||||
; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
|
|
||||||
|
|
||||||
; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
|
|
||||||
+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
|
|
||||||
+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
|
|
||||||
; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
|
|
||||||
; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
|
|
||||||
|
|
||||||
; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
|
|
||||||
; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
|
|
||||||
-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
|
|
||||||
+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
|
|
||||||
; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
|
|
||||||
|
|
||||||
; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
|
|
||||||
@@ -42,20 +46,21 @@
|
|
||||||
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
|
||||||
target triple = "spir64-unknown-unknown"
|
|
||||||
|
|
||||||
+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
%struct.A = type { i32 }
|
|
||||||
|
|
||||||
-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
|
|
||||||
+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
|
|
||||||
|
|
||||||
; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
|
|
||||||
+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
|
|
||||||
entry:
|
|
||||||
%res.addr = alloca i32 addrspace(1)*, align 8
|
|
||||||
- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
|
|
||||||
+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
|
|
||||||
%tid = alloca i64, align 8
|
|
||||||
%aa = alloca %struct.A, align 4
|
|
||||||
%tmp = alloca %struct.A, align 4
|
|
||||||
store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
|
|
||||||
- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
|
|
||||||
+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
|
|
||||||
%call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
|
|
||||||
store i64 %call, i64* %tid, align 8
|
|
||||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
|
|
||||||
@@ -64,7 +69,7 @@ entry:
|
|
||||||
store i32 -1, i32 addrspace(1)* %arrayidx, align 4
|
|
||||||
%a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
|
|
||||||
store i32 5, i32* %a, align 4
|
|
||||||
- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
|
|
||||||
+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
|
|
||||||
%a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
|
|
||||||
%2 = load i32, i32* %a1, align 4
|
|
||||||
%sub = sub nsw i32 %2, 6
|
|
||||||
@@ -79,10 +84,10 @@ entry:
|
|
||||||
define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
|
|
||||||
entry:
|
|
||||||
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
|
|
||||||
- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
|
|
||||||
- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
|
|
||||||
%a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
|
|
||||||
store i32 6, i32* %a1, align 4
|
|
||||||
%0 = bitcast %struct.A* %agg.result to i8*
|
|
||||||
@@ -97,30 +102,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
|
|
||||||
; Function Attrs: convergent nounwind readnone
|
|
||||||
declare spir_func i64 @_Z13get_global_idj(i32) #3
|
|
||||||
|
|
||||||
-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #2 = { argmemonly nounwind }
|
|
||||||
attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #4 = { convergent nounwind readnone }
|
|
||||||
attributes #5 = { convergent }
|
|
||||||
|
|
||||||
!llvm.module.flags = !{!0}
|
|
||||||
-!opencl.enable.FP_CONTRACT = !{}
|
|
||||||
!opencl.ocl.version = !{!1}
|
|
||||||
!opencl.spir.version = !{!1}
|
|
||||||
-!opencl.used.extensions = !{!2}
|
|
||||||
-!opencl.used.optional.core.features = !{!2}
|
|
||||||
-!opencl.compiler.options = !{!2}
|
|
||||||
-!llvm.ident = !{!3}
|
|
||||||
+!llvm.ident = !{!2}
|
|
||||||
|
|
||||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
||||||
!1 = !{i32 2, i32 0}
|
|
||||||
-!2 = !{}
|
|
||||||
-!3 = !{!"clang version 7.0.0"}
|
|
||||||
-!4 = !{i32 1}
|
|
||||||
-!5 = !{!"none"}
|
|
||||||
-!6 = !{!"int*"}
|
|
||||||
-!7 = !{!""}
|
|
||||||
-!8 = !{i1 false}
|
|
||||||
-!9 = !{i32 0}
|
|
||||||
-
|
|
||||||
+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
|
|
||||||
+!3 = !{i32 1}
|
|
||||||
+!4 = !{!"none"}
|
|
||||||
+!5 = !{!"int*"}
|
|
||||||
+!6 = !{!""}
|
|
||||||
diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
|
|
||||||
index 1f0b360..761043e 100644
|
|
||||||
--- a/test/transcoding/enqueue_kernel.ll
|
|
||||||
+++ b/test/transcoding/enqueue_kernel.ll
|
|
||||||
@@ -51,11 +51,12 @@
|
|
||||||
; ModuleID = 'enqueue_kernel.cl'
|
|
||||||
source_filename = "enqueue_kernel.cl"
|
|
||||||
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
|
|
||||||
-target triple = "spir-unknown-unknown"
|
|
||||||
+target triple = "spir"
|
|
||||||
|
|
||||||
%opencl.queue_t = type opaque
|
|
||||||
%struct.ndrange_t = type { i32 }
|
|
||||||
%opencl.clk_event_t = type opaque
|
|
||||||
+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
|
|
||||||
; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
|
|
||||||
; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
|
|
||||||
@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
|
|
||||||
|
|
||||||
; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
|
|
||||||
; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
|
|
||||||
-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
|
|
||||||
; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
|
|
||||||
-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
|
|
||||||
+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
|
|
||||||
; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
|
|
||||||
-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
|
|
||||||
-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
|
|
||||||
+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
|
|
||||||
+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
|
|
||||||
|
|
||||||
; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
|
|
||||||
+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
|
|
||||||
+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
|
|
||||||
; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
|
|
||||||
; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
|
|
||||||
-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
|
|
||||||
; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
|
|
||||||
; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
|
|
||||||
; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
|
|
||||||
; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
|
|
||||||
; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
|
|
||||||
|
|
||||||
-; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32 }
|
|
||||||
-; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
|
|
||||||
-; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
|
|
||||||
-; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }>
|
|
||||||
+; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32, i8 addrspace(4)* }
|
|
||||||
+; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
|
|
||||||
+; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
|
|
||||||
+; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)* }>
|
|
||||||
|
|
||||||
-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
|
|
||||||
-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
|
|
||||||
+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
|
|
||||||
+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
|
|
||||||
|
|
||||||
-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
|
|
||||||
-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
|
|
||||||
+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
|
|
||||||
+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
|
|
||||||
|
|
||||||
; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
|
|
||||||
+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
|
|
||||||
entry:
|
|
||||||
+ %a.addr = alloca i32 addrspace(1)*, align 4
|
|
||||||
+ %b.addr = alloca i32 addrspace(1)*, align 4
|
|
||||||
+ %i.addr = alloca i32, align 4
|
|
||||||
+ %c0.addr = alloca i8, align 1
|
|
||||||
%default_queue = alloca %opencl.queue_t*, align 4
|
|
||||||
%flags = alloca i32, align 4
|
|
||||||
%ndrange = alloca %struct.ndrange_t, align 4
|
|
||||||
%clk_event = alloca %opencl.clk_event_t*, align 4
|
|
||||||
%event_wait_list = alloca %opencl.clk_event_t*, align 4
|
|
||||||
%event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
|
|
||||||
- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
|
|
||||||
- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
|
|
||||||
+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
|
|
||||||
+ %tmp = alloca %struct.ndrange_t, align 4
|
|
||||||
+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
|
|
||||||
+ %tmp4 = alloca %struct.ndrange_t, align 4
|
|
||||||
%c = alloca i8, align 1
|
|
||||||
+ %tmp11 = alloca %struct.ndrange_t, align 4
|
|
||||||
+ %block_sizes = alloca [1 x i32], align 4
|
|
||||||
+ %tmp12 = alloca %struct.ndrange_t, align 4
|
|
||||||
+ %block_sizes13 = alloca [3 x i32], align 4
|
|
||||||
+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
|
|
||||||
+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
|
|
||||||
+ store i32 %i, i32* %i.addr, align 4
|
|
||||||
+ store i8 %c0, i8* %c0.addr, align 1
|
|
||||||
store i32 0, i32* %flags, align 4
|
|
||||||
%arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
|
|
||||||
%0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
|
|
||||||
store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
|
|
||||||
%1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
|
|
||||||
%2 = load i32, i32* %flags, align 4
|
|
||||||
- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
|
|
||||||
- store i32 17, i32* %block.size, align 4
|
|
||||||
- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
|
|
||||||
+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
|
|
||||||
+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
|
|
||||||
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
|
|
||||||
+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
|
|
||||||
+ store i32 21, i32* %block.size, align 4
|
|
||||||
+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
|
|
||||||
store i32 4, i32* %block.align, align 4
|
|
||||||
- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
|
|
||||||
- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
|
|
||||||
- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
|
|
||||||
- store i32 %i, i32* %block.captured1, align 4
|
|
||||||
- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
|
|
||||||
- store i8 %c0, i8* %block.captured2, align 4
|
|
||||||
- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
|
|
||||||
- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
|
|
||||||
+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
|
|
||||||
+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
|
|
||||||
+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
|
|
||||||
+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
|
|
||||||
+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
|
|
||||||
+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
|
|
||||||
+ %6 = load i32, i32* %i.addr, align 4
|
|
||||||
+ store i32 %6, i32* %block.captured1, align 4
|
|
||||||
+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
|
|
||||||
+ %7 = load i8, i8* %c0.addr, align 1
|
|
||||||
+ store i8 %7, i8* %block.captured2, align 4
|
|
||||||
+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
|
|
||||||
+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
|
|
||||||
|
|
||||||
; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
|
|
||||||
; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
|
|
||||||
; [[ConstInt0]] [[EventNull]] [[EventNull]]
|
|
||||||
; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
|
|
||||||
|
|
||||||
-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
|
|
||||||
+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
|
|
||||||
+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
|
|
||||||
; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
|
|
||||||
-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
|
|
||||||
-
|
|
||||||
- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
|
|
||||||
- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
|
|
||||||
- store i32 20, i32* %block.size5, align 4
|
|
||||||
- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
|
|
||||||
+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
|
|
||||||
+
|
|
||||||
+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
|
|
||||||
+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
|
|
||||||
+ %12 = load i32, i32* %flags, align 4
|
|
||||||
+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
|
|
||||||
+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
|
|
||||||
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
|
|
||||||
+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
|
|
||||||
+ store i32 24, i32* %block.size5, align 4
|
|
||||||
+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
|
|
||||||
store i32 4, i32* %block.align6, align 4
|
|
||||||
- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
|
|
||||||
- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
|
|
||||||
- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
|
|
||||||
- store i32 %i, i32* %block.captured8, align 4
|
|
||||||
- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
|
|
||||||
- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
|
|
||||||
- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
|
|
||||||
- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
|
|
||||||
+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
|
|
||||||
+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
|
|
||||||
+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
|
|
||||||
+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
|
|
||||||
+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
|
|
||||||
+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
|
|
||||||
+ %18 = load i32, i32* %i.addr, align 4
|
|
||||||
+ store i32 %18, i32* %block.captured9, align 4
|
|
||||||
+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
|
|
||||||
+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
|
|
||||||
+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
|
|
||||||
+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
|
|
||||||
+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
|
|
||||||
+
|
|
||||||
|
|
||||||
; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
|
|
||||||
; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
|
|
||||||
@@ -158,16 +193,24 @@ entry:
|
|
||||||
; [[ConstInt2]] [[Event1]] [[Event2]]
|
|
||||||
; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
|
|
||||||
|
|
||||||
-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
|
|
||||||
+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
|
|
||||||
+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
|
|
||||||
; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
|
|
||||||
-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
|
|
||||||
-
|
|
||||||
- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
|
|
||||||
- %11 = alloca [1 x i32]
|
|
||||||
- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
|
|
||||||
- %13 = load i8, i8* %c, align 1
|
|
||||||
- %14 = zext i8 %13 to i32
|
|
||||||
- store i32 %14, i32* %12, align 4
|
|
||||||
+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
|
|
||||||
+
|
|
||||||
+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
|
|
||||||
+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
|
|
||||||
+ %24 = load i32, i32* %flags, align 4
|
|
||||||
+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
|
|
||||||
+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
|
|
||||||
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
|
|
||||||
+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
|
|
||||||
+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
|
|
||||||
+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
|
|
||||||
+ %30 = load i8, i8* %c, align 1
|
|
||||||
+ %31 = zext i8 %30 to i32
|
|
||||||
+ store i32 %31, i32* %29, align 4
|
|
||||||
|
|
||||||
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
|
|
||||||
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
|
|
||||||
@@ -182,14 +225,18 @@ entry:
|
|
||||||
; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
|
|
||||||
; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
|
|
||||||
|
|
||||||
- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
|
|
||||||
- %16 = alloca [3 x i32]
|
|
||||||
- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
|
|
||||||
- store i32 1, i32* %17, align 4
|
|
||||||
- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
|
|
||||||
- store i32 2, i32* %18, align 4
|
|
||||||
- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
|
|
||||||
- store i32 4, i32* %19, align 4
|
|
||||||
+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
|
|
||||||
+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
|
|
||||||
+ %34 = load i32, i32* %flags, align 4
|
|
||||||
+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
|
|
||||||
+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
|
|
||||||
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
|
|
||||||
+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
|
|
||||||
+ store i32 1, i32* %37, align 4
|
|
||||||
+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
|
|
||||||
+ store i32 2, i32* %38, align 4
|
|
||||||
+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
|
|
||||||
+ store i32 4, i32* %39, align 4
|
|
||||||
|
|
||||||
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
|
|
||||||
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
|
|
||||||
@@ -206,24 +253,27 @@ entry:
|
|
||||||
; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
|
|
||||||
; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
|
|
||||||
|
|
||||||
- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
|
|
||||||
+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
+; Function Attrs: argmemonly nounwind
|
|
||||||
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
|
|
||||||
+
|
|
||||||
; Function Attrs: convergent noinline nounwind optnone
|
|
||||||
define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
|
|
||||||
entry:
|
|
||||||
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
|
|
||||||
- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
|
|
||||||
- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
|
|
||||||
- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
|
|
||||||
%0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
|
|
||||||
%conv = sext i8 %0 to i32
|
|
||||||
- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
|
|
||||||
+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
|
|
||||||
%1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
|
|
||||||
- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
|
|
||||||
+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
|
|
||||||
%2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
|
|
||||||
store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
|
|
||||||
@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
|
|
||||||
define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
|
|
||||||
entry:
|
|
||||||
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
|
|
||||||
- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
|
|
||||||
- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
|
|
||||||
%0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
|
|
||||||
- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
|
|
||||||
+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
|
|
||||||
%1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
|
|
||||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
|
|
||||||
%2 = load i32, i32 addrspace(1)* %arrayidx, align 4
|
|
||||||
- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
|
|
||||||
+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
|
|
||||||
%3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
|
|
||||||
- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
|
|
||||||
+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
|
|
||||||
%4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
|
|
||||||
%arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
|
|
||||||
store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
|
|
||||||
@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
|
|
||||||
entry:
|
|
||||||
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
|
|
||||||
%p.addr = alloca i8 addrspace(3)*, align 4
|
|
||||||
- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
|
|
||||||
store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
|
|
||||||
- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -300,13 +350,13 @@ entry:
|
|
||||||
%p1.addr = alloca i8 addrspace(3)*, align 4
|
|
||||||
%p2.addr = alloca i8 addrspace(3)*, align 4
|
|
||||||
%p3.addr = alloca i8 addrspace(3)*, align 4
|
|
||||||
- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
|
|
||||||
+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
|
|
||||||
store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
|
|
||||||
- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
|
|
||||||
+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
|
|
||||||
store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
|
|
||||||
store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
|
|
||||||
store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
|
|
||||||
- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
|
|
||||||
+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
|
|
||||||
; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
|
|
||||||
; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
|
|
||||||
|
|
||||||
-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #1 = { argmemonly nounwind }
|
|
||||||
-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #3 = { nounwind }
|
|
||||||
|
|
||||||
!llvm.module.flags = !{!0}
|
|
||||||
-!opencl.enable.FP_CONTRACT = !{}
|
|
||||||
!opencl.ocl.version = !{!1}
|
|
||||||
!opencl.spir.version = !{!1}
|
|
||||||
-!opencl.used.extensions = !{!2}
|
|
||||||
-!opencl.used.optional.core.features = !{!2}
|
|
||||||
-!opencl.compiler.options = !{!2}
|
|
||||||
-!llvm.ident = !{!3}
|
|
||||||
+!llvm.ident = !{!2}
|
|
||||||
|
|
||||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
||||||
!1 = !{i32 2, i32 0}
|
|
||||||
-!2 = !{}
|
|
||||||
-!3 = !{!"clang version 7.0.0"}
|
|
||||||
-!4 = !{i32 1, i32 1, i32 0, i32 0}
|
|
||||||
-!5 = !{!"none", !"none", !"none", !"none"}
|
|
||||||
-!6 = !{!"int*", !"int*", !"int", !"char"}
|
|
||||||
-!7 = !{!"", !"", !"", !""}
|
|
||||||
-!8 = !{i1 false, i1 false, i1 false, i1 false}
|
|
||||||
-!9 = !{i32 0, i32 0, i32 0, i32 0}
|
|
||||||
+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
|
|
||||||
+!3 = !{i32 1, i32 1, i32 0, i32 0}
|
|
||||||
+!4 = !{!"none", !"none", !"none", !"none"}
|
|
||||||
+!5 = !{!"int*", !"int*", !"int", !"char"}
|
|
||||||
+!6 = !{!"", !"", !"", !""}
|
|
||||||
--
|
|
||||||
2.7.4
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001
|
From 559fb8f82295ec4dc64a132b6566939b85c1b6fe Mon Sep 17 00:00:00 2001
|
||||||
From: Anuj Mittal <anuj.mittal@intel.com>
|
From: Anuj Mittal <anuj.mittal@intel.com>
|
||||||
Date: Fri, 29 Mar 2019 08:56:53 +0800
|
Date: Thu, 15 Aug 2019 22:34:31 +0800
|
||||||
Subject: [PATCH] dont export targets for binaries
|
Subject: [PATCH] dont export targets for binaries
|
||||||
|
|
||||||
The projects using LLVM cmake modules look for target binaries in
|
The projects using LLVM cmake modules look for target binaries in
|
||||||
|
@ -10,29 +10,30 @@ either.
|
||||||
Upstream-Status: Inappropriate [cross-compile specific]
|
Upstream-Status: Inappropriate [cross-compile specific]
|
||||||
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||||
|
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||||
---
|
---
|
||||||
llvm/cmake/modules/AddLLVM.cmake | 9 ---------
|
llvm/cmake/modules/AddLLVM.cmake | 9 ---------
|
||||||
llvm/cmake/modules/TableGen.cmake | 6 ------
|
llvm/cmake/modules/TableGen.cmake | 6 ------
|
||||||
2 files changed, 15 deletions(-)
|
2 files changed, 15 deletions(-)
|
||||||
|
|
||||||
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
|
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
|
||||||
index 0df6845..b79f4fa 100644
|
index 619e986b8aa..d2bc1a25dd9 100644
|
||||||
--- a/llvm/cmake/modules/AddLLVM.cmake
|
--- a/llvm/cmake/modules/AddLLVM.cmake
|
||||||
+++ b/llvm/cmake/modules/AddLLVM.cmake
|
+++ b/llvm/cmake/modules/AddLLVM.cmake
|
||||||
@@ -866,12 +866,6 @@ macro(add_llvm_tool name)
|
@@ -898,12 +898,6 @@ macro(add_llvm_tool name)
|
||||||
|
|
||||||
if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
||||||
if( LLVM_BUILD_TOOLS )
|
if( LLVM_BUILD_TOOLS )
|
||||||
|
- set(export_to_llvmexports)
|
||||||
- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
|
- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
|
||||||
- NOT LLVM_DISTRIBUTION_COMPONENTS)
|
- NOT LLVM_DISTRIBUTION_COMPONENTS)
|
||||||
- set(export_to_llvmexports EXPORT LLVMExports)
|
- set(export_to_llvmexports EXPORT LLVMExports)
|
||||||
- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
|
- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
|
||||||
- endif()
|
- endif()
|
||||||
-
|
|
||||||
install(TARGETS ${name}
|
install(TARGETS ${name}
|
||||||
${export_to_llvmexports}
|
${export_to_llvmexports}
|
||||||
RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}
|
@@ -917,9 +911,6 @@ macro(add_llvm_tool name)
|
||||||
@@ -884,9 +878,6 @@ macro(add_llvm_tool name)
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
@ -43,18 +44,19 @@ index 0df6845..b79f4fa 100644
|
||||||
endmacro(add_llvm_tool name)
|
endmacro(add_llvm_tool name)
|
||||||
|
|
||||||
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
|
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
|
||||||
index 3c84ae7..141219f 100644
|
index 36c026b5c0f..537acd696d8 100644
|
||||||
--- a/llvm/cmake/modules/TableGen.cmake
|
--- a/llvm/cmake/modules/TableGen.cmake
|
||||||
+++ b/llvm/cmake/modules/TableGen.cmake
|
+++ b/llvm/cmake/modules/TableGen.cmake
|
||||||
@@ -164,14 +164,8 @@ macro(add_tablegen target project)
|
@@ -148,15 +148,9 @@ macro(add_tablegen target project)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_UTILS)
|
||||||
|
- set(export_to_llvmexports)
|
||||||
- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
|
- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
|
||||||
- NOT LLVM_DISTRIBUTION_COMPONENTS)
|
- NOT LLVM_DISTRIBUTION_COMPONENTS)
|
||||||
- set(export_to_llvmexports EXPORT LLVMExports)
|
- set(export_to_llvmexports EXPORT LLVMExports)
|
||||||
- endif()
|
- endif()
|
||||||
-
|
|
||||||
install(TARGETS ${target}
|
install(TARGETS ${target}
|
||||||
${export_to_llvmexports}
|
${export_to_llvmexports}
|
||||||
RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
|
RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
|
||||||
|
@ -62,5 +64,5 @@ index 3c84ae7..141219f 100644
|
||||||
- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
|
- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
|
||||||
endmacro()
|
endmacro()
|
||||||
--
|
--
|
||||||
2.7.4
|
2.17.1
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
From 91db4c3cf7f290a3cab5caa316fc25a60dd409f1 Mon Sep 17 00:00:00 2001
|
From 48e50f06b1bbed94cdf5207587161d4bfce7366e Mon Sep 17 00:00:00 2001
|
||||||
From: Anuj Mittal <anuj.mittal@intel.com>
|
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||||
Date: Fri, 16 Aug 2019 20:25:16 +0800
|
Date: Wed, 21 Aug 2019 14:35:31 +0800
|
||||||
Subject: [PATCH] llvm-spirv: skip including tests
|
Subject: [PATCH] llvm-spirv: skip building tests
|
||||||
|
|
||||||
Some of these need clang to be built and since we're building this in-tree,
|
Some of these need clang to be built and since we're building this in-tree,
|
||||||
that leads to problems when compiling libcxx, compiler-rt which aren't built
|
that leads to problems when compiling libcxx, compiler-rt which aren't built
|
||||||
|
@ -13,12 +13,13 @@ all components, disable the building of tests altogether.
|
||||||
Upstream-Status: Inappropriate
|
Upstream-Status: Inappropriate
|
||||||
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||||
|
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||||
---
|
---
|
||||||
CMakeLists.txt | 10 ----------
|
CMakeLists.txt | 10 ----------
|
||||||
1 file changed, 10 deletions(-)
|
1 file changed, 10 deletions(-)
|
||||||
|
|
||||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
index d632a50..81ddf62 100644
|
index 1208741..20ca3e6 100644
|
||||||
--- a/CMakeLists.txt
|
--- a/CMakeLists.txt
|
||||||
+++ b/CMakeLists.txt
|
+++ b/CMakeLists.txt
|
||||||
@@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
@@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
|
@ -32,7 +33,7 @@ index d632a50..81ddf62 100644
|
||||||
- )
|
- )
|
||||||
- endif(LLVM_INCLUDE_TESTS)
|
- endif(LLVM_INCLUDE_TESTS)
|
||||||
-
|
-
|
||||||
find_package(LLVM 8.0.0 REQUIRED
|
find_package(LLVM 9.0.0 REQUIRED
|
||||||
COMPONENTS
|
COMPONENTS
|
||||||
Analysis
|
Analysis
|
||||||
@@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
@@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||||
|
@ -46,5 +47,5 @@ index d632a50..81ddf62 100644
|
||||||
install(
|
install(
|
||||||
FILES
|
FILES
|
||||||
--
|
--
|
||||||
2.7.4
|
2.17.1
|
||||||
|
|
||||||
|
|
|
@ -1,294 +0,0 @@
|
||||||
From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Savonichev <andrew.savonichev@intel.com>
|
|
||||||
Date: Thu, 21 Feb 2019 11:02:10 +0000
|
|
||||||
Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
Emit direct call of block invoke functions when possible, i.e. in case the
|
|
||||||
block is not passed as a function argument.
|
|
||||||
Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
|
|
||||||
|
|
||||||
Reviewers: Anastasia, yaxunl, svenvh
|
|
||||||
|
|
||||||
Reviewed By: Anastasia
|
|
||||||
|
|
||||||
Subscribers: cfe-commits
|
|
||||||
|
|
||||||
Tags: #clang
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D58388
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
|
|
||||||
Upstream-Status: Backport
|
|
||||||
[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58]
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
|
||||||
---
|
|
||||||
lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++-------------
|
|
||||||
lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++---
|
|
||||||
lib/CodeGen/CGOpenCLRuntime.h | 4 ++
|
|
||||||
test/CodeGenOpenCL/blocks.cl | 10 +---
|
|
||||||
test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++---
|
|
||||||
5 files changed, 91 insertions(+), 64 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
|
|
||||||
index fa3c3ee..10a0238 100644
|
|
||||||
--- a/lib/CodeGen/CGBlocks.cpp
|
|
||||||
+++ b/lib/CodeGen/CGBlocks.cpp
|
|
||||||
@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
|
|
||||||
ReturnValueSlot ReturnValue) {
|
|
||||||
const BlockPointerType *BPT =
|
|
||||||
E->getCallee()->getType()->getAs<BlockPointerType>();
|
|
||||||
-
|
|
||||||
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
|
|
||||||
-
|
|
||||||
- // Get a pointer to the generic block literal.
|
|
||||||
- // For OpenCL we generate generic AS void ptr to be able to reuse the same
|
|
||||||
- // block definition for blocks with captures generated as private AS local
|
|
||||||
- // variables and without captures generated as global AS program scope
|
|
||||||
- // variables.
|
|
||||||
- unsigned AddrSpace = 0;
|
|
||||||
- if (getLangOpts().OpenCL)
|
|
||||||
- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
|
|
||||||
-
|
|
||||||
- llvm::Type *BlockLiteralTy =
|
|
||||||
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
|
|
||||||
-
|
|
||||||
- // Bitcast the callee to a block literal.
|
|
||||||
- BlockPtr =
|
|
||||||
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
|
|
||||||
-
|
|
||||||
- // Get the function pointer from the literal.
|
|
||||||
- llvm::Value *FuncPtr =
|
|
||||||
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
|
|
||||||
- CGM.getLangOpts().OpenCL ? 2 : 3);
|
|
||||||
-
|
|
||||||
- // Add the block literal.
|
|
||||||
+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
|
|
||||||
+ llvm::Value *Func = nullptr;
|
|
||||||
+ QualType FnType = BPT->getPointeeType();
|
|
||||||
+ ASTContext &Ctx = getContext();
|
|
||||||
CallArgList Args;
|
|
||||||
|
|
||||||
- QualType VoidPtrQualTy = getContext().VoidPtrTy;
|
|
||||||
- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
|
|
||||||
if (getLangOpts().OpenCL) {
|
|
||||||
- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
|
|
||||||
- VoidPtrQualTy =
|
|
||||||
- getContext().getPointerType(getContext().getAddrSpaceQualType(
|
|
||||||
- getContext().VoidTy, LangAS::opencl_generic));
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
|
|
||||||
- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
|
|
||||||
-
|
|
||||||
- QualType FnType = BPT->getPointeeType();
|
|
||||||
+ // For OpenCL, BlockPtr is already casted to generic block literal.
|
|
||||||
+
|
|
||||||
+ // First argument of a block call is a generic block literal casted to
|
|
||||||
+ // generic void pointer, i.e. i8 addrspace(4)*
|
|
||||||
+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
|
|
||||||
+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
|
|
||||||
+ QualType VoidPtrQualTy = Ctx.getPointerType(
|
|
||||||
+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
|
|
||||||
+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
|
|
||||||
+ // And the rest of the arguments.
|
|
||||||
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
|
|
||||||
+
|
|
||||||
+ // We *can* call the block directly unless it is a function argument.
|
|
||||||
+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
|
|
||||||
+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
|
|
||||||
+ else {
|
|
||||||
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
|
|
||||||
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ // Bitcast the block literal to a generic block literal.
|
|
||||||
+ BlockPtr = Builder.CreatePointerCast(
|
|
||||||
+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
|
|
||||||
+ // Get pointer to the block invoke function
|
|
||||||
+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
|
|
||||||
|
|
||||||
- // And the rest of the arguments.
|
|
||||||
- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
|
|
||||||
+ // First argument is a block literal casted to a void pointer
|
|
||||||
+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
|
|
||||||
+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
|
|
||||||
+ // And the rest of the arguments.
|
|
||||||
+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
|
|
||||||
|
|
||||||
- // Load the function.
|
|
||||||
- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
|
|
||||||
+ // Load the function.
|
|
||||||
+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
|
|
||||||
+ }
|
|
||||||
|
|
||||||
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
|
|
||||||
const CGFunctionInfo &FnInfo =
|
|
||||||
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
|
|
||||||
index 7f6f595..75003e5 100644
|
|
||||||
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
|
|
||||||
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
|
|
||||||
@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
|
|
||||||
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
|
|
||||||
}
|
|
||||||
|
|
||||||
+// Get the block literal from an expression derived from the block expression.
|
|
||||||
+// OpenCL v2.0 s6.12.5:
|
|
||||||
+// Block variable declarations are implicitly qualified with const. Therefore
|
|
||||||
+// all block variables must be initialized at declaration time and may not be
|
|
||||||
+// reassigned.
|
|
||||||
+static const BlockExpr *getBlockExpr(const Expr *E) {
|
|
||||||
+ const Expr *Prev = nullptr; // to make sure we do not get stuck in an infinite loop.
|
|
||||||
+ while(!isa<BlockExpr>(E) && E != Prev) {
|
|
||||||
+ Prev = E;
|
|
||||||
+ E = E->IgnoreCasts();
|
|
||||||
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
|
|
||||||
+ E = cast<VarDecl>(DR->getDecl())->getInit();
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return cast<BlockExpr>(E);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/// Record emitted llvm invoke function and llvm block literal for the
|
|
||||||
/// corresponding block expression.
|
|
||||||
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
|
|
||||||
@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
|
|
||||||
EnqueuedBlockMap[E].Kernel = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
|
|
||||||
+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
CGOpenCLRuntime::EnqueuedBlockInfo
|
|
||||||
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
|
|
||||||
CGF.EmitScalarExpr(E);
|
|
||||||
|
|
||||||
// The block literal may be assigned to a const variable. Chasing down
|
|
||||||
// to get the block literal.
|
|
||||||
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
|
|
||||||
- E = cast<VarDecl>(DR->getDecl())->getInit();
|
|
||||||
- }
|
|
||||||
- E = E->IgnoreImplicit();
|
|
||||||
- if (auto Cast = dyn_cast<CastExpr>(E)) {
|
|
||||||
- E = Cast->getSubExpr();
|
|
||||||
- }
|
|
||||||
- auto *Block = cast<BlockExpr>(E);
|
|
||||||
+ const BlockExpr *Block = getBlockExpr(E);
|
|
||||||
|
|
||||||
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
|
|
||||||
"Block expression not emitted");
|
|
||||||
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
|
|
||||||
index 750721f..4effc7e 100644
|
|
||||||
--- a/lib/CodeGen/CGOpenCLRuntime.h
|
|
||||||
+++ b/lib/CodeGen/CGOpenCLRuntime.h
|
|
||||||
@@ -92,6 +92,10 @@ public:
|
|
||||||
/// \param Block block literal emitted for the block expression.
|
|
||||||
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
|
|
||||||
llvm::Value *Block);
|
|
||||||
+
|
|
||||||
+ /// \return LLVM block invoke function emitted for an expression derived from
|
|
||||||
+ /// the block expression.
|
|
||||||
+ llvm::Function *getInvokeFunction(const Expr *E);
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
index 19aacc3..ab5a2c6 100644
|
|
||||||
--- a/test/CodeGenOpenCL/blocks.cl
|
|
||||||
+++ b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
@@ -39,11 +39,8 @@ void foo(){
|
|
||||||
// SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
|
|
||||||
// SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
|
|
||||||
// SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
|
|
||||||
- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
|
|
||||||
// SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
|
|
||||||
- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
|
|
||||||
- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
|
|
||||||
- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
|
|
||||||
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
|
|
||||||
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
|
|
||||||
// AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
|
|
||||||
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
|
|
||||||
@@ -53,11 +50,8 @@ void foo(){
|
|
||||||
// AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
|
|
||||||
// AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
|
|
||||||
// AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
|
|
||||||
- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
|
|
||||||
// AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
|
|
||||||
- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
|
|
||||||
- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
|
|
||||||
- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
|
|
||||||
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
|
|
||||||
|
|
||||||
int (^ block_B)(void) = ^{
|
|
||||||
return i;
|
|
||||||
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
index 8445016..1566912 100644
|
|
||||||
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
|
|
||||||
@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
};
|
|
||||||
|
|
||||||
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
|
|
||||||
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
|
|
||||||
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
|
|
||||||
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
|
||||||
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
|
||||||
block_A();
|
|
||||||
|
|
||||||
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
|
|
||||||
@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
unsigned size = get_kernel_work_group_size(block_A);
|
|
||||||
|
|
||||||
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
|
|
||||||
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
|
|
||||||
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
|
|
||||||
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
|
||||||
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
|
||||||
block_A();
|
|
||||||
|
|
||||||
+ // Make sure that block invoke function is resolved correctly after sequence of assignments.
|
|
||||||
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
|
|
||||||
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
|
|
||||||
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
|
|
||||||
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
|
|
||||||
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
|
|
||||||
+ bl_t b1 = block_G;
|
|
||||||
+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
|
|
||||||
+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
|
|
||||||
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
|
|
||||||
+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
|
|
||||||
+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
|
|
||||||
+ bl_t b2 = b1;
|
|
||||||
+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
|
|
||||||
+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
|
|
||||||
+ // COMMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
|
|
||||||
+ b2(0);
|
|
||||||
+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
|
|
||||||
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
|
|
||||||
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
|
|
||||||
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
|
|
||||||
+ size = get_kernel_preferred_work_group_size_multiple(b2);
|
|
||||||
+
|
|
||||||
void (^block_C)(void) = ^{
|
|
||||||
callee(i, a);
|
|
||||||
};
|
|
||||||
-
|
|
||||||
// Emits block literal on stack and block kernel [[INVLK3]].
|
|
||||||
// COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
|
|
||||||
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
|
|
||||||
@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
|
||||||
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
|
|
||||||
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
|
|
||||||
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
|
|
||||||
+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
|
|
||||||
// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
|
|
||||||
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
|
|
||||||
-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
|
|
||||||
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
|
|
||||||
// COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
From a2c093c8db7b4e3a5612d0fcce9e4fd1756d6e4b Mon Sep 17 00:00:00 2001
|
|
||||||
From: Alexey Sotkin <alexey.sotkin@intel.com>
|
|
||||||
Date: Mon, 5 Aug 2019 18:18:01 +0300
|
|
||||||
Subject: [PATCH] Remove extra semicolon
|
|
||||||
|
|
||||||
Upstream-Status: Backport
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
|
||||||
---
|
|
||||||
lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h
|
|
||||||
index c93a484..3a071e7 100644
|
|
||||||
--- a/lib/SPIRV/libSPIRV/SPIRVEnum.h
|
|
||||||
+++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h
|
|
||||||
@@ -124,7 +124,7 @@ template <> inline void SPIRVMap<SPIRVExtensionKind, std::string>::init() {
|
|
||||||
add(SPV_INTEL_device_side_avc_motion_estimation,
|
|
||||||
"SPV_INTEL_device_side_avc_motion_estimation");
|
|
||||||
add(SPV_KHR_no_integer_wrap_decoration, "SPV_KHR_no_integer_wrap_decoration");
|
|
||||||
-};
|
|
||||||
+}
|
|
||||||
|
|
||||||
template <> inline void SPIRVMap<SPIRVExtInstSetKind, std::string>::init() {
|
|
||||||
add(SPIRVEIS_OpenCL, "OpenCL.std");
|
|
||||||
--
|
|
||||||
2.7.4
|
|
||||||
|
|
|
@ -1,61 +0,0 @@
|
||||||
From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Yaxun Liu <Yaxun.Liu@amd.com>
|
|
||||||
Date: Tue, 26 Feb 2019 16:20:41 +0000
|
|
||||||
Subject: [PATCH] [OpenCL] Fix assertion due to blocks
|
|
||||||
|
|
||||||
A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
|
|
||||||
|
|
||||||
There is code
|
|
||||||
|
|
||||||
Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
|
|
||||||
getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
|
|
||||||
BlockExpr and returns nullptr, which causes isa to assert.
|
|
||||||
|
|
||||||
This patch fixes that.
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D58658
|
|
||||||
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
|
|
||||||
Upstream-Status: Backport
|
|
||||||
[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96]
|
|
||||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
|
||||||
---
|
|
||||||
lib/AST/Expr.cpp | 2 ++
|
|
||||||
test/CodeGenOpenCL/blocks.cl | 6 ++++++
|
|
||||||
2 files changed, 8 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
|
|
||||||
index aef1eab..85690c7 100644
|
|
||||||
--- a/lib/AST/Expr.cpp
|
|
||||||
+++ b/lib/AST/Expr.cpp
|
|
||||||
@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
|
|
||||||
return DRE->getDecl();
|
|
||||||
if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
|
|
||||||
return ME->getMemberDecl();
|
|
||||||
+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
|
|
||||||
+ return BE->getBlockDecl();
|
|
||||||
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
index ab5a2c6..c3e2685 100644
|
|
||||||
--- a/test/CodeGenOpenCL/blocks.cl
|
|
||||||
+++ b/test/CodeGenOpenCL/blocks.cl
|
|
||||||
@@ -90,6 +90,12 @@ int get42() {
|
|
||||||
return blockArgFunc(^{return 42;});
|
|
||||||
}
|
|
||||||
|
|
||||||
+// COMMON-LABEL: define {{.*}}@call_block
|
|
||||||
+// call {{.*}}@__call_block_block_invoke
|
|
||||||
+int call_block() {
|
|
||||||
+ return ^int(int num) { return num; } (11);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
|
|
||||||
// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
|
|
||||||
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Aleksander Us <aleksander.us@intel.com>
|
||||||
|
Date: Mon, 26 Aug 2019 15:47:41 +0300
|
||||||
|
Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in
|
||||||
|
SplitBlockPredecessors.
|
||||||
|
|
||||||
|
When BB is the header of some loop and a predecessor is the latch of
|
||||||
|
this loop, metadata was not attached to newly created basic block.
|
||||||
|
This led to loss of loop metadata for other passes.
|
||||||
|
|
||||||
|
Upstream-Status: Submitted [https://reviews.llvm.org/D66892]
|
||||||
|
|
||||||
|
https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32
|
||||||
|
|
||||||
|
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||||
|
---
|
||||||
|
lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++----
|
||||||
|
test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++
|
||||||
|
2 files changed, 52 insertions(+), 7 deletions(-)
|
||||||
|
create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||||
|
index 5fa371377c8..3a90ae061fb 100644
|
||||||
|
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||||
|
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
|
||||||
|
@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
|
||||||
|
|
||||||
|
// The new block unconditionally branches to the old block.
|
||||||
|
BranchInst *BI = BranchInst::Create(BB, NewBB);
|
||||||
|
+ bool IsBBHeader = LI && LI->isLoopHeader(BB);
|
||||||
|
+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr;
|
||||||
|
// Splitting the predecessors of a loop header creates a preheader block.
|
||||||
|
- if (LI && LI->isLoopHeader(BB))
|
||||||
|
+ if (IsBBHeader)
|
||||||
|
// Using the loop start line number prevents debuggers stepping into the
|
||||||
|
// loop body for this instruction.
|
||||||
|
- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
|
||||||
|
+ BI->setDebugLoc(BBLoop->getStartLoc());
|
||||||
|
else
|
||||||
|
BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
|
||||||
|
|
||||||
|
// Move the edges from Preds to point to NewBB instead of BB.
|
||||||
|
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
|
||||||
|
+ for (BasicBlock *Pred : Preds) {
|
||||||
|
+ Instruction *PI = Pred->getTerminator();
|
||||||
|
// This is slightly more strict than necessary; the minimum requirement
|
||||||
|
// is that there be no more than one indirectbr branching to BB. And
|
||||||
|
// all BlockAddress uses would need to be updated.
|
||||||
|
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
|
||||||
|
+ assert(!isa<IndirectBrInst>(PI) &&
|
||||||
|
"Cannot split an edge from an IndirectBrInst");
|
||||||
|
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
|
||||||
|
- "Cannot split an edge from a CallBrInst");
|
||||||
|
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
|
||||||
|
+ assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst");
|
||||||
|
+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) {
|
||||||
|
+ // Update loop metadata if it exists.
|
||||||
|
+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) {
|
||||||
|
+ BI->setMetadata(LLVMContext::MD_loop, LoopMD);
|
||||||
|
+ PI->setMetadata(LLVMContext::MD_loop, nullptr);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ PI->replaceUsesOfWith(BB, NewBB);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
|
||||||
|
diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..c15c92fe3ae
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/test/Transforms/LoopSimplify/loop_metadata.ll
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+; RUN: opt -S -loop-simplify < %s | FileCheck %s
|
||||||
|
+
|
||||||
|
+; CHECK: for.cond.loopexit:
|
||||||
|
+; CHECK: br label %for.cond, !llvm.loop !0
|
||||||
|
+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit
|
||||||
|
+
|
||||||
|
+define void @foo() {
|
||||||
|
+entry:
|
||||||
|
+ br label %for.cond
|
||||||
|
+
|
||||||
|
+for.cond: ; preds = %for.cond1, %entry
|
||||||
|
+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ]
|
||||||
|
+ %cmp = icmp ult i32 %j, 8
|
||||||
|
+ br i1 %cmp, label %for.body, label %for.end
|
||||||
|
+
|
||||||
|
+for.body: ; preds = %for.cond
|
||||||
|
+ %dummy1 = add i32 1, 1
|
||||||
|
+ %add = add nuw nsw i32 %j, 1
|
||||||
|
+ br label %for.cond1
|
||||||
|
+
|
||||||
|
+for.cond1: ; preds = %for.body1, %for.body
|
||||||
|
+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ]
|
||||||
|
+ %cmp1 = icmp ult i32 %i.0, 8
|
||||||
|
+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0
|
||||||
|
+
|
||||||
|
+for.body1: ; preds = %for.cond1
|
||||||
|
+ %dummy2 = add i32 1, 1
|
||||||
|
+ %inc = add nuw nsw i32 %i.0, 1
|
||||||
|
+ br label %for.cond1
|
||||||
|
+
|
||||||
|
+for.end: ; preds = %for.cond
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+!0 = distinct !{!0, !1}
|
||||||
|
+!1 = !{!"llvm.loop.unroll.full"}
|
||||||
|
--
|
||||||
|
2.18.0
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Aleksander Us <aleksander.us@intel.com>
|
||||||
|
Date: Mon, 26 Aug 2019 15:45:47 +0300
|
||||||
|
Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
|
||||||
|
LFTR when possible.
|
||||||
|
|
||||||
|
SCEV analysis cannot properly cache instruction with poison flags
|
||||||
|
(for example, add nsw outside of loop will not be reused by expander).
|
||||||
|
This can cause the SCEV expander to generate additional instructions.
|
||||||
|
|
||||||
|
Example IR:
|
||||||
|
|
||||||
|
...
|
||||||
|
%maxval = add nuw nsw i32 %a1, %a2
|
||||||
|
...
|
||||||
|
for.body:
|
||||||
|
...
|
||||||
|
%cmp22 = icmp ult i32 %ivadd, %maxval
|
||||||
|
br i1 %cmp22, label %for.body, label %for.end
|
||||||
|
...
|
||||||
|
|
||||||
|
SCEV expander will generate copy of %maxval in preheader but without
|
||||||
|
nuw/nsw flags. This can be avoided by explicit check that iv count
|
||||||
|
value gives the same SCEV expressions as calculated by LFTR.
|
||||||
|
|
||||||
|
Upstream-Status: Submitted [https://reviews.llvm.org/D66890]
|
||||||
|
|
||||||
|
https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f
|
||||||
|
|
||||||
|
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||||
|
---
|
||||||
|
lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
|
||||||
|
test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
|
||||||
|
test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
|
||||||
|
test/Transforms/IndVarSimplify/udiv.ll | 1 +
|
||||||
|
4 files changed, 38 insertions(+), 7 deletions(-)
|
||||||
|
create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||||
|
index f9fc698a4a9..5e04dac8aa6 100644
|
||||||
|
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||||
|
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
|
||||||
|
@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
|
||||||
|
if (UsePostInc)
|
||||||
|
IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
|
||||||
|
|
||||||
|
+ // If computed limit is equal to old limit then do not use SCEV expander
|
||||||
|
+ // because it can lose NUW/NSW flags and create extra instructions.
|
||||||
|
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
||||||
|
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
|
||||||
|
+ Value *Limit = Cmp->getOperand(0);
|
||||||
|
+ if (!L->isLoopInvariant(Limit))
|
||||||
|
+ Limit = Cmp->getOperand(1);
|
||||||
|
+ if (SE->getSCEV(Limit) == IVLimit)
|
||||||
|
+ return Limit;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// Expand the code for the iteration count.
|
||||||
|
assert(SE->isLoopInvariant(IVLimit, L) &&
|
||||||
|
"Computed iteration count is not loop invariant!");
|
||||||
|
@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
|
||||||
|
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
|
||||||
|
Type *LimitTy = ExitCount->getType()->isPointerTy() ?
|
||||||
|
IndVar->getType() : ExitCount->getType();
|
||||||
|
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
|
||||||
|
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..abd1cbb6c51
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+; RUN: opt -indvars -S %s | FileCheck %s
|
||||||
|
+
|
||||||
|
+target datalayout = "e-p:32:32-i64:64-n8:16:32"
|
||||||
|
+
|
||||||
|
+; CHECK: for.body.preheader:
|
||||||
|
+; CHECK-NOT: add
|
||||||
|
+; CHECK: for.body:
|
||||||
|
+
|
||||||
|
+define void @foo(i32 %a1, i32 %a2) {
|
||||||
|
+entry:
|
||||||
|
+ %maxval = add nuw nsw i32 %a1, %a2
|
||||||
|
+ %cmp = icmp slt i32 %maxval, 1
|
||||||
|
+ br i1 %cmp, label %for.end, label %for.body
|
||||||
|
+
|
||||||
|
+for.body: ; preds = %entry, %for.body
|
||||||
|
+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
|
||||||
|
+ %add31 = add nuw nsw i32 %j.02, 1
|
||||||
|
+ %cmp22 = icmp slt i32 %add31, %maxval
|
||||||
|
+ br i1 %cmp22, label %for.body, label %for.end
|
||||||
|
+
|
||||||
|
+for.end: ; preds = %for.body
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||||
|
index 14ae9738696..509d662b767 100644
|
||||||
|
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||||
|
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
|
||||||
|
@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
|
||||||
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
|
||||||
|
; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
|
||||||
|
; CHECK: outer.preheader:
|
||||||
|
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
|
||||||
|
; CHECK-NEXT: br label [[OUTER:%.*]]
|
||||||
|
; CHECK: outer:
|
||||||
|
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
|
||||||
|
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
|
||||||
|
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
|
||||||
|
; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
|
||||||
|
; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
|
||||||
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
|
||||||
|
@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
|
||||||
|
; CHECK: inner:
|
||||||
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
|
||||||
|
; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
|
||||||
|
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
|
||||||
|
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
|
||||||
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
|
||||||
|
; CHECK: outer.inc.loopexit:
|
||||||
|
; CHECK-NEXT: br label [[OUTER_INC]]
|
||||||
|
; CHECK: outer.inc:
|
||||||
|
; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
|
||||||
|
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
|
||||||
|
-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
|
||||||
|
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
|
||||||
|
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
|
||||||
|
; CHECK: exit.loopexit:
|
||||||
|
; CHECK-NEXT: br label [[EXIT]]
|
||||||
|
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
|
||||||
|
index b3f2c2a6a66..3530343ef4a 100644
|
||||||
|
--- a/test/Transforms/IndVarSimplify/udiv.ll
|
||||||
|
+++ b/test/Transforms/IndVarSimplify/udiv.ll
|
||||||
|
@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
|
||||||
|
; CHECK-LABEL: @foo(
|
||||||
|
; CHECK: for.body.preheader:
|
||||||
|
; CHECK-NOT: udiv
|
||||||
|
+; CHECK: for.body:
|
||||||
|
|
||||||
|
define void @foo(double* %p, i64 %n) nounwind {
|
||||||
|
entry:
|
||||||
|
--
|
||||||
|
2.18.0
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
|
FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
|
||||||
|
|
||||||
SRC_URI_append_intel-x86-common = " \
|
SRC_URI_append_intel-x86-common = " \
|
||||||
file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \
|
|
||||||
file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \
|
|
||||||
file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \
|
|
||||||
file://0001-dont-export-targets-for-binaries.patch \
|
file://0001-dont-export-targets-for-binaries.patch \
|
||||||
git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
|
file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
|
||||||
file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \
|
file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
|
||||||
file://0002-Remove-extra-semicolon.patch;patchdir=llvm/projects/llvm-spirv \
|
git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_90;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
|
||||||
file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
|
file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
|
||||||
"
|
"
|
||||||
|
|
||||||
SRCREV_spirv = "1d48cd84d04a2f60b43ea3f66eb7c86f4e5973a9"
|
SRCREV_spirv = "70420631144a6a25613ae37178f2cc1d3607b65b"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user