llvm-project-source: backport OpenCL recommended patches

Updating SPIRV-LLVM-Translator srcrev to latest commits for
* llvm_releae_100
* llvm_release_110

Backport opencl-clang recommended llvm/clang patches.

llvm-10:
https://github.com/intel/opencl-clang/tree/ocl-open-100/patches

llvm-11:
https://github.com/intel/opencl-clang/tree/ocl-open-110/patches

Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
Naveen Saini 2021-04-08 11:02:31 +08:00 committed by Anuj Mittal
parent 01cfc99a8f
commit bdae07eceb
8 changed files with 17788 additions and 2 deletions

View File

@ -0,0 +1,119 @@
From 200c200eb19602ffd7c8f29d0b2df9df1fd311bf Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 7 Apr 2021 17:44:20 +0800
Subject: [PATCH] Fix debug info of work-item builtin translation (#745)
debug info of work-item builtins are lost in both llvm IR -> spirv and
spirv -> llvm IR translations. See #744
Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/c734c5c8bbd3012a09c610e4be68e90cc603c580]
Signed-off-by: Wenju He <wenju.he@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
lib/SPIRV/OCL20ToSPIRV.cpp | 5 ++-
lib/SPIRV/SPIRVReader.cpp | 1 +
test/DebugInfo/builtin-get-global-id.ll | 60 +++++++++++++++++++++++++
3 files changed, 65 insertions(+), 1 deletion(-)
create mode 100644 test/DebugInfo/builtin-get-global-id.ll
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
index 1262c48c..a742c8cf 100644
--- a/lib/SPIRV/OCL20ToSPIRV.cpp
+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
@@ -1297,11 +1297,14 @@ void OCL20ToSPIRV::transWorkItemBuiltinsToVariables() {
for (auto UI = I.user_begin(), UE = I.user_end(); UI != UE; ++UI) {
auto CI = dyn_cast<CallInst>(*UI);
assert(CI && "invalid instruction");
- Value *NewValue = new LoadInst(BV, "", CI);
+ const DebugLoc &DLoc = CI->getDebugLoc();
+ Instruction *NewValue = new LoadInst(BV, "", CI);
+ NewValue->setDebugLoc(DLoc);
LLVM_DEBUG(dbgs() << "Transform: " << *CI << " => " << *NewValue << '\n');
if (IsVec) {
NewValue =
ExtractElementInst::Create(NewValue, CI->getArgOperand(0), "", CI);
+ NewValue->setDebugLoc(DLoc);
LLVM_DEBUG(dbgs() << *NewValue << '\n');
}
NewValue->takeName(CI);
diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp
index 16a3dd38..528f6663 100644
--- a/lib/SPIRV/SPIRVReader.cpp
+++ b/lib/SPIRV/SPIRVReader.cpp
@@ -307,6 +307,7 @@ bool SPIRVToLLVM::transOCLBuiltinFromVariable(GlobalVariable *GV,
auto Replace = [&](std::vector<Value *> Arg, Instruction *I) {
auto Call = CallInst::Create(Func, Arg, "", I);
Call->takeName(I);
+ Call->setDebugLoc(I->getDebugLoc());
setAttrByCalledFunc(Call);
SPIRVDBG(dbgs() << "[transOCLBuiltinFromVariable] " << *I << " -> " << *Call
<< '\n';)
diff --git a/test/DebugInfo/builtin-get-global-id.ll b/test/DebugInfo/builtin-get-global-id.ll
new file mode 100644
index 00000000..a4a00e63
--- /dev/null
+++ b/test/DebugInfo/builtin-get-global-id.ll
@@ -0,0 +1,60 @@
+; Check debug info of builtin get_global_id is preserved from LLVM IR to spirv
+; and spirv to LLVM IR translation.
+
+; Original .cl source:
+; kernel void test() {
+; size_t gid = get_global_id(0);
+; }
+
+; Command line:
+; ./clang -cc1 1.cl -triple spir64 -cl-std=cl2.0 -emit-llvm -finclude-default-header -debug-info-kind=line-tables-only -O0
+
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64"
+
+; CHECK-SPIRV: ExtInst {{.*}} DebugScope
+; CHECK-SPIRV-NEXT: Line {{[0-9]+}} 2 16
+; CHECK-SPIRV-NEXT: Load {{[0-9]+}} [[LoadRes:[0-9]+]]
+; CHECK-SPIRV-NEXT: CompositeExtract {{[0-9]+}} {{[0-9]+}} [[LoadRes]] 0
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+define spir_kernel void @test() #0 !dbg !7 !kernel_arg_addr_space !2 !kernel_arg_access_qual !2 !kernel_arg_type !2 !kernel_arg_base_type !2 !kernel_arg_type_qual !2 {
+entry:
+ %gid = alloca i64, align 8
+ %call = call spir_func i64 @_Z13get_global_idj(i32 0) #2, !dbg !10
+; CHECK: %call = call spir_func i64 @_Z13get_global_idj(i32 0) #1, !dbg [[DBG:![0-9]+]]
+ store i64 %call, i64* %gid, align 8, !dbg !11
+ ret void, !dbg !12
+}
+
+; Function Attrs: convergent nounwind readnone
+declare spir_func i64 @_Z13get_global_idj(i32) #1
+
+attributes #0 = { convergent noinline norecurse nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { convergent nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!opencl.ocl.version = !{!5}
+!opencl.spir.version = !{!5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git b5bc56da8aa23dc57db9d286b0591dbcf9b1bdd3)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "<stdin>", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 2, i32 0}
+!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git b5bc56da8aa23dc57db9d286b0591dbcf9b1bdd3)"}
+!7 = distinct !DISubprogram(name: "test", scope: !8, file: !8, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DIFile(filename: "1.cl", directory: "")
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 2, column: 16, scope: !7)
+!11 = !DILocation(line: 2, column: 10, scope: !7)
+!12 = !DILocation(line: 3, column: 1, scope: !7)
--
2.17.1

View File

@ -0,0 +1,35 @@
From c86c43b70e029b102543e8a85d269cbeb5c00279 Mon Sep 17 00:00:00 2001
From: juanrod2 <>
Date: Tue, 22 Dec 2020 08:33:08 +0800
Subject: [PATCH] Memory leak fix for Managed Static Mutex
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Cleaning a mutex inside ManagedStatic llvm class.
---
llvm/lib/Support/ManagedStatic.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp
index 053493f72fb5..6571580ccecf 100644
--- a/llvm/lib/Support/ManagedStatic.cpp
+++ b/llvm/lib/Support/ManagedStatic.cpp
@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const {
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
- std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
+ getManagedStaticMutex()->lock();
while (StaticList)
StaticList->destroy();
+
+ getManagedStaticMutex()->unlock();
+ delete ManagedStaticMutex;
+ ManagedStaticMutex = nullptr;
}
--
2.29.2

View File

@ -0,0 +1,812 @@
From 27d47f1a17c8921b07acc8cdc26e38cc609de4a9 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Wed, 7 Apr 2021 16:38:38 +0800
Subject: [PATCH 2/2] Add cl_khr_extended_subgroup extensions.
Added extensions and their function declarations into
the standard header.
Patch by Piotr Fusik!
Tags: #clang
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a]
Signed-off-by: Anastasia Stulova <anastasia.stulova@arm.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
.../include/clang/Basic/OpenCLExtensions.def | 7 +
clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++
clang/test/SemaOpenCL/extension-version.cl | 83 +++
3 files changed, 758 insertions(+)
diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
index 608f78a13eef..d1574164f9b2 100644
--- a/clang/include/clang/Basic/OpenCLExtensions.def
+++ b/clang/include/clang/Basic/OpenCLExtensions.def
@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U)
OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U)
+OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U)
// Clang Extensions.
OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 93a946cec5b1..67d900eb1c3d 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_subgroups cl_intel_subgroups
+#if defined(cl_khr_subgroup_extended_types)
+char __ovld __conv sub_group_broadcast( char value, uint index );
+char2 __ovld __conv sub_group_broadcast( char2 value, uint index );
+char3 __ovld __conv sub_group_broadcast( char3 value, uint index );
+char4 __ovld __conv sub_group_broadcast( char4 value, uint index );
+char8 __ovld __conv sub_group_broadcast( char8 value, uint index );
+char16 __ovld __conv sub_group_broadcast( char16 value, uint index );
+
+uchar __ovld __conv sub_group_broadcast( uchar value, uint index );
+uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index );
+uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index );
+uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index );
+uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index );
+uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index );
+
+short __ovld __conv sub_group_broadcast( short value, uint index );
+short2 __ovld __conv sub_group_broadcast( short2 value, uint index );
+short3 __ovld __conv sub_group_broadcast( short3 value, uint index );
+short4 __ovld __conv sub_group_broadcast( short4 value, uint index );
+short8 __ovld __conv sub_group_broadcast( short8 value, uint index );
+short16 __ovld __conv sub_group_broadcast( short16 value, uint index );
+
+ushort __ovld __conv sub_group_broadcast( ushort value, uint index );
+ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index );
+ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index );
+ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index );
+ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index );
+ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index );
+
+// scalar int broadcast is part of cl_khr_subgroups
+int2 __ovld __conv sub_group_broadcast( int2 value, uint index );
+int3 __ovld __conv sub_group_broadcast( int3 value, uint index );
+int4 __ovld __conv sub_group_broadcast( int4 value, uint index );
+int8 __ovld __conv sub_group_broadcast( int8 value, uint index );
+int16 __ovld __conv sub_group_broadcast( int16 value, uint index );
+
+// scalar uint broadcast is part of cl_khr_subgroups
+uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index );
+uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index );
+uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index );
+uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index );
+uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index );
+
+// scalar long broadcast is part of cl_khr_subgroups
+long2 __ovld __conv sub_group_broadcast( long2 value, uint index );
+long3 __ovld __conv sub_group_broadcast( long3 value, uint index );
+long4 __ovld __conv sub_group_broadcast( long4 value, uint index );
+long8 __ovld __conv sub_group_broadcast( long8 value, uint index );
+long16 __ovld __conv sub_group_broadcast( long16 value, uint index );
+
+// scalar ulong broadcast is part of cl_khr_subgroups
+ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index );
+ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index );
+ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index );
+ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index );
+ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index );
+
+// scalar float broadcast is part of cl_khr_subgroups
+float2 __ovld __conv sub_group_broadcast( float2 value, uint index );
+float3 __ovld __conv sub_group_broadcast( float3 value, uint index );
+float4 __ovld __conv sub_group_broadcast( float4 value, uint index );
+float8 __ovld __conv sub_group_broadcast( float8 value, uint index );
+float16 __ovld __conv sub_group_broadcast( float16 value, uint index );
+
+char __ovld __conv sub_group_reduce_add( char value );
+uchar __ovld __conv sub_group_reduce_add( uchar value );
+short __ovld __conv sub_group_reduce_add( short value );
+ushort __ovld __conv sub_group_reduce_add( ushort value );
+
+char __ovld __conv sub_group_reduce_min( char value );
+uchar __ovld __conv sub_group_reduce_min( uchar value );
+short __ovld __conv sub_group_reduce_min( short value );
+ushort __ovld __conv sub_group_reduce_min( ushort value );
+
+char __ovld __conv sub_group_reduce_max( char value );
+uchar __ovld __conv sub_group_reduce_max( uchar value );
+short __ovld __conv sub_group_reduce_max( short value );
+ushort __ovld __conv sub_group_reduce_max( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_add( char value );
+uchar __ovld __conv sub_group_scan_inclusive_add( uchar value );
+short __ovld __conv sub_group_scan_inclusive_add( short value );
+ushort __ovld __conv sub_group_scan_inclusive_add( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_min( char value );
+uchar __ovld __conv sub_group_scan_inclusive_min( uchar value );
+short __ovld __conv sub_group_scan_inclusive_min( short value );
+ushort __ovld __conv sub_group_scan_inclusive_min( ushort value );
+
+char __ovld __conv sub_group_scan_inclusive_max( char value );
+uchar __ovld __conv sub_group_scan_inclusive_max( uchar value );
+short __ovld __conv sub_group_scan_inclusive_max( short value );
+ushort __ovld __conv sub_group_scan_inclusive_max( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_add( char value );
+uchar __ovld __conv sub_group_scan_exclusive_add( uchar value );
+short __ovld __conv sub_group_scan_exclusive_add( short value );
+ushort __ovld __conv sub_group_scan_exclusive_add( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_min( char value );
+uchar __ovld __conv sub_group_scan_exclusive_min( uchar value );
+short __ovld __conv sub_group_scan_exclusive_min( short value );
+ushort __ovld __conv sub_group_scan_exclusive_min( ushort value );
+
+char __ovld __conv sub_group_scan_exclusive_max( char value );
+uchar __ovld __conv sub_group_scan_exclusive_max( uchar value );
+short __ovld __conv sub_group_scan_exclusive_max( short value );
+ushort __ovld __conv sub_group_scan_exclusive_max( ushort value );
+
+#if defined(cl_khr_fp16)
+// scalar half broadcast is part of cl_khr_subgroups
+half2 __ovld __conv sub_group_broadcast( half2 value, uint index );
+half3 __ovld __conv sub_group_broadcast( half3 value, uint index );
+half4 __ovld __conv sub_group_broadcast( half4 value, uint index );
+half8 __ovld __conv sub_group_broadcast( half8 value, uint index );
+half16 __ovld __conv sub_group_broadcast( half16 value, uint index );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+// scalar double broadcast is part of cl_khr_subgroups
+double2 __ovld __conv sub_group_broadcast( double2 value, uint index );
+double3 __ovld __conv sub_group_broadcast( double3 value, uint index );
+double4 __ovld __conv sub_group_broadcast( double4 value, uint index );
+double8 __ovld __conv sub_group_broadcast( double8 value, uint index );
+double16 __ovld __conv sub_group_broadcast( double16 value, uint index );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_extended_types
+
+#if defined(cl_khr_subgroup_non_uniform_vote)
+int __ovld sub_group_elect(void);
+int __ovld sub_group_non_uniform_all( int predicate );
+int __ovld sub_group_non_uniform_any( int predicate );
+
+int __ovld sub_group_non_uniform_all_equal( char value );
+int __ovld sub_group_non_uniform_all_equal( uchar value );
+int __ovld sub_group_non_uniform_all_equal( short value );
+int __ovld sub_group_non_uniform_all_equal( ushort value );
+int __ovld sub_group_non_uniform_all_equal( int value );
+int __ovld sub_group_non_uniform_all_equal( uint value );
+int __ovld sub_group_non_uniform_all_equal( long value );
+int __ovld sub_group_non_uniform_all_equal( ulong value );
+int __ovld sub_group_non_uniform_all_equal( float value );
+
+#if defined(cl_khr_fp16)
+int __ovld sub_group_non_uniform_all_equal( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+int __ovld sub_group_non_uniform_all_equal( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_non_uniform_vote
+
+#if defined(cl_khr_subgroup_ballot)
+char __ovld sub_group_non_uniform_broadcast( char value, uint index );
+char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index );
+char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index );
+char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index );
+char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index );
+char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index );
+
+uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index );
+uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index );
+uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index );
+uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index );
+uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index );
+uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index );
+
+short __ovld sub_group_non_uniform_broadcast( short value, uint index );
+short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index );
+short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index );
+short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index );
+short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index );
+short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index );
+
+ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index );
+ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index );
+ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index );
+ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index );
+ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index );
+ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index );
+
+int __ovld sub_group_non_uniform_broadcast( int value, uint index );
+int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index );
+int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index );
+int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index );
+int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index );
+int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index );
+
+uint __ovld sub_group_non_uniform_broadcast( uint value, uint index );
+uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index );
+uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index );
+uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index );
+uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index );
+uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index );
+
+long __ovld sub_group_non_uniform_broadcast( long value, uint index );
+long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index );
+long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index );
+long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index );
+long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index );
+long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index );
+
+ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index );
+ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index );
+ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index );
+ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index );
+ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index );
+ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index );
+
+float __ovld sub_group_non_uniform_broadcast( float value, uint index );
+float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index );
+float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index );
+float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index );
+float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index );
+float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index );
+
+char __ovld sub_group_broadcast_first( char value );
+uchar __ovld sub_group_broadcast_first( uchar value );
+short __ovld sub_group_broadcast_first( short value );
+ushort __ovld sub_group_broadcast_first( ushort value );
+int __ovld sub_group_broadcast_first( int value );
+uint __ovld sub_group_broadcast_first( uint value );
+long __ovld sub_group_broadcast_first( long value );
+ulong __ovld sub_group_broadcast_first( ulong value );
+float __ovld sub_group_broadcast_first( float value );
+
+uint4 __ovld sub_group_ballot( int predicate );
+int __ovld __cnfn sub_group_inverse_ballot( uint4 value );
+int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index );
+uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value );
+
+uint __ovld sub_group_ballot_inclusive_scan( uint4 value );
+uint __ovld sub_group_ballot_exclusive_scan( uint4 value );
+uint __ovld sub_group_ballot_find_lsb( uint4 value );
+uint __ovld sub_group_ballot_find_msb( uint4 value );
+
+uint4 __ovld __cnfn get_sub_group_eq_mask(void);
+uint4 __ovld __cnfn get_sub_group_ge_mask(void);
+uint4 __ovld __cnfn get_sub_group_gt_mask(void);
+uint4 __ovld __cnfn get_sub_group_le_mask(void);
+uint4 __ovld __cnfn get_sub_group_lt_mask(void);
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_non_uniform_broadcast( half value, uint index );
+half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index );
+half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index );
+half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index );
+half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index );
+half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index );
+
+half __ovld sub_group_broadcast_first( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_non_uniform_broadcast( double value, uint index );
+double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index );
+double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index );
+double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index );
+double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index );
+double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index );
+
+double __ovld sub_group_broadcast_first( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_ballot
+
+#if defined(cl_khr_subgroup_non_uniform_arithmetic)
+char __ovld sub_group_non_uniform_reduce_add( char value );
+uchar __ovld sub_group_non_uniform_reduce_add( uchar value );
+short __ovld sub_group_non_uniform_reduce_add( short value );
+ushort __ovld sub_group_non_uniform_reduce_add( ushort value );
+int __ovld sub_group_non_uniform_reduce_add( int value );
+uint __ovld sub_group_non_uniform_reduce_add( uint value );
+long __ovld sub_group_non_uniform_reduce_add( long value );
+ulong __ovld sub_group_non_uniform_reduce_add( ulong value );
+float __ovld sub_group_non_uniform_reduce_add( float value );
+
+char __ovld sub_group_non_uniform_reduce_mul( char value );
+uchar __ovld sub_group_non_uniform_reduce_mul( uchar value );
+short __ovld sub_group_non_uniform_reduce_mul( short value );
+ushort __ovld sub_group_non_uniform_reduce_mul( ushort value );
+int __ovld sub_group_non_uniform_reduce_mul( int value );
+uint __ovld sub_group_non_uniform_reduce_mul( uint value );
+long __ovld sub_group_non_uniform_reduce_mul( long value );
+ulong __ovld sub_group_non_uniform_reduce_mul( ulong value );
+float __ovld sub_group_non_uniform_reduce_mul( float value );
+
+char __ovld sub_group_non_uniform_reduce_min( char value );
+uchar __ovld sub_group_non_uniform_reduce_min( uchar value );
+short __ovld sub_group_non_uniform_reduce_min( short value );
+ushort __ovld sub_group_non_uniform_reduce_min( ushort value );
+int __ovld sub_group_non_uniform_reduce_min( int value );
+uint __ovld sub_group_non_uniform_reduce_min( uint value );
+long __ovld sub_group_non_uniform_reduce_min( long value );
+ulong __ovld sub_group_non_uniform_reduce_min( ulong value );
+float __ovld sub_group_non_uniform_reduce_min( float value );
+
+char __ovld sub_group_non_uniform_reduce_max( char value );
+uchar __ovld sub_group_non_uniform_reduce_max( uchar value );
+short __ovld sub_group_non_uniform_reduce_max( short value );
+ushort __ovld sub_group_non_uniform_reduce_max( ushort value );
+int __ovld sub_group_non_uniform_reduce_max( int value );
+uint __ovld sub_group_non_uniform_reduce_max( uint value );
+long __ovld sub_group_non_uniform_reduce_max( long value );
+ulong __ovld sub_group_non_uniform_reduce_max( ulong value );
+float __ovld sub_group_non_uniform_reduce_max( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_add( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_add( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_add( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_add( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_add( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_mul( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_mul( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_mul( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_mul( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_mul( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_min( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_min( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_min( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_min( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_min( float value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_max( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_max( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_max( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_max( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value );
+float __ovld sub_group_non_uniform_scan_inclusive_max( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_add( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_add( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_add( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_add( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_add( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_mul( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_mul( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_mul( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_mul( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_mul( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_min( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_min( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_min( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_min( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_min( float value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_max( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_max( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_max( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_max( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value );
+float __ovld sub_group_non_uniform_scan_exclusive_max( float value );
+
+char __ovld sub_group_non_uniform_reduce_and( char value );
+uchar __ovld sub_group_non_uniform_reduce_and( uchar value );
+short __ovld sub_group_non_uniform_reduce_and( short value );
+ushort __ovld sub_group_non_uniform_reduce_and( ushort value );
+int __ovld sub_group_non_uniform_reduce_and( int value );
+uint __ovld sub_group_non_uniform_reduce_and( uint value );
+long __ovld sub_group_non_uniform_reduce_and( long value );
+ulong __ovld sub_group_non_uniform_reduce_and( ulong value );
+
+char __ovld sub_group_non_uniform_reduce_or( char value );
+uchar __ovld sub_group_non_uniform_reduce_or( uchar value );
+short __ovld sub_group_non_uniform_reduce_or( short value );
+ushort __ovld sub_group_non_uniform_reduce_or( ushort value );
+int __ovld sub_group_non_uniform_reduce_or( int value );
+uint __ovld sub_group_non_uniform_reduce_or( uint value );
+long __ovld sub_group_non_uniform_reduce_or( long value );
+ulong __ovld sub_group_non_uniform_reduce_or( ulong value );
+
+char __ovld sub_group_non_uniform_reduce_xor( char value );
+uchar __ovld sub_group_non_uniform_reduce_xor( uchar value );
+short __ovld sub_group_non_uniform_reduce_xor( short value );
+ushort __ovld sub_group_non_uniform_reduce_xor( ushort value );
+int __ovld sub_group_non_uniform_reduce_xor( int value );
+uint __ovld sub_group_non_uniform_reduce_xor( uint value );
+long __ovld sub_group_non_uniform_reduce_xor( long value );
+ulong __ovld sub_group_non_uniform_reduce_xor( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_and( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_and( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_and( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_and( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_or( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_or( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_or( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_or( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value );
+
+char __ovld sub_group_non_uniform_scan_inclusive_xor( char value );
+uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value );
+short __ovld sub_group_non_uniform_scan_inclusive_xor( short value );
+ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value );
+int __ovld sub_group_non_uniform_scan_inclusive_xor( int value );
+uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value );
+long __ovld sub_group_non_uniform_scan_inclusive_xor( long value );
+ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_and( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_and( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_and( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_and( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_or( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_or( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_or( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_or( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value );
+
+char __ovld sub_group_non_uniform_scan_exclusive_xor( char value );
+uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value );
+short __ovld sub_group_non_uniform_scan_exclusive_xor( short value );
+ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value );
+int __ovld sub_group_non_uniform_scan_exclusive_xor( int value );
+uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value );
+long __ovld sub_group_non_uniform_scan_exclusive_xor( long value );
+ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value );
+
+int __ovld sub_group_non_uniform_reduce_logical_and( int predicate );
+int __ovld sub_group_non_uniform_reduce_logical_or( int predicate );
+int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate );
+
+int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate );
+int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate );
+int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate );
+
+int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate );
+int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate );
+int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_non_uniform_reduce_add( half value );
+half __ovld sub_group_non_uniform_reduce_mul( half value );
+half __ovld sub_group_non_uniform_reduce_min( half value );
+half __ovld sub_group_non_uniform_reduce_max( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_add( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_mul( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_min( half value );
+half __ovld sub_group_non_uniform_scan_inclusive_max( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_add( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_mul( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_min( half value );
+half __ovld sub_group_non_uniform_scan_exclusive_max( half value );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_non_uniform_reduce_add( double value );
+double __ovld sub_group_non_uniform_reduce_mul( double value );
+double __ovld sub_group_non_uniform_reduce_min( double value );
+double __ovld sub_group_non_uniform_reduce_max( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_add( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_mul( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_min( double value );
+double __ovld sub_group_non_uniform_scan_inclusive_max( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_add( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_mul( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_min( double value );
+double __ovld sub_group_non_uniform_scan_exclusive_max( double value );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_non_uniform_arithmetic
+
+#if defined(cl_khr_subgroup_shuffle)
+char __ovld sub_group_shuffle( char value, uint index );
+uchar __ovld sub_group_shuffle( uchar value, uint index );
+short __ovld sub_group_shuffle( short value, uint index );
+ushort __ovld sub_group_shuffle( ushort value, uint index );
+int __ovld sub_group_shuffle( int value, uint index );
+uint __ovld sub_group_shuffle( uint value, uint index );
+long __ovld sub_group_shuffle( long value, uint index );
+ulong __ovld sub_group_shuffle( ulong value, uint index );
+float __ovld sub_group_shuffle( float value, uint index );
+
+char __ovld sub_group_shuffle_xor( char value, uint mask );
+uchar __ovld sub_group_shuffle_xor( uchar value, uint mask );
+short __ovld sub_group_shuffle_xor( short value, uint mask );
+ushort __ovld sub_group_shuffle_xor( ushort value, uint mask );
+int __ovld sub_group_shuffle_xor( int value, uint mask );
+uint __ovld sub_group_shuffle_xor( uint value, uint mask );
+long __ovld sub_group_shuffle_xor( long value, uint mask );
+ulong __ovld sub_group_shuffle_xor( ulong value, uint mask );
+float __ovld sub_group_shuffle_xor( float value, uint mask );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_shuffle( half value, uint index );
+half __ovld sub_group_shuffle_xor( half value, uint mask );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_shuffle( double value, uint index );
+double __ovld sub_group_shuffle_xor( double value, uint mask );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_shuffle
+
+#if defined(cl_khr_subgroup_shuffle_relative)
+char __ovld sub_group_shuffle_up( char value, uint delta );
+uchar __ovld sub_group_shuffle_up( uchar value, uint delta );
+short __ovld sub_group_shuffle_up( short value, uint delta );
+ushort __ovld sub_group_shuffle_up( ushort value, uint delta );
+int __ovld sub_group_shuffle_up( int value, uint delta );
+uint __ovld sub_group_shuffle_up( uint value, uint delta );
+long __ovld sub_group_shuffle_up( long value, uint delta );
+ulong __ovld sub_group_shuffle_up( ulong value, uint delta );
+float __ovld sub_group_shuffle_up( float value, uint delta );
+
+char __ovld sub_group_shuffle_down( char value, uint delta );
+uchar __ovld sub_group_shuffle_down( uchar value, uint delta );
+short __ovld sub_group_shuffle_down( short value, uint delta );
+ushort __ovld sub_group_shuffle_down( ushort value, uint delta );
+int __ovld sub_group_shuffle_down( int value, uint delta );
+uint __ovld sub_group_shuffle_down( uint value, uint delta );
+long __ovld sub_group_shuffle_down( long value, uint delta );
+ulong __ovld sub_group_shuffle_down( ulong value, uint delta );
+float __ovld sub_group_shuffle_down( float value, uint delta );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_shuffle_up( half value, uint delta );
+half __ovld sub_group_shuffle_down( half value, uint delta );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_shuffle_up( double value, uint delta );
+double __ovld sub_group_shuffle_down( double value, uint delta );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_shuffle_relative
+
+#if defined(cl_khr_subgroup_clustered_reduce)
+char __ovld sub_group_clustered_reduce_add( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_add( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_add( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_add( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_add( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_min( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_min( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_min( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_min( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_min( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_max( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_max( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_max( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_max( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize );
+float __ovld sub_group_clustered_reduce_max( float value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_and( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_and( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_and( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_and( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_or( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_or( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_or( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_or( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize );
+
+char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize );
+uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize );
+short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize );
+ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize );
+int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize );
+uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize );
+long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize );
+ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize );
+
+int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize );
+int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize );
+int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize );
+
+#if defined(cl_khr_fp16)
+half __ovld sub_group_clustered_reduce_add( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_min( half value, uint clustersize );
+half __ovld sub_group_clustered_reduce_max( half value, uint clustersize );
+#endif // cl_khr_fp16
+
+#if defined(cl_khr_fp64)
+double __ovld sub_group_clustered_reduce_add( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_min( double value, uint clustersize );
+double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
+#endif // cl_khr_fp64
+
+#endif // cl_khr_subgroup_clustered_reduce
+
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl
index 0e6bbb7d3bcd..86c78143a0eb 100644
--- a/clang/test/SemaOpenCL/extension-version.cl
+++ b/clang/test/SemaOpenCL/extension-version.cl
@@ -333,3 +333,86 @@
#endif
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_extended_types
+#error "Missing cl_khr_subgroup_extended_types"
+#endif
+#else
+#ifdef cl_khr_subgroup_extended_types
+#error "Incorrect cl_khr_subgroup_extended_types define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_non_uniform_vote
+#error "Missing cl_khr_subgroup_non_uniform_vote"
+#endif
+#else
+#ifdef cl_khr_subgroup_non_uniform_vote
+#error "Incorrect cl_khr_subgroup_non_uniform_vote define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_ballot
+#error "Missing cl_khr_subgroup_ballot"
+#endif
+#else
+#ifdef cl_khr_subgroup_ballot
+#error "Incorrect cl_khr_subgroup_ballot define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_non_uniform_arithmetic
+#error "Missing cl_khr_subgroup_non_uniform_arithmetic"
+#endif
+#else
+#ifdef cl_khr_subgroup_non_uniform_arithmetic
+#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_shuffle
+#error "Missing cl_khr_subgroup_shuffle"
+#endif
+#else
+#ifdef cl_khr_subgroup_shuffle
+#error "Incorrect cl_khr_subgroup_shuffle define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_shuffle_relative
+#error "Missing cl_khr_subgroup_shuffle_relative"
+#endif
+#else
+#ifdef cl_khr_subgroup_shuffle_relative
+#error "Incorrect cl_khr_subgroup_shuffle_relative define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable
+
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+#ifndef cl_khr_subgroup_clustered_reduce
+#error "Missing cl_khr_subgroup_clustered_reduce"
+#endif
+#else
+#ifdef cl_khr_subgroup_clustered_reduce
+#error "Incorrect cl_khr_subgroup_clustered_reduce define"
+#endif
+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}}
+#endif
+#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
--
2.17.1

View File

@ -0,0 +1,50 @@
From b53fd86ffdeacb9b13624bdb110fd25e8c35cb92 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou@intel.com>
Date: Mon, 19 Oct 2020 14:43:38 +0800
Subject: [PATCH] Remove repo name in LLVM IR
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch]
Signed-off-by: Feng Zou <feng.zou@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 1b6519b4b7c..8fd6b23bb34 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -33,17 +33,18 @@ function(get_source_info path revision repository)
else()
set(remote "origin")
endif()
- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
- WORKING_DIRECTORY ${path}
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output
- ERROR_QUIET)
- if(git_result EQUAL 0)
- string(STRIP "${git_output}" git_output)
- set(${repository} ${git_output} PARENT_SCOPE)
- else()
- set(${repository} ${path} PARENT_SCOPE)
- endif()
+ # Do not show repo name in IR
+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
+ # WORKING_DIRECTORY ${path}
+ # RESULT_VARIABLE git_result
+ # OUTPUT_VARIABLE git_output
+ # ERROR_QUIET)
+ # if(git_result EQUAL 0)
+ # string(STRIP "${git_output}" git_output)
+ # set(${repository} ${git_output} PARENT_SCOPE)
+ # else()
+ # set(${repository} ${path} PARENT_SCOPE)
+ # endif()
endif()
endif()
endfunction()
--
2.18.1

View File

@ -0,0 +1,50 @@
From ff0a6da84b94c16c4519c649f1f7bed3cdf89bbb Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou@intel.com>
Date: Tue, 20 Oct 2020 11:29:04 +0800
Subject: [PATCH] Remove repo name in LLVM IR
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-110/patches/llvm/0002-Remove-repo-name-in-LLVM-IR.patch]
Signed-off-by: Feng Zou <feng.zou@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 18edbeabe3e..2d965263478 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -33,17 +33,18 @@ function(get_source_info path revision repository)
else()
set(remote "origin")
endif()
- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
- WORKING_DIRECTORY ${path}
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output
- ERROR_QUIET)
- if(git_result EQUAL 0)
- string(STRIP "${git_output}" git_output)
- set(${repository} ${git_output} PARENT_SCOPE)
- else()
- set(${repository} ${path} PARENT_SCOPE)
- endif()
+ # Do not show repo name in IR
+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
+ # WORKING_DIRECTORY ${path}
+ # RESULT_VARIABLE git_result
+ # OUTPUT_VARIABLE git_output
+ # ERROR_QUIET)
+ # if(git_result EQUAL 0)
+ # string(STRIP "${git_output}" git_output)
+ # set(${repository} ${git_output} PARENT_SCOPE)
+ # else()
+ # set(${repository} ${path} PARENT_SCOPE)
+ # endif()
endif()
else()
message(WARNING "Git not found. Version cannot be determined.")
--
2.18.1

View File

@ -2,8 +2,8 @@ FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
SPIRV_BRANCH = "${@bb.utils.contains('LLVMVERSION', '10.0.1', 'llvm_release_100', 'llvm_release_110', d)}" SPIRV_BRANCH = "${@bb.utils.contains('LLVMVERSION', '10.0.1', 'llvm_release_100', 'llvm_release_110', d)}"
SPIRV10_SRCREV = "4d43f68a30a510b4e7607351caab3df8e7426a6b" SPIRV10_SRCREV = "576abae62cecd171992017a4a786e3831221ab8d"
SPIRV11_SRCREV = "93032d36d2fe17befb7994714c07c67ea68efbea" SPIRV11_SRCREV = "2a8c1e6c9778deaa720a23e08c293006dc5d56fd"
SPIRV_SRCREV = "${@bb.utils.contains('LLVMVERSION', '10.0.1', '${SPIRV10_SRCREV}', '${SPIRV11_SRCREV}', d)}" SPIRV_SRCREV = "${@bb.utils.contains('LLVMVERSION', '10.0.1', '${SPIRV10_SRCREV}', '${SPIRV11_SRCREV}', d)}"
@ -12,10 +12,18 @@ SRC_URI_LLVM10 = " \
file://fix-shared-libs.patch;patchdir=llvm/projects/llvm-spirv \ file://fix-shared-libs.patch;patchdir=llvm/projects/llvm-spirv \
file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \ file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \ file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
file://llvm10-OpenCL-3.0-support.patch \
file://0002-Add-cl_khr_extended_subgroup-extensions.patch \
file://0001-Memory-leak-fix-for-Managed-Static-Mutex.patch \
file://llvm10-Remove-repo-name-in-LLVM-IR.patch \
file://0001-Fix-debug-info-of-work-item-builtin-translation-745.patch;patchdir=llvm/projects/llvm-spirv \
" "
SRC_URI_LLVM11 = " \ SRC_URI_LLVM11 = " \
file://llvm11-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ file://llvm11-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
file://llvm11-OpenCL-3.0-support.patch \
file://0001-Memory-leak-fix-for-Managed-Static-Mutex.patch \
file://llvm11-Remove-repo-name-in-LLVM-IR.patch \
" "
SPIRV_LLVM_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=${SPIRV_BRANCH};destsuffix=git/llvm/projects/llvm-spirv;name=spirv" SPIRV_LLVM_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=${SPIRV_BRANCH};destsuffix=git/llvm/projects/llvm-spirv;name=spirv"