llvm-project-source: refresh patches

Refresh the patches as recommended by opencl-clang and include
support for OpenCL 3.0.

Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
Anuj Mittal 2022-04-19 15:43:41 +08:00
parent 10944d5308
commit b9da851fa5
6 changed files with 5033 additions and 40 deletions

View File

@ -4,7 +4,7 @@ Date: Tue, 11 May 2021 11:13:02 +0800
Subject: [PATCH 1/3] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR
doesn't require image support
Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch]
Upstream-Status: Inappropriate
Signed-off-by: haonanya <haonan.yang@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>

View File

@ -1,7 +1,7 @@
From 06cf750d2ef892eaa4f0ff5d0a9e9e5c49697264 Mon Sep 17 00:00:00 2001
From 60854c328d8729b2ef10b9bb4dcbcc282f43c5e7 Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 1 Apr 2021 18:41:44 +0200
Subject: [PATCH 2/3] Avoid calling ParseCommandLineOptions in BackendUtil if
Subject: [PATCH] Avoid calling ParseCommandLineOptions in BackendUtil if
possible
Calling `ParseCommandLineOptions` should only be called from `main` as the
@ -20,18 +20,18 @@ Reviewed By: JDevlieghere
Differential Revision: https://reviews.llvm.org/D99740
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch]
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/60854c328d8729b2ef10b9bb4dcbcc282f43c5e7]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 52bcd971dc8c..f9f891247530 100644
index 41eafd13d97c..00d92e7beadd 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -850,7 +850,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
@@ -871,7 +871,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
BackendArgs.push_back("-limit-float-precision");
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
}
@ -48,5 +48,5 @@ index 52bcd971dc8c..f9f891247530 100644
BackendArgs.data());
}
--
2.17.1
2.29.2

View File

@ -1,20 +1,28 @@
From f1a24eeb89342186c6c718e02dd394775620799f Mon Sep 17 00:00:00 2001
From 85505bdb386a426310c1fb0a845780beeeec4353 Mon Sep 17 00:00:00 2001
From: haonanya <haonan.yang@intel.com>
Date: Wed, 28 Jul 2021 14:20:08 +0800
Subject: [PATCH 3/3] Support cl_ext_float_atomics
Date: Wed, 9 Feb 2022 09:16:35 +0800
Subject: [PATCH] Support cl_ext_float_atomics
Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch]
This backports https://reviews.llvm.org/D106343 and https://reviews.llvm.org/D109740
Signed-off-by: haonanya <haonan.yang@intel.com>
Upstream-Status: Backport
https://github.com/llvm/llvm-project/commit/d353d1c50112a1cb315eccdab18ce7bd1563cd06
https://github.com/llvm/llvm-project/commit/544d89e847d42ce8856296752b0fb279aa89aace
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
clang/lib/Headers/opencl-c-base.h | 19 +++
clang/lib/Headers/opencl-c.h | 195 ++++++++++++++++++++++++++
clang/test/Headers/opencl-c-header.cl | 72 ++++++++++
3 files changed, 286 insertions(+)
clang/lib/Headers/opencl-c-base.h | 19 ++
clang/lib/Headers/opencl-c.h | 378 ++++++++++++++++++++++++++
clang/lib/Sema/Sema.cpp | 3 +
clang/test/Headers/opencl-c-header.cl | 84 ++++++
4 files changed, 484 insertions(+)
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index e8dcd70377e5..c8b6d36029ec 100644
index e8dcd70377e5..d94d64372dbb 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -21,6 +21,25 @@
@ -30,7 +38,7 @@ index e8dcd70377e5..c8b6d36029ec 100644
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
+#endif
+#ifdef __opencl_c_fp64
+#ifdef cl_khr_fp64
+#define __opencl_c_ext_fp64_global_atomic_add 1
+#define __opencl_c_ext_fp64_local_atomic_add 1
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
@ -44,15 +52,128 @@ index e8dcd70377e5..c8b6d36029ec 100644
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index ab665628c8e1..6676da858d2a 100644
index ab665628c8e1..584db7e81e04 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -13531,6 +13531,201 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint
@@ -13531,6 +13531,384 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint
intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);
#endif
+// The functionality added by cl_ext_float_atomics extension
+#if defined(cl_ext_float_atomics)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store)
+void __ovld atomic_store(volatile __global atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_load(volatile __global atomic_half *object);
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_load_store)
+void __ovld atomic_store(volatile __local atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_load(volatile __local atomic_half *object);
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile __local atomic_half *object, half operand);
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \
+ defined(__opencl_c_ext_fp16_local_atomic_load_store)
+void __ovld atomic_store(volatile atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
+ memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
+ memory_order order, memory_scope scope);
+half __ovld atomic_load(volatile atomic_half *object);
+half __ovld atomic_load_explicit(volatile atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile atomic_half *object, half operand);
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
+ memory_order order);
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
+ memory_order order, memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) &&
+ // defined(__opencl_c_ext_fp16_local_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max)
+half __ovld atomic_fetch_min(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_max(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_min_max)
+half __ovld atomic_fetch_min(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_max(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
+half __ovld atomic_fetch_min(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_max(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
+ float operand);
@ -68,7 +189,8 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
+ float operand);
@ -84,8 +206,9 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
@ -99,7 +222,10 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
+ double operand);
@ -115,7 +241,8 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
+ double operand);
@ -131,8 +258,9 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
@ -146,7 +274,61 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+#endif // defined(cl_khr_int64_base_atomics) &&
+ // defined(cl_khr_int64_extended_atomics)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_add)
+half __ovld atomic_fetch_add(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_sub(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_add)
+half __ovld atomic_fetch_add(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_sub(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_add) && \
+ defined(__opencl_c_ext_fp16_local_atomic_add)
+half __ovld atomic_fetch_add(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_sub(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \
+ defined(__opencl_c_ext_fp16_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
@ -163,7 +345,8 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
+ float operand);
@ -179,8 +362,9 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add) && \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
@ -194,8 +378,10 @@ index ab665628c8e1..6676da858d2a 100644
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
+ double operand);
@ -211,7 +397,8 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
+ double operand);
@ -227,8 +414,9 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
@ -242,18 +430,35 @@ index ab665628c8e1..6676da858d2a 100644
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+#endif // defined(cl_khr_int64_base_atomics) &&
+ // defined(cl_khr_int64_extended_atomics)
+
+#endif // cl_ext_float_atomics
+
// atomic_store()
void __ovld atomic_store(volatile atomic_int *object, int desired);
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 450f9c020f7f..a91291c7af38 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -313,6 +313,9 @@ void Sema::Initialize() {
addImplicitTypedef("atomic_long", AtomicLongT);
auto AtomicULongT = Context.getAtomicType(Context.UnsignedLongTy);
addImplicitTypedef("atomic_ulong", AtomicULongT);
+ auto AtomicHalfT = Context.getAtomicType(Context.HalfTy);
+ addImplicitTypedef("atomic_half", AtomicHalfT);
+ setOpenCLExtensionForType(AtomicHalfT, "cl_khr_fp16");
addImplicitTypedef("atomic_float",
Context.getAtomicType(Context.FloatTy));
auto AtomicDoubleT = Context.getAtomicType(Context.DoubleTy);
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
index 13a3b62481ec..2c02d14f25c3 100644
index 13a3b62481ec..443f682c711a 100644
--- a/clang/test/Headers/opencl-c-header.cl
+++ b/clang/test/Headers/opencl-c-header.cl
@@ -124,6 +124,36 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
@@ -124,6 +124,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#if cl_khr_subgroup_clustered_reduce != 1
#error "Incorrectly defined cl_khr_subgroup_clustered_reduce"
#endif
@ -269,28 +474,40 @@ index 13a3b62481ec..2c02d14f25c3 100644
+#if __opencl_c_ext_fp32_global_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add"
+#endif
+#if __opencl_c_ext_fp64_global_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add"
+#endif
+#if __opencl_c_ext_fp16_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp32_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp64_local_atomic_add != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add"
+#endif
+#if __opencl_c_ext_fp16_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp32_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp64_global_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp16_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp32_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max"
+#endif
+#if __opencl_c_ext_fp64_local_atomic_min_max != 1
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
+#endif
#else
@@ -148,6 +178,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
@@ -148,6 +190,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#ifdef cl_khr_subgroup_clustered_reduce
#error "Incorrect cl_khr_subgroup_clustered_reduce define"
#endif
@ -340,5 +557,5 @@ index 13a3b62481ec..2c02d14f25c3 100644
#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
--
2.17.1
2.29.2

View File

@ -0,0 +1,146 @@
From 54802abece1e4742050795162e3a773f0e143aa3 Mon Sep 17 00:00:00 2001
From: haonanya <haonan.yang@intel.com>
Date: Fri, 24 Dec 2021 15:27:46 +0800
Subject: [PATCH] [OpenCL] Add cl_khr_integer_dot_product
This is a backport of https://reviews.llvm.org/D106434
Add the builtins defined by Section 42 "Integer dot product" in the OpenCL
Extension Specification.
See https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_integer_dot_product
Signed-off-by: haonanya <haonan.yang@intel.com>
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/989bedec7a6ae95a0db865f23677047f78dc9257]
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
---
clang/lib/Headers/opencl-c-base.h | 3 +++
clang/lib/Headers/opencl-c.h | 24 ++++++++++++++++++++++++
clang/lib/Sema/OpenCLBuiltins.td | 25 +++++++++++++++++++++++++
clang/test/Headers/opencl-c-header.cl | 18 ++++++++++++++++++
4 files changed, 70 insertions(+)
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index d94d64372dbb..b55d9601a452 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -40,6 +40,9 @@
#define __opencl_c_ext_fp32_local_atomic_add 1
#define __opencl_c_ext_fp32_global_atomic_min_max 1
#define __opencl_c_ext_fp32_local_atomic_min_max 1
+#define cl_khr_integer_dot_product 1
+#define __opencl_c_integer_dot_product_input_4x8bit 1
+#define __opencl_c_integer_dot_product_input_4x8bit_packed 1
#endif // defined(__SPIR__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 501a04f6e82b..50dcc03955cb 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -16362,6 +16362,30 @@ double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
#endif // cl_khr_subgroup_clustered_reduce
+#if defined(__opencl_c_integer_dot_product_input_4x8bit)
+uint __ovld __cnfn dot(uchar4, uchar4);
+int __ovld __cnfn dot(char4, char4);
+int __ovld __cnfn dot(uchar4, char4);
+int __ovld __cnfn dot(char4, uchar4);
+
+uint __ovld __cnfn dot_acc_sat(uchar4, uchar4, uint);
+int __ovld __cnfn dot_acc_sat(char4, char4, int);
+int __ovld __cnfn dot_acc_sat(uchar4, char4, int);
+int __ovld __cnfn dot_acc_sat(char4, uchar4, int);
+#endif // __opencl_c_integer_dot_product_input_4x8bit
+
+#if defined(__opencl_c_integer_dot_product_input_4x8bit_packed)
+uint __ovld __cnfn dot_4x8packed_uu_uint(uint, uint);
+int __ovld __cnfn dot_4x8packed_ss_int(uint, uint);
+int __ovld __cnfn dot_4x8packed_us_int(uint, uint);
+int __ovld __cnfn dot_4x8packed_su_int(uint, uint);
+
+uint __ovld __cnfn dot_acc_sat_4x8packed_uu_uint(uint, uint, uint);
+int __ovld __cnfn dot_acc_sat_4x8packed_ss_int(uint, uint, int);
+int __ovld __cnfn dot_acc_sat_4x8packed_us_int(uint, uint, int);
+int __ovld __cnfn dot_acc_sat_4x8packed_su_int(uint, uint, int);
+#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
+
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 745363a6b43f..3c5f8821063d 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -1482,6 +1482,31 @@ let Extension = FuncExtKhrSubgroups in {
}
}
+// Section 42.3 - cl_khr_integer_dot_product
+let Extension = FunctionExtension<"__opencl_c_integer_dot_product_input_4x8bit"> in {
+ def : Builtin<"dot", [UInt, VectorType<UChar, 4>, VectorType<UChar, 4>], Attr.Const>;
+ def : Builtin<"dot", [Int, VectorType<Char, 4>, VectorType<Char, 4>], Attr.Const>;
+ def : Builtin<"dot", [Int, VectorType<UChar, 4>, VectorType<Char, 4>], Attr.Const>;
+ def : Builtin<"dot", [Int, VectorType<Char, 4>, VectorType<UChar, 4>], Attr.Const>;
+
+ def : Builtin<"dot_acc_sat", [UInt, VectorType<UChar, 4>, VectorType<UChar, 4>, UInt], Attr.Const>;
+ def : Builtin<"dot_acc_sat", [Int, VectorType<Char, 4>, VectorType<Char, 4>, Int], Attr.Const>;
+ def : Builtin<"dot_acc_sat", [Int, VectorType<UChar, 4>, VectorType<Char, 4>, Int], Attr.Const>;
+ def : Builtin<"dot_acc_sat", [Int, VectorType<Char, 4>, VectorType<UChar, 4>, Int], Attr.Const>;
+}
+
+let Extension = FunctionExtension<"__opencl_c_integer_dot_product_input_4x8bit_packed"> in {
+ def : Builtin<"dot_4x8packed_uu_uint", [UInt, UInt, UInt], Attr.Const>;
+ def : Builtin<"dot_4x8packed_ss_int", [Int, UInt, UInt], Attr.Const>;
+ def : Builtin<"dot_4x8packed_us_int", [Int, UInt, UInt], Attr.Const>;
+ def : Builtin<"dot_4x8packed_su_int", [Int, UInt, UInt], Attr.Const>;
+
+ def : Builtin<"dot_acc_sat_4x8packed_uu_uint", [UInt, UInt, UInt, UInt], Attr.Const>;
+ def : Builtin<"dot_acc_sat_4x8packed_ss_int", [Int, UInt, UInt, Int], Attr.Const>;
+ def : Builtin<"dot_acc_sat_4x8packed_us_int", [Int, UInt, UInt, Int], Attr.Const>;
+ def : Builtin<"dot_acc_sat_4x8packed_su_int", [Int, UInt, UInt, Int], Attr.Const>;
+}
+
//--------------------------------------------------------------------
// Arm extensions.
let Extension = ArmIntegerDotProductInt8 in {
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
index 443f682c711a..184eefd9f9c3 100644
--- a/clang/test/Headers/opencl-c-header.cl
+++ b/clang/test/Headers/opencl-c-header.cl
@@ -166,6 +166,15 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#if __opencl_c_ext_fp64_local_atomic_min_max != 1
#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
#endif
+#if cl_khr_integer_dot_product != 1
+#error "Incorrectly defined cl_khr_integer_dot_product"
+#endif
+#if __opencl_c_integer_dot_product_input_4x8bit != 1
+#error "Incorrectly defined __opencl_c_integer_dot_product_input_4x8bit"
+#endif
+#if __opencl_c_integer_dot_product_input_4x8bit_packed != 1
+#error "Incorrectly defined __opencl_c_integer_dot_product_input_4x8bit_packed"
+#endif
#else
@@ -232,6 +241,15 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
#ifdef __opencl_c_ext_fp64_local_atomic_min_max
#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
#endif
+#ifdef cl_khr_integer_dot_product
+#error "Incorrect cl_khr_integer_dot_product define"
+#endif
+#ifdef __opencl_c_integer_dot_product_input_4x8bit
+#error "Incorrect __opencl_c_integer_dot_product_input_4x8bit define"
+#endif
+#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
+#error "Incorrect __opencl_c_integer_dot_product_input_4x8bit_packed define"
+#endif
#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
--
2.17.1

View File

@ -6,6 +6,8 @@ SRC_URI_LLVM12_PATCHES = " \
file://llvm12-0003-Support-cl_ext_float_atomics.patch \
file://llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch \
file://llvm12-0005-ispc-12_0_fix_for_2111.patch \
file://llvm12-0006-OpenCL-Add-cl_khr_integer_dot_product.patch \
file://llvm12-0007-OpenCL-3.0-support.patch \
"
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}"