mirror of
git://git.yoctoproject.org/meta-intel.git
synced 2025-07-05 05:04:45 +02:00
Remove support for LLVM 12
We can now build with LLVM 14 and no longer need to keep LLVM 12 patches and compatibility code. Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
parent
94d6ec3730
commit
0a7687b2c1
|
@ -21,9 +21,6 @@ PREFERRED_PROVIDER_libva-utils = "libva-intel-utils"
|
|||
PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native"
|
||||
PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils"
|
||||
|
||||
PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '14.0.0', d)}"
|
||||
PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '14.0.0', d)}"
|
||||
|
||||
XSERVER_X86_ASPEED_AST = "xf86-video-ast \
|
||||
"
|
||||
|
||||
|
|
|
@ -1,51 +0,0 @@
|
|||
From 3632f727dfd786a8eca50bd01219669bbe7b0df9 Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Tue, 11 May 2021 11:13:02 +0800
|
||||
Subject: [PATCH 1/3] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR
|
||||
doesn't require image support
|
||||
|
||||
Upstream-Status: Inappropriate
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
clang/lib/Frontend/InitPreprocessor.cpp | 3 ---
|
||||
clang/test/Preprocessor/predefined-macros.c | 2 --
|
||||
2 files changed, 5 deletions(-)
|
||||
|
||||
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
index c64a912ce919..c60972c96e5d 100644
|
||||
--- a/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
|
||||
@@ -1121,9 +1121,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
|
||||
// OpenCL definitions.
|
||||
if (LangOpts.OpenCL) {
|
||||
TI.getOpenCLFeatureDefines(LangOpts, Builder);
|
||||
-
|
||||
- if (TI.getTriple().isSPIR())
|
||||
- Builder.defineMacro("__IMAGE_SUPPORT__");
|
||||
}
|
||||
|
||||
if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) {
|
||||
diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c
|
||||
index e406b9a70570..88606518c7de 100644
|
||||
--- a/clang/test/Preprocessor/predefined-macros.c
|
||||
+++ b/clang/test/Preprocessor/predefined-macros.c
|
||||
@@ -188,14 +188,12 @@
|
||||
|
||||
// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \
|
||||
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR
|
||||
-// CHECK-SPIR-DAG: #define __IMAGE_SUPPORT__ 1
|
||||
// CHECK-SPIR-DAG: #define __SPIR__ 1
|
||||
// CHECK-SPIR-DAG: #define __SPIR32__ 1
|
||||
// CHECK-SPIR-NOT: #define __SPIR64__ 1
|
||||
|
||||
// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir64-unknown-unknown \
|
||||
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR64
|
||||
-// CHECK-SPIR64-DAG: #define __IMAGE_SUPPORT__ 1
|
||||
// CHECK-SPIR64-DAG: #define __SPIR__ 1
|
||||
// CHECK-SPIR64-DAG: #define __SPIR64__ 1
|
||||
// CHECK-SPIR64-NOT: #define __SPIR32__ 1
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
From 60854c328d8729b2ef10b9bb4dcbcc282f43c5e7 Mon Sep 17 00:00:00 2001
|
||||
From: Raphael Isemann <teemperor@gmail.com>
|
||||
Date: Thu, 1 Apr 2021 18:41:44 +0200
|
||||
Subject: [PATCH] Avoid calling ParseCommandLineOptions in BackendUtil if
|
||||
possible
|
||||
|
||||
`ParseCommandLineOptions` should only be called from `main` as the
|
||||
CommandLine setup code isn't thread-safe. As BackendUtil is part of the
|
||||
generic Clang FrontendAction logic, a process which has several threads executing
|
||||
Clang FrontendActions will randomly crash in the unsafe setup code.
|
||||
|
||||
This patch avoids calling the function unless either the debug-pass option or
|
||||
limit-float-precision option is set. Without these two options set the
|
||||
`ParseCommandLineOptions` call doesn't do anything besides parsing
|
||||
the command line `clang` which doesn't set any options.
|
||||
|
||||
See also D99652 where LLDB received a workaround for this crash.
|
||||
|
||||
Reviewed By: JDevlieghere
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D99740
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/60854c328d8729b2ef10b9bb4dcbcc282f43c5e7]
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
|
||||
---
|
||||
clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
index 41eafd13d97c..00d92e7beadd 100644
|
||||
--- a/clang/lib/CodeGen/BackendUtil.cpp
|
||||
+++ b/clang/lib/CodeGen/BackendUtil.cpp
|
||||
@@ -871,7 +871,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
|
||||
BackendArgs.push_back("-limit-float-precision");
|
||||
BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
|
||||
}
|
||||
+ // Check for the default "clang" invocation that won't set any cl::opt values.
|
||||
+ // Skip trying to parse the command line invocation to avoid the issues
|
||||
+ // described below.
|
||||
+ if (BackendArgs.size() == 1)
|
||||
+ return;
|
||||
BackendArgs.push_back(nullptr);
|
||||
+ // FIXME: The command line parser below is not thread-safe and shares a global
|
||||
+ // state, so this call might crash or overwrite the options of another Clang
|
||||
+ // instance in the same process.
|
||||
llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
|
||||
BackendArgs.data());
|
||||
}
|
||||
--
|
||||
2.29.2
|
||||
|
|
@ -1,561 +0,0 @@
|
|||
From 85505bdb386a426310c1fb0a845780beeeec4353 Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Wed, 9 Feb 2022 09:16:35 +0800
|
||||
Subject: [PATCH] Support cl_ext_float_atomics
|
||||
|
||||
This backports https://reviews.llvm.org/D106343 and https://reviews.llvm.org/D109740
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
https://github.com/llvm/llvm-project/commit/d353d1c50112a1cb315eccdab18ce7bd1563cd06
|
||||
https://github.com/llvm/llvm-project/commit/544d89e847d42ce8856296752b0fb279aa89aace
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
|
||||
---
|
||||
clang/lib/Headers/opencl-c-base.h | 19 ++
|
||||
clang/lib/Headers/opencl-c.h | 378 ++++++++++++++++++++++++++
|
||||
clang/lib/Sema/Sema.cpp | 3 +
|
||||
clang/test/Headers/opencl-c-header.cl | 84 ++++++
|
||||
4 files changed, 484 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
|
||||
index e8dcd70377e5..d94d64372dbb 100644
|
||||
--- a/clang/lib/Headers/opencl-c-base.h
|
||||
+++ b/clang/lib/Headers/opencl-c-base.h
|
||||
@@ -21,6 +21,25 @@
|
||||
#define cl_khr_subgroup_shuffle 1
|
||||
#define cl_khr_subgroup_shuffle_relative 1
|
||||
#define cl_khr_subgroup_clustered_reduce 1
|
||||
+#define cl_ext_float_atomics
|
||||
+#ifdef cl_khr_fp16
|
||||
+#define __opencl_c_ext_fp16_global_atomic_load_store 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_load_store 1
|
||||
+#define __opencl_c_ext_fp16_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
|
||||
+#endif
|
||||
+#ifdef cl_khr_fp64
|
||||
+#define __opencl_c_ext_fp64_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp64_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp64_local_atomic_min_max 1
|
||||
+#endif
|
||||
+#define __opencl_c_ext_fp32_global_atomic_add 1
|
||||
+#define __opencl_c_ext_fp32_local_atomic_add 1
|
||||
+#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
+#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
#endif // defined(__SPIR__)
|
||||
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
|
||||
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
|
||||
index ab665628c8e1..584db7e81e04 100644
|
||||
--- a/clang/lib/Headers/opencl-c.h
|
||||
+++ b/clang/lib/Headers/opencl-c.h
|
||||
@@ -13531,6 +13531,384 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint
|
||||
intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);
|
||||
#endif
|
||||
|
||||
+// The functionality added by cl_ext_float_atomics extension
|
||||
+#if defined(cl_ext_float_atomics)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store)
|
||||
+void __ovld atomic_store(volatile __global atomic_half *object, half operand);
|
||||
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_load(volatile __global atomic_half *object);
|
||||
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
|
||||
+ memory_order order);
|
||||
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
|
||||
+ memory_order order, memory_scope scope);
|
||||
+half __ovld atomic_exchange(volatile __global atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_local_atomic_load_store)
|
||||
+void __ovld atomic_store(volatile __local atomic_half *object, half operand);
|
||||
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_load(volatile __local atomic_half *object);
|
||||
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
|
||||
+ memory_order order);
|
||||
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
|
||||
+ memory_order order, memory_scope scope);
|
||||
+half __ovld atomic_exchange(volatile __local atomic_half *object, half operand);
|
||||
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_local_atomic_load_store)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \
|
||||
+ defined(__opencl_c_ext_fp16_local_atomic_load_store)
|
||||
+void __ovld atomic_store(volatile atomic_half *object, half operand);
|
||||
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
|
||||
+ memory_order order);
|
||||
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
|
||||
+ memory_order order, memory_scope scope);
|
||||
+half __ovld atomic_load(volatile atomic_half *object);
|
||||
+half __ovld atomic_load_explicit(volatile atomic_half *object,
|
||||
+ memory_order order);
|
||||
+half __ovld atomic_load_explicit(volatile atomic_half *object,
|
||||
+ memory_order order, memory_scope scope);
|
||||
+half __ovld atomic_exchange(volatile atomic_half *object, half operand);
|
||||
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
|
||||
+ memory_order order);
|
||||
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
|
||||
+ memory_order order, memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) &&
|
||||
+ // defined(__opencl_c_ext_fp16_local_atomic_load_store)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max)
|
||||
+half __ovld atomic_fetch_min(volatile __global atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_max(volatile __global atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_local_atomic_min_max)
|
||||
+half __ovld atomic_fetch_min(volatile __local atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_max(volatile __local atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
|
||||
+half __ovld atomic_fetch_min(volatile atomic_half *object, half operand);
|
||||
+half __ovld atomic_fetch_max(volatile atomic_half *object, half operand);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_max(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_max(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_max(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_max(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
|
||||
+#endif // defined(cl_khr_int64_base_atomics) &&
|
||||
+ // defined(cl_khr_int64_extended_atomics)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_add)
|
||||
+half __ovld atomic_fetch_add(volatile __global atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_sub(volatile __global atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_local_atomic_add)
|
||||
+half __ovld atomic_fetch_add(volatile __local atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_sub(volatile __local atomic_half *object,
|
||||
+ half operand);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp16_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp16_local_atomic_add)
|
||||
+half __ovld atomic_fetch_add(volatile atomic_half *object, half operand);
|
||||
+half __ovld atomic_fetch_sub(volatile atomic_half *object, half operand);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order);
|
||||
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
|
||||
+ half operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp16_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile __global atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile __local atomic_float *object,
|
||||
+ float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp32_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order);
|
||||
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
|
||||
+ float operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp32_local_atomic_add)
|
||||
+
|
||||
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile __global atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile __local atomic_double *object,
|
||||
+ double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+
|
||||
+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order);
|
||||
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
|
||||
+ double operand, memory_order order,
|
||||
+ memory_scope scope);
|
||||
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \
|
||||
+ defined(__opencl_c_ext_fp64_local_atomic_add)
|
||||
+#endif // defined(cl_khr_int64_base_atomics) &&
|
||||
+ // defined(cl_khr_int64_extended_atomics)
|
||||
+
|
||||
+#endif // cl_ext_float_atomics
|
||||
+
|
||||
// atomic_store()
|
||||
|
||||
void __ovld atomic_store(volatile atomic_int *object, int desired);
|
||||
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
|
||||
index 450f9c020f7f..a91291c7af38 100644
|
||||
--- a/clang/lib/Sema/Sema.cpp
|
||||
+++ b/clang/lib/Sema/Sema.cpp
|
||||
@@ -313,6 +313,9 @@ void Sema::Initialize() {
|
||||
addImplicitTypedef("atomic_long", AtomicLongT);
|
||||
auto AtomicULongT = Context.getAtomicType(Context.UnsignedLongTy);
|
||||
addImplicitTypedef("atomic_ulong", AtomicULongT);
|
||||
+ auto AtomicHalfT = Context.getAtomicType(Context.HalfTy);
|
||||
+ addImplicitTypedef("atomic_half", AtomicHalfT);
|
||||
+ setOpenCLExtensionForType(AtomicHalfT, "cl_khr_fp16");
|
||||
addImplicitTypedef("atomic_float",
|
||||
Context.getAtomicType(Context.FloatTy));
|
||||
auto AtomicDoubleT = Context.getAtomicType(Context.DoubleTy);
|
||||
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
|
||||
index 13a3b62481ec..443f682c711a 100644
|
||||
--- a/clang/test/Headers/opencl-c-header.cl
|
||||
+++ b/clang/test/Headers/opencl-c-header.cl
|
||||
@@ -124,6 +124,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
|
||||
#if cl_khr_subgroup_clustered_reduce != 1
|
||||
#error "Incorrectly defined cl_khr_subgroup_clustered_reduce"
|
||||
#endif
|
||||
+#if __opencl_c_ext_fp16_global_atomic_load_store != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_load_store != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_global_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_local_atomic_add != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_global_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp16_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp32_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max"
|
||||
+#endif
|
||||
+#if __opencl_c_ext_fp64_local_atomic_min_max != 1
|
||||
+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
|
||||
+#endif
|
||||
|
||||
#else
|
||||
|
||||
@@ -148,6 +190,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
|
||||
#ifdef cl_khr_subgroup_clustered_reduce
|
||||
#error "Incorrect cl_khr_subgroup_clustered_reduce define"
|
||||
#endif
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_load_store
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_load_store
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_global_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_local_atomic_add
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_global_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp16_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp32_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_ext_fp64_local_atomic_min_max
|
||||
+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
|
||||
+#endif
|
||||
|
||||
#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
|
||||
--
|
||||
2.29.2
|
||||
|
|
@@ -1,67 +0,0 @@
|
|||
From 0c4ba4947d1630f2e13fc260399f0892b2c9b323 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 10:55:13 +0800
|
||||
Subject: [PATCH 1/2] This patch is needed for ISPC for Gen only
|
||||
|
||||
1. Transforming an add into an or is not safe for the VC backend.
|
||||
2. bswap intrinsics are not supported in the VC backend yet.
|
||||
|
||||
Upstream-Status: Backport [Taken from ispc, https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 10 +++++++---
|
||||
.../lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 9 ++++++---
|
||||
2 files changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||
index bacb8689892a..f3d0120db256 100644
|
||||
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
+#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Constant.h"
|
||||
@@ -1363,9 +1364,12 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
|
||||
}
|
||||
}
|
||||
|
||||
- // A+B --> A|B iff A and B have no bits set in common.
|
||||
- if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
|
||||
- return BinaryOperator::CreateOr(LHS, RHS);
|
||||
+ // Disable this transformation for ISPC SPIR-V
|
||||
+ if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
|
||||
+ // A+B --> A|B iff A and B have no bits set in common.
|
||||
+ if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
|
||||
+ return BinaryOperator::CreateOr(LHS, RHS);
|
||||
+ }
|
||||
|
||||
// add (select X 0 (sub n A)) A --> select X A n
|
||||
{
|
||||
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||
index 68c4156af2c4..b145b863ca84 100644
|
||||
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||
@@ -2584,9 +2584,12 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
|
||||
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
|
||||
return FoldedLogic;
|
||||
|
||||
- if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
|
||||
- /*MatchBitReversals*/ false))
|
||||
- return BSwap;
|
||||
+ // Disable this transformation for ISPC SPIR-V
|
||||
+ if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
|
||||
+ if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
|
||||
+ /*MatchBitReversals*/ false))
|
||||
+ return BSwap;
|
||||
+ }
|
||||
|
||||
if (Instruction *Funnel = matchFunnelShift(I, *this))
|
||||
return Funnel;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@@ -1,35 +0,0 @@
|
|||
From 913e07ea5acf2148e3748b45ddfe3fac3b2d051c Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 10:56:57 +0800
|
||||
Subject: [PATCH 2/2] This patch is a fix for #2111
|
||||
|
||||
It ensures that the shuffle is lowered correctly for this particular case.
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/9ab99f773fec7da4183495a3fdc655a797d3bea2]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index 6b816c710f98..3121b0e818ac 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -43192,9 +43192,10 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
|
||||
ShuffleVectorSDNode::commuteMask(ShuffleMask1);
|
||||
}
|
||||
if ((Op00 == Op10) && (Op01 == Op11)) {
|
||||
- SmallVector<int, 4> ShuffleMask;
|
||||
- ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
|
||||
- ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
|
||||
+ const int Map[4] = {0, 2, 1, 3};
|
||||
+ SmallVector<int, 4> ShuffleMask(
|
||||
+ {Map[ShuffleMask0[0]], Map[ShuffleMask1[0]], Map[ShuffleMask0[1]],
|
||||
+ Map[ShuffleMask1[1]]});
|
||||
SDLoc DL(N);
|
||||
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
|
||||
SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
|
||||
--
|
||||
2.17.1
|
||||
|
|
@@ -1,146 +0,0 @@
|
|||
From 54802abece1e4742050795162e3a773f0e143aa3 Mon Sep 17 00:00:00 2001
|
||||
From: haonanya <haonan.yang@intel.com>
|
||||
Date: Fri, 24 Dec 2021 15:27:46 +0800
|
||||
Subject: [PATCH] [OpenCL] Add cl_khr_integer_dot_product
|
||||
|
||||
This is a backport of https://reviews.llvm.org/D106434
|
||||
Add the builtins defined by Section 42 "Integer dot product" in the OpenCL
|
||||
Extension Specification.
|
||||
|
||||
See https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_integer_dot_product
|
||||
|
||||
Signed-off-by: haonanya <haonan.yang@intel.com>
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/989bedec7a6ae95a0db865f23677047f78dc9257]
|
||||
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
|
||||
---
|
||||
clang/lib/Headers/opencl-c-base.h | 3 +++
|
||||
clang/lib/Headers/opencl-c.h | 24 ++++++++++++++++++++++++
|
||||
clang/lib/Sema/OpenCLBuiltins.td | 25 +++++++++++++++++++++++++
|
||||
clang/test/Headers/opencl-c-header.cl | 18 ++++++++++++++++++
|
||||
4 files changed, 70 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
|
||||
index d94d64372dbb..b55d9601a452 100644
|
||||
--- a/clang/lib/Headers/opencl-c-base.h
|
||||
+++ b/clang/lib/Headers/opencl-c-base.h
|
||||
@@ -40,6 +40,9 @@
|
||||
#define __opencl_c_ext_fp32_local_atomic_add 1
|
||||
#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
+#define cl_khr_integer_dot_product 1
|
||||
+#define __opencl_c_integer_dot_product_input_4x8bit 1
|
||||
+#define __opencl_c_integer_dot_product_input_4x8bit_packed 1
|
||||
#endif // defined(__SPIR__)
|
||||
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
|
||||
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
|
||||
index 501a04f6e82b..50dcc03955cb 100644
|
||||
--- a/clang/lib/Headers/opencl-c.h
|
||||
+++ b/clang/lib/Headers/opencl-c.h
|
||||
@@ -16362,6 +16362,30 @@ double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
|
||||
|
||||
#endif // cl_khr_subgroup_clustered_reduce
|
||||
|
||||
+#if defined(__opencl_c_integer_dot_product_input_4x8bit)
|
||||
+uint __ovld __cnfn dot(uchar4, uchar4);
|
||||
+int __ovld __cnfn dot(char4, char4);
|
||||
+int __ovld __cnfn dot(uchar4, char4);
|
||||
+int __ovld __cnfn dot(char4, uchar4);
|
||||
+
|
||||
+uint __ovld __cnfn dot_acc_sat(uchar4, uchar4, uint);
|
||||
+int __ovld __cnfn dot_acc_sat(char4, char4, int);
|
||||
+int __ovld __cnfn dot_acc_sat(uchar4, char4, int);
|
||||
+int __ovld __cnfn dot_acc_sat(char4, uchar4, int);
|
||||
+#endif // __opencl_c_integer_dot_product_input_4x8bit
|
||||
+
|
||||
+#if defined(__opencl_c_integer_dot_product_input_4x8bit_packed)
|
||||
+uint __ovld __cnfn dot_4x8packed_uu_uint(uint, uint);
|
||||
+int __ovld __cnfn dot_4x8packed_ss_int(uint, uint);
|
||||
+int __ovld __cnfn dot_4x8packed_us_int(uint, uint);
|
||||
+int __ovld __cnfn dot_4x8packed_su_int(uint, uint);
|
||||
+
|
||||
+uint __ovld __cnfn dot_acc_sat_4x8packed_uu_uint(uint, uint, uint);
|
||||
+int __ovld __cnfn dot_acc_sat_4x8packed_ss_int(uint, uint, int);
|
||||
+int __ovld __cnfn dot_acc_sat_4x8packed_us_int(uint, uint, int);
|
||||
+int __ovld __cnfn dot_acc_sat_4x8packed_su_int(uint, uint, int);
|
||||
+#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
|
||||
+
|
||||
#if defined(cl_intel_subgroups)
|
||||
// Intel-Specific Sub Group Functions
|
||||
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
|
||||
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
|
||||
index 745363a6b43f..3c5f8821063d 100644
|
||||
--- a/clang/lib/Sema/OpenCLBuiltins.td
|
||||
+++ b/clang/lib/Sema/OpenCLBuiltins.td
|
||||
@@ -1482,6 +1482,31 @@ let Extension = FuncExtKhrSubgroups in {
|
||||
}
|
||||
}
|
||||
|
||||
+// Section 42.3 - cl_khr_integer_dot_product
|
||||
+let Extension = FunctionExtension<"__opencl_c_integer_dot_product_input_4x8bit"> in {
|
||||
+ def : Builtin<"dot", [UInt, VectorType<UChar, 4>, VectorType<UChar, 4>], Attr.Const>;
|
||||
+ def : Builtin<"dot", [Int, VectorType<Char, 4>, VectorType<Char, 4>], Attr.Const>;
|
||||
+ def : Builtin<"dot", [Int, VectorType<UChar, 4>, VectorType<Char, 4>], Attr.Const>;
|
||||
+ def : Builtin<"dot", [Int, VectorType<Char, 4>, VectorType<UChar, 4>], Attr.Const>;
|
||||
+
|
||||
+ def : Builtin<"dot_acc_sat", [UInt, VectorType<UChar, 4>, VectorType<UChar, 4>, UInt], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat", [Int, VectorType<Char, 4>, VectorType<Char, 4>, Int], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat", [Int, VectorType<UChar, 4>, VectorType<Char, 4>, Int], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat", [Int, VectorType<Char, 4>, VectorType<UChar, 4>, Int], Attr.Const>;
|
||||
+}
|
||||
+
|
||||
+let Extension = FunctionExtension<"__opencl_c_integer_dot_product_input_4x8bit_packed"> in {
|
||||
+ def : Builtin<"dot_4x8packed_uu_uint", [UInt, UInt, UInt], Attr.Const>;
|
||||
+ def : Builtin<"dot_4x8packed_ss_int", [Int, UInt, UInt], Attr.Const>;
|
||||
+ def : Builtin<"dot_4x8packed_us_int", [Int, UInt, UInt], Attr.Const>;
|
||||
+ def : Builtin<"dot_4x8packed_su_int", [Int, UInt, UInt], Attr.Const>;
|
||||
+
|
||||
+ def : Builtin<"dot_acc_sat_4x8packed_uu_uint", [UInt, UInt, UInt, UInt], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat_4x8packed_ss_int", [Int, UInt, UInt, Int], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat_4x8packed_us_int", [Int, UInt, UInt, Int], Attr.Const>;
|
||||
+ def : Builtin<"dot_acc_sat_4x8packed_su_int", [Int, UInt, UInt, Int], Attr.Const>;
|
||||
+}
|
||||
+
|
||||
//--------------------------------------------------------------------
|
||||
// Arm extensions.
|
||||
let Extension = ArmIntegerDotProductInt8 in {
|
||||
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
|
||||
index 443f682c711a..184eefd9f9c3 100644
|
||||
--- a/clang/test/Headers/opencl-c-header.cl
|
||||
+++ b/clang/test/Headers/opencl-c-header.cl
|
||||
@@ -166,6 +166,15 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
|
||||
#if __opencl_c_ext_fp64_local_atomic_min_max != 1
|
||||
#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
|
||||
#endif
|
||||
+#if cl_khr_integer_dot_product != 1
|
||||
+#error "Incorrectly defined cl_khr_integer_dot_product"
|
||||
+#endif
|
||||
+#if __opencl_c_integer_dot_product_input_4x8bit != 1
|
||||
+#error "Incorrectly defined __opencl_c_integer_dot_product_input_4x8bit"
|
||||
+#endif
|
||||
+#if __opencl_c_integer_dot_product_input_4x8bit_packed != 1
|
||||
+#error "Incorrectly defined __opencl_c_integer_dot_product_input_4x8bit_packed"
|
||||
+#endif
|
||||
|
||||
#else
|
||||
|
||||
@@ -232,6 +241,15 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
|
||||
#ifdef __opencl_c_ext_fp64_local_atomic_min_max
|
||||
#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
|
||||
#endif
|
||||
+#ifdef cl_khr_integer_dot_product
|
||||
+#error "Incorrect cl_khr_integer_dot_product define"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_integer_dot_product_input_4x8bit
|
||||
+#error "Incorrect __opencl_c_integer_dot_product_input_4x8bit define"
|
||||
+#endif
|
||||
+#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
|
||||
+#error "Incorrect __opencl_c_integer_dot_product_input_4x8bit_packed define"
|
||||
+#endif
|
||||
|
||||
#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
|
||||
--
|
||||
2.17.1
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@@ -1,13 +0,0 @@
|
|||
FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:"
|
||||
|
||||
SRC_URI_LLVM12_PATCHES = " \
|
||||
file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
|
||||
file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
|
||||
file://llvm12-0003-Support-cl_ext_float_atomics.patch \
|
||||
file://llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch \
|
||||
file://llvm12-0005-ispc-12_0_fix_for_2111.patch \
|
||||
file://llvm12-0006-OpenCL-Add-cl_khr_integer_dot_product.patch \
|
||||
file://llvm12-0007-OpenCL-3.0-support.patch \
|
||||
"
|
||||
|
||||
SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}"
|
|
@@ -1,12 +0,0 @@
|
|||
require opencl-clang.inc
|
||||
|
||||
SRCREV = "8fc6b059248dc6c9c40c7cbe5fedcc6ebb951983"
|
||||
|
||||
DEPENDS += " spirv-llvm-translator"
|
||||
|
||||
BRANCH = "ocl-open-120"
|
||||
|
||||
EXTRA_OECMAKE += "\
|
||||
-DCMAKE_SKIP_RPATH=TRUE \
|
||||
-DPREFERRED_LLVM_VERSION="12.0.0" \
|
||||
"
|
Loading…
Reference in New Issue
Block a user