mirror of
git://git.yoctoproject.org/meta-intel.git
synced 2025-07-19 12:59:03 +02:00
llvm/10.0.0: apply ispc recommended patches
ISPC recommends building LLVM 10 with some additional patches to work around some bugs in this version. Add those patches to our build as well. https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
This commit is contained in:
parent
e69b38c4b7
commit
23d702e38f
|
@ -0,0 +1,96 @@
|
|||
From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 11:53:27 +0800
|
||||
Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in
|
||||
#1767
|
||||
|
||||
It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994.
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
|
||||
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
|
||||
2 files changed, 21 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
index 63ddb59fce68..822da2183269 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
|
||||
@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
|
||||
}
|
||||
}
|
||||
}
|
||||
-
|
||||
+#ifndef NDEBUG
|
||||
// Checked that NewNodes are only used by other NewNodes.
|
||||
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
|
||||
SDNode *N = NewNodes[i];
|
||||
@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
|
||||
UI != UE; ++UI)
|
||||
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
/// This is the main entry point for the type legalizer. This does a top-down
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
index faae14444d51..b908c5c58e9f 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
|
||||
@@ -155,7 +155,9 @@ private:
|
||||
const SDValue &getSDValue(TableId &Id) {
|
||||
RemapId(Id);
|
||||
assert(Id && "TableId should be non-zero");
|
||||
- return IdToValueMap[Id];
|
||||
+ auto I = IdToValueMap.find(Id);
|
||||
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
|
||||
+ return I->second;
|
||||
}
|
||||
|
||||
public:
|
||||
@@ -172,24 +174,29 @@ public:
|
||||
bool run();
|
||||
|
||||
void NoteDeletion(SDNode *Old, SDNode *New) {
|
||||
+ assert(Old != New && "node replaced with self");
|
||||
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
|
||||
TableId NewId = getTableId(SDValue(New, i));
|
||||
TableId OldId = getTableId(SDValue(Old, i));
|
||||
|
||||
- if (OldId != NewId)
|
||||
+ if (OldId != NewId) {
|
||||
ReplacedValues[OldId] = NewId;
|
||||
|
||||
- // Delete Node from tables.
|
||||
+ // Delete Node from tables. We cannot do this when OldId == NewId,
|
||||
+ // because NewId can still have table references to it in
|
||||
+ // ReplacedValues.
|
||||
+ IdToValueMap.erase(OldId);
|
||||
+ PromotedIntegers.erase(OldId);
|
||||
+ ExpandedIntegers.erase(OldId);
|
||||
+ SoftenedFloats.erase(OldId);
|
||||
+ PromotedFloats.erase(OldId);
|
||||
+ ExpandedFloats.erase(OldId);
|
||||
+ ScalarizedVectors.erase(OldId);
|
||||
+ SplitVectors.erase(OldId);
|
||||
+ WidenedVectors.erase(OldId);
|
||||
+ }
|
||||
+
|
||||
ValueToIdMap.erase(SDValue(Old, i));
|
||||
- IdToValueMap.erase(OldId);
|
||||
- PromotedIntegers.erase(OldId);
|
||||
- ExpandedIntegers.erase(OldId);
|
||||
- SoftenedFloats.erase(OldId);
|
||||
- PromotedFloats.erase(OldId);
|
||||
- ExpandedFloats.erase(OldId);
|
||||
- ScalarizedVectors.erase(OldId);
|
||||
- SplitVectors.erase(OldId);
|
||||
- WidenedVectors.erase(OldId);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 11:56:01 +0800
|
||||
Subject: [PATCH 2/2] This patch is a fix for #1788.
|
||||
|
||||
It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
|
||||
This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
|
||||
.../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
|
||||
2 files changed, 44 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
|
||||
index 6621fc9f819c..fb7ad93519f6 100644
|
||||
--- a/llvm/include/llvm/IR/PatternMatch.h
|
||||
+++ b/llvm/include/llvm/IR/PatternMatch.h
|
||||
@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
|
||||
|
||||
struct apint_match {
|
||||
const APInt *&Res;
|
||||
+ bool AllowUndef;
|
||||
|
||||
- apint_match(const APInt *&R) : Res(R) {}
|
||||
+ apint_match(const APInt *&Res, bool AllowUndef)
|
||||
+ : Res(Res), AllowUndef(AllowUndef) {}
|
||||
|
||||
template <typename ITy> bool match(ITy *V) {
|
||||
if (auto *CI = dyn_cast<ConstantInt>(V)) {
|
||||
@@ -162,7 +164,8 @@ struct apint_match {
|
||||
}
|
||||
if (V->getType()->isVectorTy())
|
||||
if (const auto *C = dyn_cast<Constant>(V))
|
||||
- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
|
||||
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
|
||||
+ C->getSplatValue(AllowUndef))) {
|
||||
Res = &CI->getValue();
|
||||
return true;
|
||||
}
|
||||
@@ -192,7 +195,20 @@ struct apfloat_match {
|
||||
|
||||
/// Match a ConstantInt or splatted ConstantVector, binding the
|
||||
/// specified pointer to the contained APInt.
|
||||
-inline apint_match m_APInt(const APInt *&Res) { return Res; }
|
||||
+inline apint_match m_APInt(const APInt *&Res) {
|
||||
+ // Forbid undefs by default to maintain previous behavior.
|
||||
+ return apint_match(Res, /* AllowUndef */ false);
|
||||
+}
|
||||
+
|
||||
+/// Match APInt while allowing undefs in splat vector constants.
|
||||
+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
|
||||
+ return apint_match(Res, /* AllowUndef */ true);
|
||||
+}
|
||||
+
|
||||
+/// Match APInt while forbidding undefs in splat vector constants.
|
||||
+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
|
||||
+ return apint_match(Res, /* AllowUndef */ false);
|
||||
+}
|
||||
|
||||
/// Match a ConstantFP or splatted ConstantVector, binding the
|
||||
/// specified pointer to the contained APFloat.
|
||||
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
index bf32996d96e2..40a246b9d7a7 100644
|
||||
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
|
||||
@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
|
||||
if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
|
||||
if (CI->hasOneUse()) {
|
||||
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
|
||||
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
|
||||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
|
||||
+
|
||||
+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
|
||||
+ // We have to ensure that vector constants that only differ with
|
||||
+ // undef elements are treated as equivalent.
|
||||
+ auto areLooselyEqual = [](Value *A, Value *B) {
|
||||
+ if (A == B)
|
||||
+ return true;
|
||||
+
|
||||
+ // Test for vector constants.
|
||||
+ Constant *ConstA, *ConstB;
|
||||
+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
|
||||
+ return false;
|
||||
+
|
||||
+ // TODO: Deal with FP constants?
|
||||
+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
|
||||
+ return false;
|
||||
+
|
||||
+ // Compare for equality including undefs as equal.
|
||||
+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
|
||||
+ const APInt *C;
|
||||
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
|
||||
+ };
|
||||
+
|
||||
+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
|
||||
+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:00:23 +0800
|
||||
Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
|
||||
|
||||
It's backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 2476fd26f250..2743acc89bca 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
+ // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
|
||||
if (N0.isUndef())
|
||||
- return DAG.getUNDEF(VT);
|
||||
+ return DAG.getConstant(0, SDLoc(N), VT);
|
||||
|
||||
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
|
||||
return Res;
|
||||
@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
|
||||
if (N0.isUndef())
|
||||
- return DAG.getUNDEF(VT);
|
||||
+ return DAG.getConstant(0, SDLoc(N), VT);
|
||||
|
||||
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
|
||||
return Res;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:02:37 +0800
|
||||
Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
|
||||
avx512skx-i16x32.
|
||||
|
||||
It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
|
||||
https://reviews.llvm.org/D76312
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 2743acc89bca..439a8367dabe 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
|
||||
// Attempt to pre-truncate BUILD_VECTOR sources.
|
||||
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
|
||||
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
|
||||
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
|
||||
+ // Avoid creating illegal types if running after type legalizer.
|
||||
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
|
||||
SDLoc DL(N);
|
||||
EVT SVT = VT.getScalarType();
|
||||
SmallVector<SDValue, 8> TruncOps;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:07:25 +0800
|
||||
Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
|
||||
createVariablePermute call fails
|
||||
|
||||
Account for the case where a recursive createVariablePermute call with a wider vector type fails.
|
||||
|
||||
Original test case from @craig.topper (Craig Topper)
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
|
||||
|
||||
Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index c8720d9ae3a6..63eb050e9b3a 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
||||
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
|
||||
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
|
||||
Subtarget, DAG, SDLoc(IndicesVec));
|
||||
- return extractSubVector(
|
||||
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
|
||||
- DAG, DL, SizeInBits);
|
||||
+ SDValue NewSrcVec =
|
||||
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
|
||||
+ if (NewSrcVec)
|
||||
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
|
||||
+ return SDValue();
|
||||
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
|
||||
// Widen smaller SrcVec to match VT.
|
||||
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:09:42 +0800
|
||||
Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
|
||||
avx512skx-i16x32 targets.
|
||||
|
||||
This is combination of two commits:
|
||||
- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
|
||||
- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
|
||||
1 file changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
index 439a8367dabe..b1639c7f275d 100644
|
||||
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
||||
|
||||
// Allow targets to opt-out.
|
||||
EVT VT = Extract->getValueType(0);
|
||||
+
|
||||
+ // We can only create byte sized loads.
|
||||
+ if (!VT.isByteSized())
|
||||
+ return SDValue();
|
||||
+
|
||||
+ unsigned Index = ExtIdx->getZExtValue();
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ // If the index is a multiple of the extract element count, we can offset the
|
||||
+ // address by the store size multiplied by the subvector index. Otherwise if
|
||||
+ // the scalar type is byte sized, we can just use the index multiplied by
|
||||
+ // the element size in bytes as the offset.
|
||||
+ unsigned Offset;
|
||||
+ if (Index % NumElts == 0)
|
||||
+ Offset = (Index / NumElts) * VT.getStoreSize();
|
||||
+ else if (VT.getScalarType().isByteSized())
|
||||
+ Offset = Index * VT.getScalarType().getStoreSize();
|
||||
+ else
|
||||
+ return SDValue();
|
||||
+
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
|
||||
return SDValue();
|
||||
@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
||||
// The narrow load will be offset from the base address of the old load if
|
||||
// we are extracting from something besides index 0 (little-endian).
|
||||
SDLoc DL(Extract);
|
||||
- SDValue BaseAddr = Ld->getOperand(1);
|
||||
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
|
||||
+ SDValue BaseAddr = Ld->getBasePtr();
|
||||
|
||||
// TODO: Use "BaseIndexOffset" to make this more effective.
|
||||
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:13:00 +0800
|
||||
Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
|
||||
store zeros in the rest of the byte
|
||||
|
||||
We can't store garbage in the unused bits. It is possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
|
||||
|
||||
I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
|
||||
|
||||
Should fix PR48147.
|
||||
|
||||
Reviewed By: RKSimon
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D91294
|
||||
|
||||
Upstream-Status: Backport
|
||||
|
||||
Signed-off-by: Craig Topper <craig.topper@sifive.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
|
||||
llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
|
||||
2 files changed, 14 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index 63eb050e9b3a..96b5e2cfbd82 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
|
||||
if (StoredVal.getValueType().isVector() &&
|
||||
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
|
||||
- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
|
||||
- "Unexpected VT");
|
||||
+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
|
||||
+ assert(NumElts <= 8 && "Unexpected VT");
|
||||
assert(!St->isTruncatingStore() && "Expected non-truncating store");
|
||||
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
|
||||
"Expected AVX512F without AVX512DQI");
|
||||
|
||||
+ // We must pad with zeros to ensure we store zeroes to any unused bits.
|
||||
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
|
||||
DAG.getUNDEF(MVT::v16i1), StoredVal,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
|
||||
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
|
||||
+ // Make sure we store zeros in the extra bits.
|
||||
+ if (NumElts < 8)
|
||||
+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
|
||||
+ MVT::getIntegerVT(NumElts));
|
||||
|
||||
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
St->getPointerInfo(), St->getAlignment(),
|
||||
@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
|
||||
StoredVal = DAG.getBitcast(NewVT, StoredVal);
|
||||
-
|
||||
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
+ SDValue Val = StoredVal.getOperand(0);
|
||||
+ // We must store zeros to the unused bits.
|
||||
+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
|
||||
+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
|
||||
St->getPointerInfo(), St->getAlignment(),
|
||||
St->getMemOperand()->getFlags());
|
||||
}
|
||||
@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
// Widen v2i1/v4i1 stores to v8i1.
|
||||
- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
|
||||
+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
|
||||
Subtarget.hasAVX512()) {
|
||||
unsigned NumConcats = 8 / VT.getVectorNumElements();
|
||||
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
|
||||
+ // We must store zeros to the unused bits.
|
||||
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
|
||||
Ops[0] = StoredVal;
|
||||
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
|
||||
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
|
||||
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
index 32f012033fb0..d3b92183f87b 100644
|
||||
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
|
||||
@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
|
||||
|
||||
// Load/store kreg
|
||||
let Predicates = [HasDQI] in {
|
||||
- def : Pat<(store VK1:$src, addr:$dst),
|
||||
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
|
||||
|
||||
def : Pat<(v1i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
|
||||
From: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
Date: Fri, 27 Aug 2021 12:15:09 +0800
|
||||
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
|
||||
getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
|
||||
file
|
||||
|
||||
Previously we tried to promote these to xmm/ymm/zmm by promoting
|
||||
in the X86CallingConv.td file. But this breaks when we run out
|
||||
of xmm/ymm/zmm registers and need to fall back to memory. We end
|
||||
up trying to create a non-sensical scalar to vector. This lead
|
||||
to an assertion. The new tests in avx512-calling-conv.ll all
|
||||
trigger this assertion.
|
||||
|
||||
Since we really want to treat these types like we do on avx2,
|
||||
it seems better to promote them before the calling convention
|
||||
code gets involved. Except when the calling convention is one
|
||||
that passes the vXi1 type in a k register.
|
||||
|
||||
The changes in avx512-regcall-Mask.ll are because we indicated
|
||||
that xmm/ymm/zmm types should be passed indirectly for the
|
||||
Win64 ABI before we go to the common lines that promoted the
|
||||
vXi1 types. This caused the promoted types to be picked up by
|
||||
the default calling convention code. Now we promote them earlier
|
||||
so they get passed indirectly as though they were xmm/ymm/zmm.
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D75154
|
||||
|
||||
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
|
||||
|
||||
Signed-off-by: Craig Topper <craig.topper@intel.com>
|
||||
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
|
||||
1 file changed, 61 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index 96b5e2cfbd82..d5de94aeb8a2 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
|
||||
return TargetLoweringBase::getPreferredVectorAction(VT);
|
||||
}
|
||||
|
||||
+static std::pair<MVT, unsigned>
|
||||
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
|
||||
+ const X86Subtarget &Subtarget) {
|
||||
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
|
||||
+ // convention is one that uses k registers.
|
||||
+ if (NumElts == 2)
|
||||
+ return {MVT::v2i64, 1};
|
||||
+ if (NumElts == 4)
|
||||
+ return {MVT::v4i32, 1};
|
||||
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
|
||||
+ CC != CallingConv::Intel_OCL_BI)
|
||||
+ return {MVT::v8i16, 1};
|
||||
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
|
||||
+ CC != CallingConv::Intel_OCL_BI)
|
||||
+ return {MVT::v16i8, 1};
|
||||
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
|
||||
+ // regcall.
|
||||
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
|
||||
+ return {MVT::v32i8, 1};
|
||||
+ // Split v64i1 vectors if we don't have v64i8 available.
|
||||
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
|
||||
+ if (Subtarget.useAVX512Regs())
|
||||
+ return {MVT::v64i8, 1};
|
||||
+ return {MVT::v32i8, 2};
|
||||
+ }
|
||||
+
|
||||
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
|
||||
+ NumElts > 64)
|
||||
+ return {MVT::i8, NumElts};
|
||||
+
|
||||
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
|
||||
+}
|
||||
+
|
||||
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
|
||||
CallingConv::ID CC,
|
||||
EVT VT) const {
|
||||
- // v32i1 vectors should be promoted to v32i8 to match avx2.
|
||||
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
|
||||
- return MVT::v32i8;
|
||||
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
- Subtarget.hasAVX512() &&
|
||||
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
|
||||
- return MVT::i8;
|
||||
- // Split v64i1 vectors if we don't have v64i8 available.
|
||||
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
- CC != CallingConv::X86_RegCall)
|
||||
- return MVT::v32i1;
|
||||
+ Subtarget.hasAVX512()) {
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ MVT RegisterVT;
|
||||
+ unsigned NumRegisters;
|
||||
+ std::tie(RegisterVT, NumRegisters) =
|
||||
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
|
||||
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
|
||||
+ return RegisterVT;
|
||||
+ }
|
||||
+
|
||||
// FIXME: Should we just make these types legal and custom split operations?
|
||||
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
|
||||
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
|
||||
return MVT::v16i32;
|
||||
+
|
||||
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
|
||||
}
|
||||
|
||||
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
|
||||
CallingConv::ID CC,
|
||||
EVT VT) const {
|
||||
- // v32i1 vectors should be promoted to v32i8 to match avx2.
|
||||
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
|
||||
- return 1;
|
||||
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
- Subtarget.hasAVX512() &&
|
||||
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
|
||||
- return VT.getVectorNumElements();
|
||||
- // Split v64i1 vectors if we don't have v64i8 available.
|
||||
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
- CC != CallingConv::X86_RegCall)
|
||||
- return 2;
|
||||
+ Subtarget.hasAVX512()) {
|
||||
+ unsigned NumElts = VT.getVectorNumElements();
|
||||
+
|
||||
+ MVT RegisterVT;
|
||||
+ unsigned NumRegisters;
|
||||
+ std::tie(RegisterVT, NumRegisters) =
|
||||
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
|
||||
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
|
||||
+ return NumRegisters;
|
||||
+ }
|
||||
+
|
||||
// FIXME: Should we just make these types legal and custom split operations?
|
||||
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
|
||||
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
|
||||
return 1;
|
||||
+
|
||||
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
|
||||
}
|
||||
|
||||
@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
|
||||
Subtarget.hasAVX512() &&
|
||||
(!isPowerOf2_32(VT.getVectorNumElements()) ||
|
||||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
|
||||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
|
||||
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
|
||||
+ VT.getVectorNumElements() > 64)) {
|
||||
RegisterVT = MVT::i8;
|
||||
IntermediateVT = MVT::i1;
|
||||
NumIntermediates = VT.getVectorNumElements();
|
||||
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
// Split v64i1 vectors if we don't have v64i8 available.
|
||||
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
|
||||
CC != CallingConv::X86_RegCall) {
|
||||
- RegisterVT = MVT::v32i1;
|
||||
+ RegisterVT = MVT::v32i8;
|
||||
IntermediateVT = MVT::v32i1;
|
||||
NumIntermediates = 2;
|
||||
return 2;
|
||||
--
|
||||
2.17.1
|
||||
|
|
@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \
|
|||
file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
|
||||
file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
|
||||
file://llvm10-0007-support-cl_ext_float_atomics.patch \
|
||||
file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
|
||||
file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
|
||||
file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
|
||||
file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
|
||||
file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
|
||||
file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
|
||||
file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
|
||||
file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
|
||||
"
|
||||
|
||||
SRC_URI_LLVM11_PATCHES = " \
|
||||
|
|
Loading…
Reference in New Issue
Block a user