llvm/10.0.0: apply ispc recommended patches

ISPC recommends building LLVM 10 with some additional patches that work
around bugs in this version. Add those patches to our build as well.

https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches

Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>

From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 11:53:27 +0800
Subject: [PATCH 1/2] This patch is required to fix the crash referenced in
 #1767
It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D76994.
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 63ddb59fce68..822da2183269 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
}
}
}
-
+#ifndef NDEBUG
// Checked that NewNodes are only used by other NewNodes.
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
SDNode *N = NewNodes[i];
@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
UI != UE; ++UI)
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
+#endif
}
/// This is the main entry point for the type legalizer. This does a top-down
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index faae14444d51..b908c5c58e9f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -155,7 +155,9 @@ private:
const SDValue &getSDValue(TableId &Id) {
RemapId(Id);
assert(Id && "TableId should be non-zero");
- return IdToValueMap[Id];
+ auto I = IdToValueMap.find(Id);
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
+ return I->second;
}
public:
@@ -172,24 +174,29 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
+ assert(Old != New && "node replaced with self");
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
TableId NewId = getTableId(SDValue(New, i));
TableId OldId = getTableId(SDValue(Old, i));
- if (OldId != NewId)
+ if (OldId != NewId) {
ReplacedValues[OldId] = NewId;
- // Delete Node from tables.
+ // Delete Node from tables. We cannot do this when OldId == NewId,
+ // because NewId can still have table references to it in
+ // ReplacedValues.
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+
ValueToIdMap.erase(SDValue(Old, i));
- IdToValueMap.erase(OldId);
- PromotedIntegers.erase(OldId);
- ExpandedIntegers.erase(OldId);
- SoftenedFloats.erase(OldId);
- PromotedFloats.erase(OldId);
- ExpandedFloats.erase(OldId);
- ScalarizedVectors.erase(OldId);
- SplitVectors.erase(OldId);
- WidenedVectors.erase(OldId);
}
}
--
2.17.1
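
The getSDValue() hunk above is essentially a checked-lookup hardening:
operator[] on a map default-constructs a value for a missing key, silently
papering over a stale TableId, while find() plus an assert makes the bug
visible. A minimal standalone C++ sketch of the same idiom (plain std::map
rather than LLVM's DenseMap; not part of the patch):

#include <cassert>
#include <map>
#include <string>

// Sketch only: operator[] would quietly insert an empty value for a
// missing key; find() plus an assert surfaces the stale-id bug instead,
// mirroring the patched getSDValue().
const std::string &checkedLookup(std::map<int, std::string> &m, int id) {
  auto it = m.find(id);
  assert(it != m.end() && "cannot find id in map");
  return it->second;
}

int main() {
  std::map<int, std::string> m{{1, "one"}};
  return checkedLookup(m, 1) == "one" ? 0 : 1;
}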

From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 11:56:01 +0800
Subject: [PATCH 2/2] This patch is a fix for #1788.
It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
.../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
2 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 6621fc9f819c..fb7ad93519f6 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
struct apint_match {
const APInt *&Res;
+ bool AllowUndef;
- apint_match(const APInt *&R) : Res(R) {}
+ apint_match(const APInt *&Res, bool AllowUndef)
+ : Res(Res), AllowUndef(AllowUndef) {}
template <typename ITy> bool match(ITy *V) {
if (auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -162,7 +164,8 @@ struct apint_match {
}
if (V->getType()->isVectorTy())
if (const auto *C = dyn_cast<Constant>(V))
- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
+ C->getSplatValue(AllowUndef))) {
Res = &CI->getValue();
return true;
}
@@ -192,7 +195,20 @@ struct apfloat_match {
/// Match a ConstantInt or splatted ConstantVector, binding the
/// specified pointer to the contained APInt.
-inline apint_match m_APInt(const APInt *&Res) { return Res; }
+inline apint_match m_APInt(const APInt *&Res) {
+ // Forbid undefs by default to maintain previous behavior.
+ return apint_match(Res, /* AllowUndef */ false);
+}
+
+/// Match APInt while allowing undefs in splat vector constants.
+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
+ return apint_match(Res, /* AllowUndef */ true);
+}
+
+/// Match APInt while forbidding undefs in splat vector constants.
+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
+ return apint_match(Res, /* AllowUndef */ false);
+}
/// Match a ConstantFP or splatted ConstantVector, binding the
/// specified pointer to the contained APFloat.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index bf32996d96e2..40a246b9d7a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
if (CI->hasOneUse()) {
Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+
+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
+ // We have to ensure that vector constants that only differ with
+ // undef elements are treated as equivalent.
+ auto areLooselyEqual = [](Value *A, Value *B) {
+ if (A == B)
+ return true;
+
+ // Test for vector constants.
+ Constant *ConstA, *ConstB;
+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
+ return false;
+
+ // TODO: Deal with FP constants?
+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
+ return false;
+
+ // Compare for equality including undefs as equal.
+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
+ const APInt *C;
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ };
+
+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
return nullptr;
}
}
--
2.17.1
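
The areLooselyEqual lambda above treats two vector constants as equivalent
when their lanes differ only by undef elements. A rough standalone C++ model
of that predicate, with std::optional standing in for undef lanes (the types
are illustrative, not LLVM's):

#include <cstddef>
#include <optional>
#include <vector>

// Sketch only: a lane is nullopt when it is undef. Two constant vectors
// are "loosely equal" when every pair of defined lanes matches; an undef
// lane may match anything, mirroring the m_APIntAllowUndef comparison.
using Lane = std::optional<int>;

bool looselyEqual(const std::vector<Lane> &a, const std::vector<Lane> &b) {
  if (a.size() != b.size())
    return false;
  for (std::size_t i = 0; i < a.size(); ++i)
    if (a[i] && b[i] && *a[i] != *b[i]) // both defined but different
      return false;
  return true;
}

int main() {
  std::vector<Lane> x{1, std::nullopt, 3}, y{1, 2, 3};
  return looselyEqual(x, y) ? 0 : 1; // equal up to the undef lane
}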

From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:00:23 +0800
Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
It's a backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2476fd26f250..2743acc89bca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
--
2.17.1
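
The reason 0 (rather than undef) is the correct fold for both nodes: after a
zero-extension the high bits are provably zero, and after a sign-extension
they all provably equal the sign bit, so an undef result would license later
combines to assume arbitrary high bits. A scalar C++ analogy of the two
guarantees (a sketch, not LLVM code):

#include <cstdint>

// Sketch only: per-lane view of what an *_EXTEND_VECTOR_INREG result must
// uphold. The constant 0 honours both properties; undef promises neither.
uint32_t zextLane(uint16_t v) { return v; } // high 16 bits are 0
int32_t  sextLane(int16_t v)  { return v; } // high 16 bits copy the sign bit

int main() {
  return (zextLane(0xFFFF) == 0x0000FFFFu && sextLane(int16_t(-1)) == -1)
             ? 0
             : 1;
}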

From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:02:37 +0800
Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
avx512skx-i16x32.
It's a port of llvm 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b:
https://reviews.llvm.org/D76312
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2743acc89bca..439a8367dabe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+ // Avoid creating illegal types if running after type legalizer.
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
--
2.17.1
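
The guard's logic in isolation: a combine that materializes a new scalar type
may run freely before type legalization, but afterwards only if that type is
legal, since the legalizer will not run again to clean it up. A hedged sketch
with hypothetical parameter names (not LLVM's API):

// Sketch only: the pre-truncate combine fires either before type
// legalization (LegalTypes == false) or when the narrow scalar type is
// already legal, never otherwise.
bool mayPreTruncateBuildVector(bool TruncateIsFree, bool LegalTypes,
                               bool NarrowScalarIsLegal) {
  return TruncateIsFree && (!LegalTypes || NarrowScalarIsLegal);
}

int main() {
  // After type legalization, an illegal narrow type must block the combine.
  return mayPreTruncateBuildVector(true, true, false) ? 1 : 0;
}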

From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:07:25 +0800
Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
createVariablePermute call fails
Account for the case where a recursive createVariablePermute call with a wider vector type fails.
Original test case from @craig.topper (Craig Topper)
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c8720d9ae3a6..63eb050e9b3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
Subtarget, DAG, SDLoc(IndicesVec));
- return extractSubVector(
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
- DAG, DL, SizeInBits);
+ SDValue NewSrcVec =
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
+ if (NewSrcVec)
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
+ return SDValue();
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
// Widen smaller SrcVec to match VT.
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
--
2.17.1
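
The shape of the fix, modelled with std::optional standing in for LLVM's
null SDValue (helper names are hypothetical; sketch only): a failed recursive
lowering must be propagated to the caller rather than fed into
extractSubVector.

#include <optional>

// Sketch only: lowerWiderPermute stands in for the recursive
// createVariablePermute call, which may fail for some inputs.
std::optional<int> lowerWiderPermute(int v) {
  return (v % 2 == 0) ? std::optional<int>(v * 2) : std::nullopt;
}

std::optional<int> lowerViaWiden(int v) {
  if (auto Wide = lowerWiderPermute(v))
    return *Wide / 2; // extract the low part of the widened result
  return std::nullopt; // propagate failure; the caller falls back
}

int main() { return lowerViaWiden(3) ? 1 : 0; }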

From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:09:42 +0800
Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
avx512skx-i16x32 targets.
This is a combination of two commits:
- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow-up.
Upstream-Status: Backport
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 439a8367dabe..b1639c7f275d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
+
+ // We can only create byte sized loads.
+ if (!VT.isByteSized())
+ return SDValue();
+
+ unsigned Index = ExtIdx->getZExtValue();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If the index is a multiple of the extract element count, we can offset the
+ // address by the store size multiplied by the subvector index. Otherwise if
+ // the scalar type is byte sized, we can just use the index multiplied by
+ // the element size in bytes as the offset.
+ unsigned Offset;
+ if (Index % NumElts == 0)
+ Offset = (Index / NumElts) * VT.getStoreSize();
+ else if (VT.getScalarType().isByteSized())
+ Offset = Index * VT.getScalarType().getStoreSize();
+ else
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getOperand(1);
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+ SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
--
2.17.1
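
The new offset computation in plain arithmetic, as a standalone sketch with
ordinary integers instead of LLVM types:

#include <cassert>

// Sketch only: extracting the subvector that starts at element Index from
// a wide load. When Index is a multiple of the subvector's element count,
// the narrow load sits at a whole-subvector byte offset; otherwise a
// per-element offset is used, which is only addressable when elements are
// byte sized.
unsigned narrowLoadByteOffset(unsigned Index, unsigned NumElts,
                              unsigned SubVecBytes, unsigned EltBytes,
                              bool EltIsByteSized) {
  if (Index % NumElts == 0)
    return (Index / NumElts) * SubVecBytes;
  assert(EltIsByteSized && "sub-byte offsets cannot be addressed");
  return Index * EltBytes;
}

int main() {
  // e.g. extracting v4i32 number 1 (elements 4..7) of a v16i32 load
  // starts 16 bytes into the original load.
  return narrowLoadByteOffset(4, 4, 16, 4, true) == 16 ? 0 : 1;
}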

From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:13:00 +0800
Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
store zeros in the rest of the byte
We can't store garbage in the unused bits. It's possible that something like a zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment, so I wanted to focus on the correctness issue.
Should fix PR48147.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D9129
Upstream-Status: Backport
Signed-off-by: Craig Topper <craig.topper@sifive.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63eb050e9b3a..96b5e2cfbd82 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
if (StoredVal.getValueType().isVector() &&
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
+ assert(NumElts <= 8 && "Unexpected VT");
assert(!St->isTruncatingStore() && "Expected non-truncating store");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
"Expected AVX512F without AVX512DQI");
+ // We must pad with zeros to ensure we store zeroes to any unused bits.
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
DAG.getUNDEF(MVT::v16i1), StoredVal,
DAG.getIntPtrConstant(0, dl));
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+ // Make sure we store zeros in the extra bits.
+ if (NumElts < 8)
+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
+ MVT::getIntegerVT(NumElts));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
StoredVal = DAG.getBitcast(NewVT, StoredVal);
-
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ SDValue Val = StoredVal.getOperand(0);
+ // We must store zeros to the unused bits.
+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
// Widen v2i1/v4i1 stores to v8i1.
- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
Subtarget.hasAVX512()) {
unsigned NumConcats = 8 / VT.getVectorNumElements();
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+ // We must store zeros to the unused bits.
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
Ops[0] = StoredVal;
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 32f012033fb0..d3b92183f87b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
// Load/store kreg
let Predicates = [HasDQI] in {
- def : Pat<(store VK1:$src, addr:$dst),
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
--
2.17.1
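
The correctness rule in miniature: a v4i1 mask stored into a byte must have
bits 4-7 cleared, because a consumer may reload it with a zero-extending i4
load that assumes those bits are 0. A sketch, not LLVM code:

#include <cstdint>

// Sketch only: leaving the top bits as garbage breaks any reader that
// zero-extends the 4-bit value, so the store must zero them, as the
// patched lowering now does.
uint8_t storeV4i1(uint8_t lanes) { return lanes & 0x0F; }

int main() {
  uint8_t inMemory = storeV4i1(0xF5); // lanes 0b0101, garbage above them
  uint8_t reloaded = inMemory;        // zextload i4 -> i8 assumes high bits 0
  return reloaded == 0x05 ? 0 : 1;
}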

From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:15:09 +0800
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
file
Previously we tried to promote these to xmm/ymm/zmm by promoting
in the X86CallingConv.td file. But this breaks when we run out
of xmm/ymm/zmm registers and need to fall back to memory. We end
up trying to create a nonsensical scalar-to-vector. This led
to an assertion. The new tests in avx512-calling-conv.ll all
trigger this assertion.
Since we really want to treat these types like we do on avx2,
it seems better to promote them before the calling convention
code gets involved. Except when the calling convention is one
that passes the vXi1 type in a k register.
The changes in avx512-regcall-Mask.ll are because we indicated
that xmm/ymm/zmm types should be passed indirectly for the
Win64 ABI before we go to the common lines that promoted the
vXi1 types. This caused the promoted types to be picked up by
the default calling convention code. Now we promote them earlier
so they get passed indirectly as though they were xmm/ymm/zmm.
Differential Revision: https://reviews.llvm.org/D75154
Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
Signed-off-by: Craig Topper <craig.topper@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
1 file changed, 61 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96b5e2cfbd82..d5de94aeb8a2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+ const X86Subtarget &Subtarget) {
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+ // convention is one that uses k registers.
+ if (NumElts == 2)
+ return {MVT::v2i64, 1};
+ if (NumElts == 4)
+ return {MVT::v4i32, 1};
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v8i16, 1};
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v16i8, 1};
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
+ // regcall.
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+ return {MVT::v32i8, 1};
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+ if (Subtarget.useAVX512Regs())
+ return {MVT::v64i8, 1};
+ return {MVT::v32i8, 2};
+ }
+
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+ NumElts > 64)
+ return {MVT::i8, NumElts};
+
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return MVT::v32i8;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return MVT::i8;
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return MVT::v32i1;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return RegisterVT;
+ }
+
// FIXME: Should we just make these types legal and custom split operations?
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return MVT::v16i32;
+
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return 1;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return VT.getVectorNumElements();
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return 2;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return NumRegisters;
+ }
+
// FIXME: Should we just make these types legal and custom split operations?
if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
return 1;
+
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
Subtarget.hasAVX512() &&
(!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+ VT.getVectorNumElements() > 64)) {
RegisterVT = MVT::i8;
IntermediateVT = MVT::i1;
NumIntermediates = VT.getVectorNumElements();
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
// Split v64i1 vectors if we don't have v64i8 available.
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
CC != CallingConv::X86_RegCall) {
- RegisterVT = MVT::v32i1;
+ RegisterVT = MVT::v32i8;
IntermediateVT = MVT::v32i1;
NumIntermediates = 2;
return 2;
--
2.17.1
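
The mapping handleMaskRegisterForCallingConv encodes, reduced to a table: a
vXi1 mask is passed as the integer vector type avx2 would use for the same
element count, split in two when v64i8 is unavailable, and scalarized for odd
or over-wide masks. A hedged C++ sketch that omits the RegCall/Intel_OCL_BI
and BWI special cases visible in the hunk above:

#include <utility>

// Sketch only: simplified view of the NumElts -> (register type, count)
// table; the real function also consults the calling convention.
enum class Reg { v2i64, v4i32, v8i16, v16i8, v32i8, v64i8, i8 };

std::pair<Reg, unsigned> maskRegisterFor(unsigned NumElts, bool HasV64i8) {
  switch (NumElts) {
  case 2:  return {Reg::v2i64, 1};
  case 4:  return {Reg::v4i32, 1};
  case 8:  return {Reg::v8i16, 1};
  case 16: return {Reg::v16i8, 1};
  case 32: return {Reg::v32i8, 1};
  case 64: return HasV64i8 ? std::pair<Reg, unsigned>{Reg::v64i8, 1}
                           : std::pair<Reg, unsigned>{Reg::v32i8, 2};
  default: return {Reg::i8, NumElts}; // scalarize to match avx2 behavior
  }
}

int main() { return maskRegisterFor(64, false).second == 2 ? 0 : 1; }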

@@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \
file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
file://llvm10-0007-support-cl_ext_float_atomics.patch \
+file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
+file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
+file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
+file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
+file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
+file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
+file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
+file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
"
SRC_URI_LLVM11_PATCHES = " \